Repository: munificent/craftinginterpreters
Branch: master
Commit: 4a840f70f69c
Files: 548
Total size: 4.5 MB

Directory structure:
gitextract_s3p1_s2h/

├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── asset/
│   ├── index.scss
│   ├── mustache/
│   │   ├── contents-nav.html
│   │   ├── contents-part.html
│   │   ├── contents.html
│   │   ├── footer.html
│   │   ├── header.html
│   │   ├── in_design.html
│   │   ├── index.html
│   │   ├── nav.html
│   │   ├── page.html
│   │   └── prev-next.html
│   ├── sass/
│   │   ├── chapter.scss
│   │   ├── contents.scss
│   │   ├── print.scss
│   │   ├── shared.scss
│   │   └── sign-up.scss
│   └── style.scss
├── book/
│   ├── a-bytecode-virtual-machine.md
│   ├── a-map-of-the-territory.md
│   ├── a-tree-walk-interpreter.md
│   ├── a-virtual-machine.md
│   ├── acknowledgements.md
│   ├── appendix-i.md
│   ├── appendix-ii.md
│   ├── backmatter.md
│   ├── calls-and-functions.md
│   ├── chunks-of-bytecode.md
│   ├── classes-and-instances.md
│   ├── classes.md
│   ├── closures.md
│   ├── compiling-expressions.md
│   ├── contents.md
│   ├── control-flow.md
│   ├── dedication.md
│   ├── evaluating-expressions.md
│   ├── functions.md
│   ├── garbage-collection.md
│   ├── global-variables.md
│   ├── hash-tables.md
│   ├── index.md
│   ├── inheritance.md
│   ├── introduction.md
│   ├── jumping-back-and-forth.md
│   ├── local-variables.md
│   ├── methods-and-initializers.md
│   ├── optimization.md
│   ├── parsing-expressions.md
│   ├── representing-code.md
│   ├── resolving-and-binding.md
│   ├── scanning-on-demand.md
│   ├── scanning.md
│   ├── statements-and-state.md
│   ├── strings.md
│   ├── superclasses.md
│   ├── the-lox-language.md
│   ├── types-of-values.md
│   └── welcome.md
├── c/
│   ├── chunk.c
│   ├── chunk.h
│   ├── clox.xcodeproj/
│   │   ├── project.pbxproj
│   │   ├── project.xcworkspace/
│   │   │   ├── contents.xcworkspacedata
│   │   │   └── xcshareddata/
│   │   │       ├── IDEWorkspaceChecks.plist
│   │   │       └── WorkspaceSettings.xcsettings
│   │   └── xcshareddata/
│   │       └── xcschemes/
│   │           └── clox.xcscheme
│   ├── common.h
│   ├── compiler.c
│   ├── compiler.h
│   ├── debug.c
│   ├── debug.h
│   ├── main.c
│   ├── memory.c
│   ├── memory.h
│   ├── object.c
│   ├── object.h
│   ├── scanner.c
│   ├── scanner.h
│   ├── table.c
│   ├── table.h
│   ├── value.c
│   ├── value.h
│   ├── vm.c
│   └── vm.h
├── java/
│   └── com/
│       └── craftinginterpreters/
│           ├── lox/
│           │   ├── AstPrinter.java
│           │   ├── Environment.java
│           │   ├── Expr.java
│           │   ├── Interpreter.java
│           │   ├── Lox.java
│           │   ├── LoxCallable.java
│           │   ├── LoxClass.java
│           │   ├── LoxFunction.java
│           │   ├── LoxInstance.java
│           │   ├── Parser.java
│           │   ├── Resolver.java
│           │   ├── Return.java
│           │   ├── RuntimeError.java
│           │   ├── Scanner.java
│           │   ├── Stmt.java
│           │   ├── Token.java
│           │   └── TokenType.java
│           └── tool/
│               └── GenerateAst.java
├── jlox
├── note/
│   ├── BISAC.txt
│   ├── answers/
│   │   ├── chapter01_introduction/
│   │   │   ├── 1.md
│   │   │   ├── 2/
│   │   │   │   ├── Hello.java
│   │   │   │   └── Makefile
│   │   │   └── 3/
│   │   │       ├── Makefile
│   │   │       ├── linked_list
│   │   │       ├── linked_list.c
│   │   │       └── linked_list.xcodeproj/
│   │   │           ├── project.pbxproj
│   │   │           └── project.xcworkspace/
│   │   │               └── contents.xcworkspacedata
│   │   ├── chapter02_map.md
│   │   ├── chapter03_lox.md
│   │   ├── chapter04_scanning.md
│   │   ├── chapter05_representing.md
│   │   ├── chapter06_parsing.md
│   │   ├── chapter07_evaluating.md
│   │   ├── chapter08_statements.md
│   │   ├── chapter09_control.md
│   │   ├── chapter10_functions.md
│   │   ├── chapter11_resolving/
│   │   │   ├── 4/
│   │   │   │   └── com/
│   │   │   │       └── craftinginterpreters/
│   │   │   │           ├── lox/
│   │   │   │           │   ├── AstPrinter.java
│   │   │   │           │   ├── Environment.java
│   │   │   │           │   ├── Expr.java
│   │   │   │           │   ├── Interpreter.java
│   │   │   │           │   ├── Lox.java
│   │   │   │           │   ├── LoxCallable.java
│   │   │   │           │   ├── LoxFunction.java
│   │   │   │           │   ├── Parser.java
│   │   │   │           │   ├── Resolver.java
│   │   │   │           │   ├── Return.java
│   │   │   │           │   ├── RuntimeError.java
│   │   │   │           │   ├── Scanner.java
│   │   │   │           │   ├── Stmt.java
│   │   │   │           │   ├── Token.java
│   │   │   │           │   └── TokenType.java
│   │   │   │           └── tool/
│   │   │   │               └── GenerateAst.java
│   │   │   └── chapter11_resolving.md
│   │   ├── chapter12_classes.md
│   │   ├── chapter13_inheritance/
│   │   │   ├── 1.md
│   │   │   ├── 2.md
│   │   │   └── 3.md
│   │   ├── chapter14_chunks/
│   │   │   ├── 1.md
│   │   │   └── 2.md
│   │   ├── chapter15_virtual/
│   │   │   ├── 1.md
│   │   │   ├── 2.md
│   │   │   └── 3.md
│   │   ├── chapter16_scanning.md
│   │   ├── chapter17_compiling.md
│   │   ├── chapter18_types.md
│   │   ├── chapter19_strings.md
│   │   ├── chapter20_hash/
│   │   │   └── 1.md
│   │   ├── chapter21_global.md
│   │   ├── chapter23_jumping/
│   │   │   ├── 1.md
│   │   │   ├── 2.md
│   │   │   └── 3.md
│   │   ├── chapter24_calls/
│   │   │   ├── 1.md
│   │   │   └── 2.md
│   │   ├── chapter25_closures/
│   │   │   ├── 1.md
│   │   │   ├── 2.md
│   │   │   └── 3.lox
│   │   ├── chapter26_garbage/
│   │   │   ├── 1.md
│   │   │   └── 2.md
│   │   ├── chapter27_classes/
│   │   │   ├── 1.md
│   │   │   ├── 2.md
│   │   │   ├── 3.md
│   │   │   └── 4.md
│   │   ├── chapter28_methods/
│   │   │   ├── 1.md
│   │   │   ├── 2.md
│   │   │   └── 3.md
│   │   └── chapter29_superclasses/
│   │       ├── 1.md
│   │       ├── 2.md
│   │       ├── 3.diff
│   │       └── 3.md
│   ├── blurb.txt
│   ├── contents.txt
│   ├── design breaks.md
│   ├── images.md
│   ├── indexing.md
│   ├── log.txt
│   ├── names.txt
│   ├── objects.txt
│   ├── outline.md
│   ├── research.txt
│   ├── scope.txt
│   ├── struct sizes.txt
│   ├── style guide.md
│   └── todo.txt
├── site/
│   ├── .htaccess
│   ├── 404.html
│   ├── a-bytecode-virtual-machine.html
│   ├── a-map-of-the-territory.html
│   ├── a-tree-walk-interpreter.html
│   ├── a-virtual-machine.html
│   ├── acknowledgements.html
│   ├── appendix-i.html
│   ├── appendix-ii.html
│   ├── backmatter.html
│   ├── calls-and-functions.html
│   ├── chunks-of-bytecode.html
│   ├── classes-and-instances.html
│   ├── classes.html
│   ├── closures.html
│   ├── compiling-expressions.html
│   ├── contents.html
│   ├── control-flow.html
│   ├── dedication.html
│   ├── evaluating-expressions.html
│   ├── functions.html
│   ├── garbage-collection.html
│   ├── global-variables.html
│   ├── hash-tables.html
│   ├── index.css
│   ├── index.html
│   ├── inheritance.html
│   ├── introduction.html
│   ├── jumping-back-and-forth.html
│   ├── local-variables.html
│   ├── methods-and-initializers.html
│   ├── optimization.html
│   ├── parsing-expressions.html
│   ├── representing-code.html
│   ├── resolving-and-binding.html
│   ├── scanning-on-demand.html
│   ├── scanning.html
│   ├── script.js
│   ├── statements-and-state.html
│   ├── strings.html
│   ├── style.css
│   ├── superclasses.html
│   ├── the-lox-language.html
│   ├── types-of-values.html
│   └── welcome.html
├── test/
│   ├── assignment/
│   │   ├── associativity.lox
│   │   ├── global.lox
│   │   ├── grouping.lox
│   │   ├── infix_operator.lox
│   │   ├── local.lox
│   │   ├── prefix_operator.lox
│   │   ├── syntax.lox
│   │   ├── to_this.lox
│   │   └── undefined.lox
│   ├── benchmark/
│   │   ├── binary_trees.lox
│   │   ├── equality.lox
│   │   ├── fib.lox
│   │   ├── instantiation.lox
│   │   ├── invocation.lox
│   │   ├── method_call.lox
│   │   ├── properties.lox
│   │   ├── string_equality.lox
│   │   ├── trees.lox
│   │   ├── zoo.lox
│   │   └── zoo_batch.lox
│   ├── block/
│   │   ├── empty.lox
│   │   └── scope.lox
│   ├── bool/
│   │   ├── equality.lox
│   │   └── not.lox
│   ├── call/
│   │   ├── bool.lox
│   │   ├── nil.lox
│   │   ├── num.lox
│   │   ├── object.lox
│   │   └── string.lox
│   ├── class/
│   │   ├── empty.lox
│   │   ├── inherit_self.lox
│   │   ├── inherited_method.lox
│   │   ├── local_inherit_other.lox
│   │   ├── local_inherit_self.lox
│   │   ├── local_reference_self.lox
│   │   └── reference_self.lox
│   ├── closure/
│   │   ├── assign_to_closure.lox
│   │   ├── assign_to_shadowed_later.lox
│   │   ├── close_over_function_parameter.lox
│   │   ├── close_over_later_variable.lox
│   │   ├── close_over_method_parameter.lox
│   │   ├── closed_closure_in_function.lox
│   │   ├── nested_closure.lox
│   │   ├── open_closure_in_function.lox
│   │   ├── reference_closure_multiple_times.lox
│   │   ├── reuse_closure_slot.lox
│   │   ├── shadow_closure_with_local.lox
│   │   ├── unused_closure.lox
│   │   └── unused_later_closure.lox
│   ├── comments/
│   │   ├── line_at_eof.lox
│   │   ├── only_line_comment.lox
│   │   ├── only_line_comment_and_line.lox
│   │   └── unicode.lox
│   ├── constructor/
│   │   ├── arguments.lox
│   │   ├── call_init_early_return.lox
│   │   ├── call_init_explicitly.lox
│   │   ├── default.lox
│   │   ├── default_arguments.lox
│   │   ├── early_return.lox
│   │   ├── extra_arguments.lox
│   │   ├── init_not_method.lox
│   │   ├── missing_arguments.lox
│   │   ├── return_in_nested_function.lox
│   │   └── return_value.lox
│   ├── empty_file.lox
│   ├── expressions/
│   │   ├── evaluate.lox
│   │   └── parse.lox
│   ├── field/
│   │   ├── call_function_field.lox
│   │   ├── call_nonfunction_field.lox
│   │   ├── get_and_set_method.lox
│   │   ├── get_on_bool.lox
│   │   ├── get_on_class.lox
│   │   ├── get_on_function.lox
│   │   ├── get_on_nil.lox
│   │   ├── get_on_num.lox
│   │   ├── get_on_string.lox
│   │   ├── many.lox
│   │   ├── method.lox
│   │   ├── method_binds_this.lox
│   │   ├── on_instance.lox
│   │   ├── set_evaluation_order.lox
│   │   ├── set_on_bool.lox
│   │   ├── set_on_class.lox
│   │   ├── set_on_function.lox
│   │   ├── set_on_nil.lox
│   │   ├── set_on_num.lox
│   │   ├── set_on_string.lox
│   │   └── undefined.lox
│   ├── for/
│   │   ├── class_in_body.lox
│   │   ├── closure_in_body.lox
│   │   ├── fun_in_body.lox
│   │   ├── return_closure.lox
│   │   ├── return_inside.lox
│   │   ├── scope.lox
│   │   ├── statement_condition.lox
│   │   ├── statement_increment.lox
│   │   ├── statement_initializer.lox
│   │   ├── syntax.lox
│   │   └── var_in_body.lox
│   ├── function/
│   │   ├── body_must_be_block.lox
│   │   ├── empty_body.lox
│   │   ├── extra_arguments.lox
│   │   ├── local_mutual_recursion.lox
│   │   ├── local_recursion.lox
│   │   ├── missing_arguments.lox
│   │   ├── missing_comma_in_parameters.lox
│   │   ├── mutual_recursion.lox
│   │   ├── nested_call_with_arguments.lox
│   │   ├── parameters.lox
│   │   ├── print.lox
│   │   ├── recursion.lox
│   │   ├── too_many_arguments.lox
│   │   └── too_many_parameters.lox
│   ├── if/
│   │   ├── class_in_else.lox
│   │   ├── class_in_then.lox
│   │   ├── dangling_else.lox
│   │   ├── else.lox
│   │   ├── fun_in_else.lox
│   │   ├── fun_in_then.lox
│   │   ├── if.lox
│   │   ├── truth.lox
│   │   ├── var_in_else.lox
│   │   └── var_in_then.lox
│   ├── inheritance/
│   │   ├── constructor.lox
│   │   ├── inherit_from_function.lox
│   │   ├── inherit_from_nil.lox
│   │   ├── inherit_from_number.lox
│   │   ├── inherit_methods.lox
│   │   ├── parenthesized_superclass.lox
│   │   └── set_fields_from_base_class.lox
│   ├── limit/
│   │   ├── loop_too_large.lox
│   │   ├── no_reuse_constants.lox
│   │   ├── stack_overflow.lox
│   │   ├── too_many_constants.lox
│   │   ├── too_many_locals.lox
│   │   └── too_many_upvalues.lox
│   ├── logical_operator/
│   │   ├── and.lox
│   │   ├── and_truth.lox
│   │   ├── or.lox
│   │   └── or_truth.lox
│   ├── method/
│   │   ├── arity.lox
│   │   ├── empty_block.lox
│   │   ├── extra_arguments.lox
│   │   ├── missing_arguments.lox
│   │   ├── not_found.lox
│   │   ├── print_bound_method.lox
│   │   ├── refer_to_name.lox
│   │   ├── too_many_arguments.lox
│   │   └── too_many_parameters.lox
│   ├── nil/
│   │   └── literal.lox
│   ├── number/
│   │   ├── decimal_point_at_eof.lox
│   │   ├── leading_dot.lox
│   │   ├── literals.lox
│   │   ├── nan_equality.lox
│   │   └── trailing_dot.lox
│   ├── operator/
│   │   ├── add.lox
│   │   ├── add_bool_nil.lox
│   │   ├── add_bool_num.lox
│   │   ├── add_bool_string.lox
│   │   ├── add_nil_nil.lox
│   │   ├── add_num_nil.lox
│   │   ├── add_string_nil.lox
│   │   ├── comparison.lox
│   │   ├── divide.lox
│   │   ├── divide_nonnum_num.lox
│   │   ├── divide_num_nonnum.lox
│   │   ├── equals.lox
│   │   ├── equals_class.lox
│   │   ├── equals_method.lox
│   │   ├── greater_nonnum_num.lox
│   │   ├── greater_num_nonnum.lox
│   │   ├── greater_or_equal_nonnum_num.lox
│   │   ├── greater_or_equal_num_nonnum.lox
│   │   ├── less_nonnum_num.lox
│   │   ├── less_num_nonnum.lox
│   │   ├── less_or_equal_nonnum_num.lox
│   │   ├── less_or_equal_num_nonnum.lox
│   │   ├── multiply.lox
│   │   ├── multiply_nonnum_num.lox
│   │   ├── multiply_num_nonnum.lox
│   │   ├── negate.lox
│   │   ├── negate_nonnum.lox
│   │   ├── not.lox
│   │   ├── not_class.lox
│   │   ├── not_equals.lox
│   │   ├── subtract.lox
│   │   ├── subtract_nonnum_num.lox
│   │   └── subtract_num_nonnum.lox
│   ├── precedence.lox
│   ├── print/
│   │   └── missing_argument.lox
│   ├── regression/
│   │   ├── 394.lox
│   │   └── 40.lox
│   ├── return/
│   │   ├── after_else.lox
│   │   ├── after_if.lox
│   │   ├── after_while.lox
│   │   ├── at_top_level.lox
│   │   ├── in_function.lox
│   │   ├── in_method.lox
│   │   └── return_nil_if_no_value.lox
│   ├── scanning/
│   │   ├── identifiers.lox
│   │   ├── keywords.lox
│   │   ├── numbers.lox
│   │   ├── punctuators.lox
│   │   ├── strings.lox
│   │   └── whitespace.lox
│   ├── string/
│   │   ├── error_after_multiline.lox
│   │   ├── literals.lox
│   │   ├── multiline.lox
│   │   └── unterminated.lox
│   ├── super/
│   │   ├── bound_method.lox
│   │   ├── call_other_method.lox
│   │   ├── call_same_method.lox
│   │   ├── closure.lox
│   │   ├── constructor.lox
│   │   ├── extra_arguments.lox
│   │   ├── indirectly_inherited.lox
│   │   ├── missing_arguments.lox
│   │   ├── no_superclass_bind.lox
│   │   ├── no_superclass_call.lox
│   │   ├── no_superclass_method.lox
│   │   ├── parenthesized.lox
│   │   ├── reassign_superclass.lox
│   │   ├── super_at_top_level.lox
│   │   ├── super_in_closure_in_inherited_method.lox
│   │   ├── super_in_inherited_method.lox
│   │   ├── super_in_top_level_function.lox
│   │   ├── super_without_dot.lox
│   │   ├── super_without_name.lox
│   │   └── this_in_superclass_method.lox
│   ├── this/
│   │   ├── closure.lox
│   │   ├── nested_class.lox
│   │   ├── nested_closure.lox
│   │   ├── this_at_top_level.lox
│   │   ├── this_in_method.lox
│   │   └── this_in_top_level_function.lox
│   ├── unexpected_character.lox
│   ├── variable/
│   │   ├── collide_with_parameter.lox
│   │   ├── duplicate_local.lox
│   │   ├── duplicate_parameter.lox
│   │   ├── early_bound.lox
│   │   ├── in_middle_of_block.lox
│   │   ├── in_nested_block.lox
│   │   ├── local_from_method.lox
│   │   ├── redeclare_global.lox
│   │   ├── redefine_global.lox
│   │   ├── scope_reuse_in_different_blocks.lox
│   │   ├── shadow_and_local.lox
│   │   ├── shadow_global.lox
│   │   ├── shadow_local.lox
│   │   ├── undefined_global.lox
│   │   ├── undefined_local.lox
│   │   ├── uninitialized.lox
│   │   ├── unreached_undefined.lox
│   │   ├── use_false_as_var.lox
│   │   ├── use_global_in_initializer.lox
│   │   ├── use_local_in_initializer.lox
│   │   ├── use_nil_as_var.lox
│   │   └── use_this_as_var.lox
│   └── while/
│       ├── class_in_body.lox
│       ├── closure_in_body.lox
│       ├── fun_in_body.lox
│       ├── return_closure.lox
│       ├── return_inside.lox
│       ├── syntax.lox
│       └── var_in_body.lox
├── tool/
│   ├── analysis_options.yaml
│   ├── bin/
│   │   ├── benchmark.dart
│   │   ├── build.dart
│   │   ├── build_xml.dart
│   │   ├── compile_snippets.dart
│   │   ├── split_chapters.dart
│   │   ├── test.dart
│   │   └── tile_pages.dart
│   ├── lib/
│   │   └── src/
│   │       ├── book.dart
│   │       ├── code_tag.dart
│   │       ├── format.dart
│   │       ├── location.dart
│   │       ├── markdown/
│   │       │   ├── block_syntax.dart
│   │       │   ├── code_syntax.dart
│   │       │   ├── html_renderer.dart
│   │       │   ├── inline_syntax.dart
│   │       │   ├── markdown.dart
│   │       │   └── xml_renderer.dart
│   │       ├── mustache.dart
│   │       ├── page.dart
│   │       ├── page_parser.dart
│   │       ├── snippet.dart
│   │       ├── source_file_parser.dart
│   │       ├── split_chapter.dart
│   │       ├── syntax/
│   │       │   ├── grammar.dart
│   │       │   ├── highlighter.dart
│   │       │   ├── language.dart
│   │       │   └── rule.dart
│   │       ├── term.dart
│   │       └── text.dart
│   └── pubspec.yaml
└── util/
    ├── c.make
    ├── intellij/
    │   ├── chap04_read.iml
    │   ├── chap05_scanning.iml
    │   ├── chap06_representing.iml
    │   ├── chap07_parsing.iml
    │   ├── chap08_evaluating.iml
    │   ├── chap09_statements.iml
    │   ├── chap10_control.iml
    │   ├── chap11_functions.iml
    │   ├── chap12_resolving.iml
    │   ├── chap13_classes.iml
    │   ├── chap14_inheritance.iml
    │   ├── intellij.iml
    │   ├── jlox.iml
    │   ├── section_test.iml
    │   └── snippet_test.iml
    └── java.make

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Intermediate and built stuff.
.sass-cache/
/build/
/gen/
clox
*.class
note/answers/chapter01_introduction/3/linked_list
.idea/

# I keep a scratch file at the top level to try stuff out.
temp.lox

# XCode user-specific stuff.
xcuserdata/

# Dart stuff.
/tool/.dart_tool/
/tool/.packages


================================================
FILE: LICENSE
================================================
Copyright (c) 2015 Robert Nystrom

---------------------------------- Commentary ----------------------------------

The licensing story for this repository is a little complex. Here's my
motivation:

* I want you to get as much use out of the material here as possible. I wrote
  this book to help you, and I don't want you to be encumbered when it comes to
  making the most of it. That's also why I put it online for free.

* With my previous book, collaboration on GitHub was immensely helpful. I want
  to ensure people can fork the repo, send me fixes, etc. without violating the
  license or feeling weird.

* When it comes to code, I'm completely comfortable with people redistributing,
  remixing, changing, whatever with it. I've been using the MIT license for open
  source stuff for decades.

  This book contains two complete interpreters and I would be delighted for them
  to be the jumping-off point for any number of real full-featured language
  implementations.

* When it comes to my prose, illustrations, and the visual design of the site,
  that feels a little more, I don't know, *me* than the code. The words are in
  my voice, the drawings are literally my handwriting, and the look of the site
  is part of the book's and, by extension, my brand.

  I feel weird thinking about someone, say, taking one of the chapters and
  making significant changes to it to fit their writing style while still
  having some of it read like it came from me. Likewise, I'd be sad to see
  another site online that looked exactly like mine because it reuses my
  stylesheets.

* My previous book ended up being translated into several languages. I want to
  be careful to not be so permissive that it prevents me from signing typical
  contracts that give them exclusive translation rights to certain territories
  and languages.

* If I allow the prose and illustrations to be redistributed commercially, there
  is nothing preventing someone from slapping together a cheap print or ebook
  version of the book and putting it up for sale. I'm not too worried about my
  own sales being undercut, but I very much want to avoid readers finding
  themselves with a low quality book that they incorrectly think is from me.

  I worked very hard on this book. I want you to get the best possible
  experience.

All of this is way more complex than I'd like, especially since my brain isn't
wired to care about intellectual property. I like thinking about making stuff,
not thinking about the legal rights around the stuff I made. (If your brain is
wired to think about legal stuff and you see that I'm doing something dumb,
please do let me know.)

The best solution I've been able to come up with is to use two licenses:

---------------------------------- License(s) ----------------------------------

Each file in this repository falls under one of two licenses. Files whose
extension is ".c", ".dart", ".h", ".java", or ".lox" use the MIT license:

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    IN THE SOFTWARE.

All other files, including (but not limited to) ".md" (except for
"book/appendix-i.md" which uses the MIT license above), ".png", ".jpg", ".html",
".scss", ".css", and ".txt" use this Creative Commons license:

    Attribution-NonCommercial-NoDerivatives 4.0
    International (CC BY-NC-ND 4.0)

    https://creativecommons.org/licenses/by-nc-nd/4.0/


================================================
FILE: Makefile
================================================
# Directory that holds the compiled Dart snapshots below; removed by `clean`.
BUILD_DIR := build
# Prerequisites of the snapshot rules and of `xml`: the pub lockfile plus every
# Dart file under tool/. `:=` runs the `find` once, when the Makefile is parsed.
TOOL_SOURCES := tool/pubspec.lock $(shell find tool -name '*.dart')
# Snapshot compiled from tool/bin/build.dart (run by `book` and `serve`).
BUILD_SNAPSHOT := $(BUILD_DIR)/build.dart.snapshot
# Snapshot compiled from tool/bin/test.dart (run by the `test*` targets).
TEST_SNAPSHOT := $(BUILD_DIR)/test.dart.snapshot

# Default goal (first rule in the file): build the site, clox and jlox.
default: book clox jlox

# Fetch the Dart package dependencies for the scripts in tool/.
# `&&` aborts the recipe if the `cd` fails, so `dart pub get` never runs
# against whatever happens to be in the current directory.
get:
	@ cd ./tool && dart pub get

# Delete everything the build generates: $(BUILD_DIR) and the gen/ tree.
clean:
	@ rm -rf $(BUILD_DIR) gen

# Generate the site by running the precompiled build snapshot.
book: $(BUILD_SNAPSHOT)
	@ dart $<

# Start a local development server for the site that rebuilds automatically.
serve: $(BUILD_SNAPSHOT)
	@ dart $< --serve

# Compile tool/bin/build.dart into an app-jit snapshot whenever any tool source
# changes. The directory is taken from the target path ($(@D)) so the rule stays
# correct if BUILD_DIR is overridden. Standard output of the compile step is
# discarded; errors still reach the terminal on stderr.
$(BUILD_SNAPSHOT): $(TOOL_SOURCES)
	@ mkdir -p $(@D)
	@ echo "Compiling Dart snapshot..."
	@ dart --snapshot=$@ --snapshot-kind=app-jit tool/bin/build.dart >/dev/null

# Run the tests for the final versions of clox and jlox.
# Both suites always run, even when the clox suite fails, but the recipe exits
# non-zero if either suite failed so that `make test` reports the failure
# instead of silently ignoring it.
test: debug jlox $(TEST_SNAPSHOT)
	@ status=0; \
	dart $(TEST_SNAPSHOT) clox || status=$$?; \
	dart $(TEST_SNAPSHOT) jlox || status=$$?; \
	exit $$status

# Recipe line that runs the precompiled test runner against one suite.
#   $(1): suite name handed to the runner (clox, jlox, c, java or all).
# Kept recursively expanded (`=`) so that $(1) is substituted by $(call ...).
run_tests = @ dart $(TEST_SNAPSHOT) $(1)

# Test only the final version of clox (debug build).
test_clox: debug $(TEST_SNAPSHOT)
	$(call run_tests,clox)

# Test only the final version of jlox.
test_jlox: jlox $(TEST_SNAPSHOT)
	$(call run_tests,jlox)

# Test each chapter's version of clox.
test_c: debug c_chapters $(TEST_SNAPSHOT)
	$(call run_tests,c)

# Test each chapter's version of jlox.
test_java: jlox java_chapters $(TEST_SNAPSHOT)
	$(call run_tests,java)

# Test each chapter's version of both clox and jlox.
test_all: debug jlox c_chapters java_chapters compile_snippets $(TEST_SNAPSHOT)
	$(call run_tests,all)

# Compile tool/bin/test.dart into an app-jit snapshot whenever any tool source
# changes; the script is invoked with the argument `clox` for the compile step.
# The directory is taken from the target path ($(@D)) so the rule stays correct
# if BUILD_DIR is overridden. Standard output of the compile step is discarded.
$(TEST_SNAPSHOT): $(TOOL_SOURCES)
	@ mkdir -p $(@D)
	@ echo "Compiling Dart snapshot..."
	@ dart --snapshot=$@ --snapshot-kind=app-jit tool/bin/test.dart clox >/dev/null

# Build the debug configuration of clox (NAME=cloxd) from the sources in c/.
debug:
	@ $(MAKE) -f util/c.make \
		NAME=cloxd \
		MODE=debug \
		SOURCE_DIR=c

# Compile the release build of the C interpreter, then copy the binary to the
# top level of the repository for convenience.
clox:
	@ $(MAKE) -f util/c.make \
		NAME=$@ \
		MODE=release \
		SOURCE_DIR=c
	@ cp build/$@ $@

# Compile the C interpreter's sources as ANSI standard C++ (CPP=true), in debug
# mode.
cpplox:
	@ $(MAKE) -f util/c.make \
		NAME=$@ \
		MODE=debug \
		CPP=true \
		SOURCE_DIR=c

# Compile the `tool` package, then run the AST generator with the jlox source
# directory as its argument.
generate_ast:
	@ $(MAKE) -f util/java.make DIR=java PACKAGE=tool
	@ java -cp build/java com.craftinginterpreters.tool.GenerateAst java/com/craftinginterpreters/lox

# Build jlox: run the AST generator first, then compile the `lox` package's
# .java files to .class files.
jlox: generate_ast
	@ $(MAKE) -f util/java.make PACKAGE=lox DIR=java

# Recipe line that runs the compiled GenerateAst tool for one chapter.
#   $(1): chapter directory name under gen/ (e.g. chap05_representing).
# Classes are loaded from build/gen/$(1); the tool is handed
# gen/$(1)/com/craftinginterpreters/lox as its only argument (presumably the
# directory it writes the generated sources to -- confirm in GenerateAst.java).
# Kept recursively expanded (`=`) so that $(1) is substituted by $(call ...).
run_generate_ast = @ java -cp build/gen/$(1) \
			com.craftinginterpreters.tool.GenerateAst \
			gen/$(1)/com/craftinginterpreters/lox

# Compile every per-chapter version of jlox under gen/ (chapters 4-13).
# chap04_scanning only builds the `lox` package. Each later chapter builds the
# `tool` package, runs GenerateAst via run_generate_ast, and then builds `lox`.
# NOTE(review): if this is ever folded into a loop, keep $(MAKE) literally on
# each recipe line; GNU make only treats a line as a recursive make invocation
# when the variable appears directly in it.
java_chapters: split_chapters
	@ $(MAKE) -f util/java.make DIR=gen/chap04_scanning PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap05_representing PACKAGE=tool
	$(call run_generate_ast,chap05_representing)
	@ $(MAKE) -f util/java.make DIR=gen/chap05_representing PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap06_parsing PACKAGE=tool
	$(call run_generate_ast,chap06_parsing)
	@ $(MAKE) -f util/java.make DIR=gen/chap06_parsing PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap07_evaluating PACKAGE=tool
	$(call run_generate_ast,chap07_evaluating)
	@ $(MAKE) -f util/java.make DIR=gen/chap07_evaluating PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap08_statements PACKAGE=tool
	$(call run_generate_ast,chap08_statements)
	@ $(MAKE) -f util/java.make DIR=gen/chap08_statements PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap09_control PACKAGE=tool
	$(call run_generate_ast,chap09_control)
	@ $(MAKE) -f util/java.make DIR=gen/chap09_control PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap10_functions PACKAGE=tool
	$(call run_generate_ast,chap10_functions)
	@ $(MAKE) -f util/java.make DIR=gen/chap10_functions PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap11_resolving PACKAGE=tool
	$(call run_generate_ast,chap11_resolving)
	@ $(MAKE) -f util/java.make DIR=gen/chap11_resolving PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap12_classes PACKAGE=tool
	$(call run_generate_ast,chap12_classes)
	@ $(MAKE) -f util/java.make DIR=gen/chap12_classes PACKAGE=lox

	@ $(MAKE) -f util/java.make DIR=gen/chap13_inheritance PACKAGE=tool
	$(call run_generate_ast,chap13_inheritance)
	@ $(MAKE) -f util/java.make DIR=gen/chap13_inheritance PACKAGE=lox

# Compile a release build of every per-chapter version of clox (chapters 14-30).
# Each line hands util/c.make one source directory under gen/ (presumably
# produced by the split_chapters prerequisite) and names the result after the
# chapter.
c_chapters: split_chapters
	@ $(MAKE) -f util/c.make NAME=chap14_chunks MODE=release SOURCE_DIR=gen/chap14_chunks
	@ $(MAKE) -f util/c.make NAME=chap15_virtual MODE=release SOURCE_DIR=gen/chap15_virtual
	@ $(MAKE) -f util/c.make NAME=chap16_scanning MODE=release SOURCE_DIR=gen/chap16_scanning
	@ $(MAKE) -f util/c.make NAME=chap17_compiling MODE=release SOURCE_DIR=gen/chap17_compiling
	@ $(MAKE) -f util/c.make NAME=chap18_types MODE=release SOURCE_DIR=gen/chap18_types
	@ $(MAKE) -f util/c.make NAME=chap19_strings MODE=release SOURCE_DIR=gen/chap19_strings
	@ $(MAKE) -f util/c.make NAME=chap20_hash MODE=release SOURCE_DIR=gen/chap20_hash
	@ $(MAKE) -f util/c.make NAME=chap21_global MODE=release SOURCE_DIR=gen/chap21_global
	@ $(MAKE) -f util/c.make NAME=chap22_local MODE=release SOURCE_DIR=gen/chap22_local
	@ $(MAKE) -f util/c.make NAME=chap23_jumping MODE=release SOURCE_DIR=gen/chap23_jumping
	@ $(MAKE) -f util/c.make NAME=chap24_calls MODE=release SOURCE_DIR=gen/chap24_calls
	@ $(MAKE) -f util/c.make NAME=chap25_closures MODE=release SOURCE_DIR=gen/chap25_closures
	@ $(MAKE) -f util/c.make NAME=chap26_garbage MODE=release SOURCE_DIR=gen/chap26_garbage
	@ $(MAKE) -f util/c.make NAME=chap27_classes MODE=release SOURCE_DIR=gen/chap27_classes
	@ $(MAKE) -f util/c.make NAME=chap28_methods MODE=release SOURCE_DIR=gen/chap28_methods
	@ $(MAKE) -f util/c.make NAME=chap29_superclasses MODE=release SOURCE_DIR=gen/chap29_superclasses
	@ $(MAKE) -f util/c.make NAME=chap30_optimization MODE=release SOURCE_DIR=gen/chap30_optimization

# Same chapter list as c_chapters, but every build passes CPP=true to
# util/c.make and the outputs carry a cpp_ prefix so they get distinct names.
cpp_chapters: split_chapters
	@ $(MAKE) -f util/c.make NAME=cpp_chap14_chunks MODE=release CPP=true SOURCE_DIR=gen/chap14_chunks
	@ $(MAKE) -f util/c.make NAME=cpp_chap15_virtual MODE=release CPP=true SOURCE_DIR=gen/chap15_virtual
	@ $(MAKE) -f util/c.make NAME=cpp_chap16_scanning MODE=release CPP=true SOURCE_DIR=gen/chap16_scanning
	@ $(MAKE) -f util/c.make NAME=cpp_chap17_compiling MODE=release CPP=true SOURCE_DIR=gen/chap17_compiling
	@ $(MAKE) -f util/c.make NAME=cpp_chap18_types MODE=release CPP=true SOURCE_DIR=gen/chap18_types
	@ $(MAKE) -f util/c.make NAME=cpp_chap19_strings MODE=release CPP=true SOURCE_DIR=gen/chap19_strings
	@ $(MAKE) -f util/c.make NAME=cpp_chap20_hash MODE=release CPP=true SOURCE_DIR=gen/chap20_hash
	@ $(MAKE) -f util/c.make NAME=cpp_chap21_global MODE=release CPP=true SOURCE_DIR=gen/chap21_global
	@ $(MAKE) -f util/c.make NAME=cpp_chap22_local MODE=release CPP=true SOURCE_DIR=gen/chap22_local
	@ $(MAKE) -f util/c.make NAME=cpp_chap23_jumping MODE=release CPP=true SOURCE_DIR=gen/chap23_jumping
	@ $(MAKE) -f util/c.make NAME=cpp_chap24_calls MODE=release CPP=true SOURCE_DIR=gen/chap24_calls
	@ $(MAKE) -f util/c.make NAME=cpp_chap25_closures MODE=release CPP=true SOURCE_DIR=gen/chap25_closures
	@ $(MAKE) -f util/c.make NAME=cpp_chap26_garbage MODE=release CPP=true SOURCE_DIR=gen/chap26_garbage
	@ $(MAKE) -f util/c.make NAME=cpp_chap27_classes MODE=release CPP=true SOURCE_DIR=gen/chap27_classes
	@ $(MAKE) -f util/c.make NAME=cpp_chap28_methods MODE=release CPP=true SOURCE_DIR=gen/chap28_methods
	@ $(MAKE) -f util/c.make NAME=cpp_chap29_superclasses MODE=release CPP=true SOURCE_DIR=gen/chap29_superclasses
	@ $(MAKE) -f util/c.make NAME=cpp_chap30_optimization MODE=release CPP=true SOURCE_DIR=gen/chap30_optimization

# Write one diff per chapter into build/diffs/, each comparing a chapter's
# sources under gen/ with the previous chapter's.
# - The `-` prefix ignores diff's exit status, which is non-zero whenever the
#   two inputs differ (the expected case here).
# - The first chapter of each interpreter is compared against `nonexistent/`
#   with --new-file, presumably so that every file shows up as newly added.
# - The Java diffs (chap04-chap13) recurse into the package directories; the C
#   diffs (chap14-chap30) compare the chapter directories without --recursive.
diffs: split_chapters java_chapters
	@ mkdir -p build/diffs
	@ -diff --recursive --new-file nonexistent/ gen/chap04_scanning/com/craftinginterpreters/ > build/diffs/chap04_scanning.diff
	@ -diff --recursive --new-file gen/chap04_scanning/com/craftinginterpreters/ gen/chap05_representing/com/craftinginterpreters/ > build/diffs/chap05_representing.diff
	@ -diff --recursive --new-file gen/chap05_representing/com/craftinginterpreters/ gen/chap06_parsing/com/craftinginterpreters/ > build/diffs/chap06_parsing.diff
	@ -diff --recursive --new-file gen/chap06_parsing/com/craftinginterpreters/ gen/chap07_evaluating/com/craftinginterpreters/ > build/diffs/chap07_evaluating.diff
	@ -diff --recursive --new-file gen/chap07_evaluating/com/craftinginterpreters/ gen/chap08_statements/com/craftinginterpreters/ > build/diffs/chap08_statements.diff
	@ -diff --recursive --new-file gen/chap08_statements/com/craftinginterpreters/ gen/chap09_control/com/craftinginterpreters/ > build/diffs/chap09_control.diff
	@ -diff --recursive --new-file gen/chap09_control/com/craftinginterpreters/ gen/chap10_functions/com/craftinginterpreters/ > build/diffs/chap10_functions.diff
	@ -diff --recursive --new-file gen/chap10_functions/com/craftinginterpreters/ gen/chap11_resolving/com/craftinginterpreters/ > build/diffs/chap11_resolving.diff
	@ -diff --recursive --new-file gen/chap11_resolving/com/craftinginterpreters/ gen/chap12_classes/com/craftinginterpreters/ > build/diffs/chap12_classes.diff
	@ -diff --recursive --new-file gen/chap12_classes/com/craftinginterpreters/ gen/chap13_inheritance/com/craftinginterpreters/ > build/diffs/chap13_inheritance.diff

	@ -diff --new-file nonexistent/ gen/chap14_chunks/ > build/diffs/chap14_chunks.diff
	@ -diff --new-file gen/chap14_chunks/ gen/chap15_virtual/ > build/diffs/chap15_virtual.diff
	@ -diff --new-file gen/chap15_virtual/ gen/chap16_scanning/ > build/diffs/chap16_scanning.diff
	@ -diff --new-file gen/chap16_scanning/ gen/chap17_compiling/ > build/diffs/chap17_compiling.diff
	@ -diff --new-file gen/chap17_compiling/ gen/chap18_types/ > build/diffs/chap18_types.diff
	@ -diff --new-file gen/chap18_types/ gen/chap19_strings/ > build/diffs/chap19_strings.diff
	@ -diff --new-file gen/chap19_strings/ gen/chap20_hash/ > build/diffs/chap20_hash.diff
	@ -diff --new-file gen/chap20_hash/ gen/chap21_global/ > build/diffs/chap21_global.diff
	@ -diff --new-file gen/chap21_global/ gen/chap22_local/ > build/diffs/chap22_local.diff
	@ -diff --new-file gen/chap22_local/ gen/chap23_jumping/ > build/diffs/chap23_jumping.diff
	@ -diff --new-file gen/chap23_jumping/ gen/chap24_calls/ > build/diffs/chap24_calls.diff
	@ -diff --new-file gen/chap24_calls/ gen/chap25_closures/ > build/diffs/chap25_closures.diff
	@ -diff --new-file gen/chap25_closures/ gen/chap26_garbage/ > build/diffs/chap26_garbage.diff
	@ -diff --new-file gen/chap26_garbage/ gen/chap27_classes/ > build/diffs/chap27_classes.diff
	@ -diff --new-file gen/chap27_classes/ gen/chap28_methods/ > build/diffs/chap28_methods.diff
	@ -diff --new-file gen/chap28_methods/ gen/chap29_superclasses/ > build/diffs/chap29_superclasses.diff
	@ -diff --new-file gen/chap29_superclasses/ gen/chap30_optimization/ > build/diffs/chap30_optimization.diff

# Run the chapter splitter script. Per the README, it writes a copy of the
# interpreter source as it stands at the end of each chapter into gen/.
split_chapters:
	@ dart tool/bin/split_chapters.dart

# Run tool/bin/compile_snippets.dart.
# NOTE(review): presumably checks that the code compiles after each snippet is
# added -- confirm against the script.
compile_snippets:
	@ dart tool/bin/compile_snippets.dart

# Generate the XML for importing into InDesign.
# Runs tool/bin/build_xml.dart with Dart assertions enabled. The target names a
# command rather than an output file; it lists $(TOOL_SOURCES) as
# prerequisites.
xml: $(TOOL_SOURCES)
	@ dart --enable-asserts tool/bin/build_xml.dart

# Targets that name commands rather than files. Declaring them phony makes
# make run them even if a file or directory with the same name exists.
# `xml` is included because its recipe never creates a file called "xml".
.PHONY: book c_chapters clean clox compile_snippets debug default diffs \
	get java_chapters jlox serve split_chapters test test_all test_c test_java \
	xml


================================================
FILE: README.md
================================================
This is the repo used for the in-progress book "[Crafting Interpreters][]". It
contains the Markdown text of the book, full implementations of both
interpreters, as well as the build system to weave the two together into the
final site.

[crafting interpreters]: http://craftinginterpreters.com

If you find an error or have a suggestion, please do file an issue here. Thank
you!

## Contributing

One of the absolute best things about writing a book online and putting it out
there before it's done is that people like you have been kind enough to give me
feedback, point out typos, and find other errors or unclear text.

If you'd like to do that, great! You can just file bugs here on the repo, or
send a pull request if you're so inclined. If you want to send a pull request,
but don't want to get the build system set up to regenerate the HTML too, don't
worry about it. I'll do that when I pull it in.

## Ports and implementations

Another way to get involved is by sharing your own implementation of Lox. Ports
to other languages are particularly useful since not every reader likes Java and
C. Feel free to add your Lox port or implementation to the wiki:

* [Lox implementations][]

[lox implementations]: https://github.com/munificent/craftinginterpreters/wiki/Lox-implementations

## Building Stuff

I am a terribly forgetful, error-prone mammal, so I automated as much as I
could.

### Prerequisites

I develop on an OS X machine, but any POSIX system should work too. With a
little extra effort, you should be able to get this working on Windows as well,
though I can't help you out much.

Most of the work is orchestrated by make. The build scripts, test runner, and
other utilities are all written in [Dart][]. Instructions to install Dart are
[here][install]. Once you have Dart installed and on your path, run:

```sh
$ make get
```

[dart]: https://dart.dev/
[install]: https://dart.dev/get-dart

This downloads all of the packages used by the build and test scripts.

In order to compile the two interpreters, you also need a C compiler on your
path as well as `javac`.

### Building

Once you've got that set up, try:

```sh
$ make
```

If everything is working, that will generate the site for the book as well as
compiling the two interpreters clox and jlox. You can run either interpreter
right from the root of the repo:

```sh
$ ./clox
$ ./jlox
```

### Hacking on the book

The Markdown and snippets of source code are woven together into the final HTML
using a hand-written static site generator that started out as a [single tiny
Python script][py] for [my first book][gpp] and somehow grew into something
approximating a real program.

[py]: https://github.com/munificent/game-programming-patterns/blob/master/script/format.py
[gpp]: http://gameprogrammingpatterns.com/

The generated HTML is committed in the repo under `site/`. It is built from a
combination of Markdown for prose, which lives in `book/`, and snippets of code
that are woven in from the Java and C implementations in `java/` and `c/`. (All
of those funny looking comments in the source code are how it knows which
snippet goes where.)

The script that does all the magic is `tool/bin/build.dart`. You can run that
directly, or run:

```sh
$ make book
```

That generates the entire site in one batch. If you are incrementally working
on it, you'll want to run the development server:

```sh
$ make serve
```

This runs a little HTTP server on localhost rooted at the `site/` directory.
Any time you request a page, it regenerates any files whose sources have been
changed, including Markdown files, interpreter source files, templates, and
assets. Just let that keep running, edit files locally, and refresh your
browser to see the changes.

### Building the interpreters

You can build each interpreter like so:

```sh
$ make clox
$ make jlox
```

This builds the final version of each interpreter as it appears at the end of
its part in the book.

You can also see what the interpreters look like at the end of each chapter. (I
use this to make sure they are working even in the middle of the book.) This is
driven by a script, `tool/bin/split_chapters.dart` that uses the same comment
markers for the code snippets to determine which chunks of code are present in
each chapter. It takes only the snippets that have been seen by the end of each
chapter and produces a new copy of the source in `gen/`, one directory for each
chapter's code. (These are also an easier way to view the source code since they
have all of the distracting marker comments stripped out.)

Then, each of those can be built separately. Run:

```sh
$ make c_chapters
```

And in the `build/` directory, you'll get an executable for each chapter, like
`chap14_chunks`, etc. Likewise:

```sh
$ make java_chapters
```

This compiles the Java code to classfiles in `build/gen/` in a subdirectory for
each chapter.

## Testing

I have a full Lox test suite that I use to ensure the interpreters in the book
do what they're supposed to do. The test cases live in `test/`. The Dart
program `tool/bin/test.dart` is a test runner that runs each of those test
files on a Lox interpreter, parses the result, and validates that the test
does what it's expected to do.

There are various interpreters you can run the tests against:

```sh
$ make test       # The final versions of clox and jlox.
$ make test_clox  # The final version of clox.
$ make test_jlox  # The final version of jlox.
$ make test_c     # Every chapter's version of clox.
$ make test_java  # Every chapter's version of jlox.
$ make test_all   # All of the above.
```

### Testing your implementation

You are welcome to use the test suite and the test runner to test your own Lox
implementation. The test runner is at `tool/bin/test.dart` and can be given a
custom interpreter executable to run using `--interpreter`. For example, if you
had an interpreter executable at `my_code/boblox`, you could test it like:

```sh
$ dart tool/bin/test.dart clox --interpreter my_code/boblox
```

You still need to tell it which suite of tests to run because that determines
the test expectations. If your interpreter should behave like jlox, use "jlox"
as the suite name. If it behaves like clox, use "clox". If your interpreter is
only complete up to the end of one of the chapters in the book, you can use
that chapter as the suite, like "chap10_functions". See the Makefile for the
names of all of the chapters.

If your interpreter needs other command line arguments passed to it, pass them
to the test runner using `--arguments` and it will forward them to your
interpreter.

## Repository Layout

*   `asset/` – Sass files and Mustache templates used to generate the site.
*   `book/` – Markdown files for the text of each chapter.
*   `build/` – Intermediate files and other build output (except for the site
    itself) go here. Not committed to Git.
*   `c/` – Source code of clox, the interpreter written in C. Also contains an
    Xcode project, if that's your thing.
*   `gen/` – Java source files generated by GenerateAst.java go here, as do the
    per-chapter copies of the code produced by `tool/bin/split_chapters.dart`.
    Not committed.
*   `java/` – Source code of jlox, the interpreter written in Java.
*   `note/` – Various research, notes, TODOs, and other miscellanea.
*   `note/answers` – Sample answers for the challenges. No cheating!
*   `site/` – The final generated site. The contents of this directory directly
    mirror craftinginterpreters.com. Most content here is generated by
    `tool/bin/build.dart`, but fonts, images, and JS only live here. Everything
    is committed, even the generated content.
*   `test/` – Test cases for the Lox implementations.
*   `tool/` – Dart package containing the build, test, and other scripts.


================================================
FILE: asset/index.scss
================================================
@import 'sass/shared';
@import 'sass/sign-up';

body, h1, h2, h3, h4, p, blockquote, code, ul, ol, dl, dd, img {
  margin: 0;
}

body {
  background: $dark url('image/background.png') top center / 100% auto no-repeat;
  color: #222;
  font: normal 16px/24px $serif;
}

// Links carry a bottom border instead of a text underline.
a {
  color: $primary;
  text-decoration: none;

  // Alpha is reduced by 1.0, so the border is fully transparent at rest. It
  // still occupies its 1px, which lets the hover state change only its color.
  border-bottom: solid 1px transparentize($light, 1.0);

  // Animate both the text color and the border color between states.
  transition: color 0.2s ease,
              border-color 0.4s ease;
}

// Hovered links keep the primary text color and show the bottom border by
// raising the alpha of $light by 1.0 (fully opaque).
a:hover {
  color: $primary;
  border-bottom: solid 1px opacify($light, 1.0);
}

article {
  margin: 0 auto;
  padding: 0 0 12px 0;
  max-width: $col * 20;
  background: #fff;
}

header {
  margin: 0 0 $col 0;
  color: $warm-dark;
  background: $warm-5;
  border-bottom: solid 1px $warm-4;
}

main {
  margin: 0 $col;
}

img.header {
  display: block;
  width: 100%;
}

// Hidden by default. The `max-width: 700px` media query later in this file
// shows it and hides `img.big` instead.
img.small {
  display: none;
}

div.intro {
  display: flex;

  blockquote {
    flex-basis: 40%;
    margin: 0 $col 0 0;
    font: italic 28px/42px $serif;
  }

  div.text {
    flex-basis: 60%;
    margin: 8px 0 24px 0;
  }
}

p + p {
  margin-top: 24px;
}

.format {
  margin: 0 -12px 24px -12px;
  padding: 12px 12px 8px 12px;
  height: 244px;

  box-sizing: border-box;
  background: $lighter;
  background-size: cover;
  background-position: left;

  color: #444;
  border-radius: 3px;
  font: normal 16px/24px $nav;

  h3 {
    margin: 0;
    padding: 0 0 4px 0;
    font: 600 16px/24px $nav;
    text-transform: uppercase;
    letter-spacing: 1px;
  }

  p {
    margin-bottom: 8px;
  }
}

.format.print, .format.pdf {
  background-position: right;
  text-align: right;
}

.format-info {
  display: inline-block;
  width: $col * 8;
  text-align: left;

  table {
    width: 100%;
    border-collapse: collapse;

    td + td {
      padding-left: 5px;
    }
  }
}

.format.print { background-image: url("image/format-print.jpg"); }
.format.ebook { background-image: url("image/format-ebook.jpg"); }
.format.pdf {   background-image: url("image/format-pdf.jpg"); }
.format.web {   background-image: url("image/format-web.jpg"); }

// Button-style links: full-width, centered, white text on the primary color.
a.action {
  display: block;

  margin: 0 0 4px 0;
  padding: 4px 0;
  text-align: center;
  border-radius: 3px;
  background: $primary;

  // Animate the background and text color changes applied on hover.
  transition: background-color 0.2s ease,
              color 0.2s ease;

  font: 400 17px/24px $nav;
  color: white;

  // Secondary label inside a button (e.g. a domain suffix), dimmed at rest.
  small {
    font-size: 14px;
    padding: 4px;
    // 70% opaque white. Saturation is written as `0%` rather than a bare `0`:
    // CSS requires a percentage there and Sass deprecates unitless values.
    color: hsla(0, 0%, 100%, 0.7);
    transition: color 0.2s ease;
  }
}

a.action:hover {
  background-color: hsl(200, 85%, 55%);

  small {
    color: white;
  }
}

  // Despite the indentation, this rule is at the top level and applies to
  // every h3 on the page. The nested `.format h3` rule sets its own font.
  h3 {
    font: italic 24px/24px $serif;
    margin: 12px 0;
  }

img.author {
  float: left;
  width: 240px;
  margin: 0 12px 0 -12px;
  padding: 12px;

  background: $warm-5;
  border-radius: 3px;
}

div.author {
  vertical-align: top;
  margin: 36px 0 0 240px + $col;
}

footer {
  position: relative;
  border-top: solid 1px $light;
  color: $gray-4;
  font: 400 15px $nav;
  text-align: center;
  margin: 12px 0 36px 0;
  padding-top: 48px;

  a, a:hover {
    border: none;
  }
}

// Narrow layout: overrides applied when the viewport is at most 700px wide.
@media only screen and (max-width: 700px) {
  // Fixed 24px side margins replace the $col-based ones.
  main {
    margin: 0 24px;
  }

  header {
    margin-bottom: 24px;
  }

  // Swap the large header image for the small one.
  img.big {
    display: none;
  }

  img.small {
    display: block;
  }

  // Stack the intro quote and text instead of laying them out as a flex row.
  div.intro {
    display: block;

    blockquote {
      display: block;
      font: italic 24px/36px $serif;
    }

    div.text {
      display: block;
      margin: 24px 0 24px 0;
    }
  }

  // Let each format box grow to fit its content instead of the fixed height.
  .format {
    margin-bottom: 12px;
    height: auto;
    // Blend the background image with the per-format background colors below.
    background-blend-mode: lighten;
  }

  // The info column takes the full width of the box.
  .format-info {
    display: block;
    width: 100%;
  }

  .format.print { background-color: #a6a29f; }
  .format.ebook { background-color: #97a2aa; }
  .format.pdf {   background-color: #cfccca; }
  .format.web {   background-color: #d6dbd3; }

  // Stop floating the author photo and drop the bio's left offset.
  img.author {
    float: none;
  }

  div.author {
    margin: 0 0 0 0;
  }
}


================================================
FILE: asset/mustache/contents-nav.html
================================================
<h2><a href="#top"><small>&nbsp;</small> Table of Contents</a></h2>
<ul>
  <li><a href="#welcome"><small>I</small>Welcome</a></li>
  <li><a href="#a-tree-walk-interpreter"><small>II</small>A Tree-Walk Interpreter</a></li>
  <li><a href="#a-bytecode-virtual-machine"><small>III</small>A Bytecode Virtual Machine</a></li>
  <li><a href="#backmatter"><small>&#10087;</small>Backmatter</a></li>
</ul>
{{> prev-next }}


================================================
FILE: asset/mustache/contents-part.html
================================================
<h2><span class="num">{{ number }}.</span><a href="{{ file }}.html" name="{{ file }}">{{ title }}</a></h2>
<ul>
{{# chapters }}
  <li><span class="num">{{ number }}.</span><a href="{{ file }}.html">{{ title }}</a>
  </li>
  {{# design_note }}
  <li class="design-note">
  <span class="num">&nbsp;</span><a href="{{ file }}.html#design-note">Design Note: {{{ design_note }}}</a>
  </li>
  {{/ design_note }}
{{/ chapters }}
</ul>

================================================
FILE: asset/mustache/contents.html
================================================
{{> header }}

<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
    {{> contents-nav }}
  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
{{# has_prev }}
<a href="{{ prev_file }}.html" title="{{ prev }}" class="prev">←</a>
{{/ has_prev }}
{{# has_next }}
<a href="{{ next_file }}.html" title="{{ next }}" class="next">→</a>
{{/ has_next }}
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
    {{> contents-nav }}
  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="contents">

<h1 class="part">{{title}}</h1>

<div class="chapters">
  <div class="row">
    <div class="first">
    <h2><span class="num">&#10087;</span>Frontmatter</h2>
    <ul>
      <li><span class="num">&nbsp;</span><a href="dedication.html">Dedication</a></li>
      <li><span class="num">&nbsp;</span><a href="acknowledgements.html">Acknowledgements</a></li>
    </ul>

    {{# part_1 }}
      {{> contents-part }}
    {{/ part_1 }}
    {{# part_2 }}
      {{> contents-part }}
    {{/ part_2 }}
    </div>
    <div class="second">
    {{# part_3 }}
      {{> contents-part }}
    {{/ part_3 }}

    <h2><span class="num">&#10087;</span><a href="backmatter.html" name="backmatter">Backmatter</a></h2>
    <ul>
      <li><span class="num">A1.</span><a href="appendix-i.html">Appendix I: Lox Grammar</a></li>
      <li><span class="num">A2.</span><a href="appendix-ii.html">Appendix II: Generated Syntax Tree Classes</a></li>
    </ul>
    </div>
  </div>
</div>

<!-- Footer: link to the first page after the contents, then the copyright
     line. The year range matches the one used in index.html and page.html. -->
<footer>
  <a href="{{ next_file }}.html" class="next">
    First {{ next_type }}: &ldquo;{{ next }}&rdquo; &rarr;
  </a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

{{> footer }}


================================================
FILE: asset/mustache/footer.html
================================================
</div>
</body>
</html>


================================================
FILE: asset/mustache/header.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>{{title}} &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->


================================================
FILE: asset/mustache/in_design.html
================================================
<chapter>
<chapter-number>{{ number }}</chapter-number>
<title>{{ title }}</title>
<part>{{ part }}</part>
{{{ body }}}
</chapter>


================================================
FILE: asset/mustache/index.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Crafting Interpreters</title>
<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="index.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>
<link rel="icon" type="image/png" href="image/favicon.png" />

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body>

<article>

<header>
  <a href="dedication.html"><img class="header big" src="image/header.png" alt="Crafting Interpreters by Robert Nystrom" /><img class="header small" src="image/header-small.png" alt="Crafting Interpreters by Robert Nystrom" /></a>
</header>

<main>

<div class="intro">

<blockquote><p>Ever wanted to make your own programming language or wondered how
they are designed and built?</p><p>If so, this book is for you.</p></blockquote>

<div class="text">

<p><em>Crafting Interpreters</em> contains everything you need to implement a
full-featured, efficient scripting language. You&#8217;ll learn both high-level
concepts around parsing and semantics and gritty details like bytecode
representation and garbage collection. Your brain will light up with new ideas,
and your hands will get dirty and calloused. It&#8217;s a blast.</p>

<p>Starting from <code>main()</code>, you build a language that features rich
syntax, dynamic typing, garbage collection, lexical scope, first-class
functions, closures, classes, and inheritance. All packed into a few thousand
lines of clean, fast code that you thoroughly understand because you write each
one yourself.</p>

<p>The book is available in four delectable formats:</p>

</div>

</div>

<div class="format print">
  <div class="format-info">
    <h3>Print</h3>
    <p>640 pages of beautiful typography and high resolution hand-drawn
    illustrations. Each page lovingly typeset by the author. The premier reading
    experience.</p>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.amazon.com/dp/0990582930" target="_blank">Amazon<small>.com</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.ca/dp/0990582930" target="_blank"><small>.ca</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.uk/dp/0990582930" target="_blank"><small>.uk</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.au/dp/0990582930" target="_blank"><small>.au</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.de/dp/0990582930" target="_blank"><small>.de</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.fr/dp/0990582930" target="_blank"><small>.fr</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.es/dp/0990582930" target="_blank"><small>.es</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.it/dp/0990582930" target="_blank"><small>.it</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.jp/dp/0990582930" target="_blank"><small>.jp</small></a>
    </td>
    </tr>
    </table>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.barnesandnoble.com/w/crafting-interpreters-robert-nystrom/1139915245?ean=9780990582939" target="_blank">Barnes and Noble</a>
    </td>
    <td>
      <a class="action" href="https://www.bookdepository.com/Crafting-Interpreters-Robert-Nystrom/9780990582939" target="_blank">Book Depository</a>
    </td>
    </tr>
    </table>
    <a class="action" href="/sample.pdf" target="_blank">Download Sample <small>PDF</small></a>
  </div>
</div>
<div class="format ebook">
  <div class="format-info">
    <h3>eBook</h3>
    <p>Carefully tuned CSS fits itself to your ebook reader and screen size.
    Full-color syntax highlighting and live hyperlinks. Like Alan Kay's Dynabook
    but real.</p>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.amazon.com/dp/B09BCCVLCL" target="_blank">Kindle <small class="hide-small"><span class="hide-medium">Amazon</span>.com</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.uk/dp/B09BCCVLCL" target="_blank"><small>.uk</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.ca/dp/B09BCCVLCL" target="_blank"><small>.ca</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.au/dp/B09BCCVLCL" target="_blank"><small>.au</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.de/dp/B09BCCVLCL" target="_blank"><small>.de</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.in/dp/B09BCCVLCL" target="_blank"><small>.in</small></a>
    </td>
    </tr>
    </table>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.amazon.fr/dp/B09BCCVLCL" target="_blank"><small>.fr</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.es/dp/B09BCCVLCL" target="_blank"><small>.es</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.it/dp/B09BCCVLCL" target="_blank"><small>.it</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.jp/dp/B09BCCVLCL" target="_blank"><small>.jp</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.br/dp/B09BCCVLCL" target="_blank"><small>.br</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.mx/dp/B09BCCVLCL" target="_blank"><small>.mx</small></a>
    </td>
    <td>
      <a class="action" href="https://books.apple.com/us/book/crafting-interpreters/id1578795812" target="_blank">Apple Books</a>
    </td>
    </tr>
    </table>
    <table>
    <tr>
    <td>
      <a class="action" href="https://play.google.com/store/books/details?id=q0c6EAAAQBAJ" target="_blank">Play Books <small class="hide-small">Google</small></a>
    </td>
    <td>
      <a class="action" href="https://www.barnesandnoble.com/w/crafting-interpreters-robert-nystrom/1139915245?ean=2940164977092" target="_blank">Nook <small class="hide-small">B&amp;N</small></a>
    </td>
    <td>
      <a class="action" href="https://www.smashwords.com/books/view/1096463" target="_blank">EPUB <small class="hide-small">Smashwords</small></a>
    </td>
    </tr>
    </table>
  </div>
</div>
<div class="format pdf">
  <div class="format-info">
    <h3>PDF</h3>
    <p>Perfectly mirrors the hand-crafted typesetting and sharp illustrations of
    the print book, but much easier to carry around.</p>
    <a class="action" href="https://payhip.com/b/F0zkr" target="_blank">Buy from Payhip</a>
    <a class="action" href="/sample.pdf" target="_blank">Download Free Sample</a>
  </div>
</div>
<div class="format web">
  <div class="format-info">
    <h3>Web</h3>
    <p>Meticulous responsive design looks great from your desktop down to your
    phone. Every chapter, aside, and illustration is there. Read the whole book
    for free. Really.</p>
    <a class="action" href="contents.html">Read Now</a>
  </div>
</div>

<!-- Author photo; `img.author` in index.scss floats it beside the bio. The alt
     text gives screen readers and failed image loads something to show. -->
<img src="image/dogshot.jpg" class="author" alt="Robert Nystrom" />

<div class="author">
<h3>About Robert Nystrom</h3>

<p>I got bitten by the language bug years ago while on paternity leave between
midnight feedings. I cobbled together a <a href="http://wren.io/"
target="_blank">number</a> <a href="http://magpie-lang.org/"
target="_blank">of</a> <a href="http://finch.stuffwithstuff.com/"
target="_blank">hobby</a> <a href="https://github.com/munificent/vigil"
target="_blank">languages</a> before worming my way into an honest-to-God,
full-time programming language job. Today, I work at Google on the <a
href="http://dart.dev/" target="_blank">Dart language</a>.</p>

<p>Before I fell in love with languages, I developed games at Electronic Arts
for eight years. I wrote the best-selling book <em><a
href="http://gameprogrammingpatterns.com/" target="_blank">Game Programming
Patterns</a></em> based on what I learned there. You can read that book for free
too.</p>

<p>If you want more, you can find me on Twitter (<a
href="https://twitter.com/intent/user?screen_name=munificentbob"
target="_blank"><code>@munificentbob</code></a>), email me at <code>bob</code>
at this site's domain (though I am slow to respond), read <a
href="http://journal.stuffwithstuff.com/" target="_blank">my blog</a>, or join
my low frequency mailing list:</p>

<div class="sign-up">
  <!-- Begin MailChimp Signup Form -->
  <div id="mc_embed_signup">
  <form action="//gameprogrammingpatterns.us7.list-manage.com/subscribe/post?u=0952ca43ed2536d6717766b88&amp;id=6e96334109" method="post" id="mc-embedded-subscribe-form" name="mc-embedded-subscribe-form" class="validate" target="_blank" novalidate>
    <input type="email" value="" name="EMAIL" class="email" id="mce-EMAIL" placeholder="Your email address" required>
    <!-- real people should not fill this in and expect good things - do not remove this or risk form bot signups -->
    <div style="position: absolute; left: -5000px;" aria-hidden="true"><input type="text" name="b_0952ca43ed2536d6717766b88_6e96334109" tabindex="-1" value=""></div>
    <input type="submit" value="Sign me up!" name="subscribe" id="mc-embedded-subscribe" class="button">
  </form>
  </div>
  <!--End mc_embed_signup -->
</div>

</div>

<footer>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</main>
</article>
</body>
</html>


================================================
FILE: asset/mustache/nav.html
================================================
{{# is_chapter }}
<h3><a href="#top">{{ title }}<small>{{ number }}</small></a></h3>

<ul>
  {{# sections }}
    <li><a href="#{{ anchor }}"><small>{{ number }}.{{ index }}</small> {{ name }}</a></li>
  {{/ sections }}
  {{# has_challenges_or_design_note }}
    <li class="divider"></li>
  {{/ has_challenges_or_design_note }}
  {{# has_challenges }}
    <li class="end-part"><a href="#challenges">Challenges</a></li>
  {{/ has_challenges }}
  {{# has_design_note }}
    <li class="end-part"><a href="#design-note"><small>note</small>{{ design_note }}</a></li>
  {{/ has_design_note }}
</ul>

{{/ is_chapter }}
{{# is_part }}
<h2><small>{{ number }}</small>{{ title }}</h2>

<ul>
  {{# chapters }}
    <li><a href="{{ file }}.html"><small>{{ number }}</small>{{ title }}</a></li>
  {{/ chapters }}
</ul>

{{/ is_part }}
{{# is_frontmatter }}
<h2><small>{{ number }}</small>{{ title }}</h2>
<hr>
{{/ is_frontmatter }}

{{> prev-next }}

================================================
FILE: asset/mustache/page.html
================================================
{{> header }}
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
{{> nav }}  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
{{# has_prev }}
<a href="{{ prev_file }}.html" title="{{ prev }}" class="prev">←</a>
{{/ has_prev }}
{{# has_next }}
<a href="{{ next_file }}.html" title="{{ next }}" class="next">→</a>
{{/ has_next }}
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
{{> nav }}
  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

{{# has_number }}
  <div class="number">{{ number }}</div>
{{/ has_number }}
{{# is_chapter }}
  <h1>{{ title }}</h1>
{{/ is_chapter }}
{{^ is_chapter }}
  <h1 class="part">{{ title }}</h1>
{{/ is_chapter }}

{{{ body }}}
<footer>
{{# has_next }}
<a href="{{ next_file }}.html" class="next">
  Next {{ next_type }}: &ldquo;{{ next }}&rdquo; &rarr;
</a>
{{/ has_next }}
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

{{> footer }}


================================================
FILE: asset/mustache/prev-next.html
================================================
{{! Previous / Up / Next links. Each link is emitted only when its has_prev,
    has_up, or has_next flag is set. }}
<div class="prev-next">
  {{# has_prev }}
    <a href="{{ prev_file }}.html" title="{{ prev }}" class="left">&larr;&nbsp;Previous</a>
  {{/ has_prev }}
  {{# has_up }}
    <a href="{{ up_file }}.html" title="{{ up }}">&uarr;&nbsp;Up</a>
  {{/ has_up }}
  {{# has_next }}
    <a href="{{ next_file }}.html" title="{{ next }}" class="right">Next&nbsp;&rarr;</a>
  {{/ has_next }}
</div>

================================================
FILE: asset/sass/chapter.scss
================================================
// Styles for chapter pages: section headings, the challenges and design-note
// boxes, and tables.
article.chapter {
  // Section heading.
  h2 {
    font: 600 30px/24px $serif;
    margin: 69px 0 0 0;
    padding-bottom: 3px;

    // Any <small> inside the heading is floated to the right edge.
    small {
      font: 800 22px/24px $serif;
      float: right;
    }
  }

  // Subsection heading.
  h3 {
    font: italic 24px/24px $serif;
    margin: 71px 0 0 0;
    padding-bottom: 1px;

    small {
      font: 600 16px/24px $serif;
      float: right;
    }
  }

  // Links inside headings use the dark text color and drop the bottom border.
  h2 a, h3 a {
    color: #222;
    border-bottom: none;
  }

  h2 a:hover, h3 a:hover {
    border-bottom: none;
    color: inherit;
  }

  // A section sign ("§", U+00A7) placed one column to the left of the heading
  // link. It is white by default and fades to #ddd when the link is hovered.
  h2 a::before, h3 a::before {
    position: absolute;
    left: -$col;
    width: $col;
    content: "\000A7";
    color: #fff;
    transition: color 0.2s ease;
    text-align: center;
  }

  h2 a:hover::before, h3 a:hover::before {
    color: #ddd;
  }

  // Shared layout for the challenges and design-note boxes. The negative
  // horizontal margin offsets the 12px padding.
  .challenges, .design-note {
    border-radius: 3px;
    padding: 12px;
    margin: -2px -12px 26px -12px;

    font: normal 16px/24px $nav;
    color: #444;

    // The box title is a small uppercase label instead of a section heading.
    h2 {
      margin: 0 0 -12px 0;
      padding: 0;
      font: 600 16px/24px $nav;
      text-transform: uppercase;
      letter-spacing: 1px;
    }

    h2 a {
      color: inherit;
    }

    // No "§" marker on the box title.
    h2 a::before {
      content: none;
    }

    ol {
      padding: 0 0 0 18px;

      // The list item is semibold while paragraphs inside it are set back to
      // the regular weight.
      li {
        padding: 0 0 0 6px;
        font-weight: 600;

        p {
          font-weight: 400;
        }
      }
    }

    pre {
      margin: 0;
    }

    // Chapter 23 has some blockquotes in the design note.
    > blockquote {
      p {
        margin: 0 24px;
        font: italic 16px/24px $nav;
        color: #444;
      }

      // Suppress the generated ::before / ::after content for these quotes.
      &::before, &::after {
        content: none;
      }
    }

    // Use the regular code colors in asides, and not the tinted versions used
    // inside the challenge or design notes boxes themselves.
    aside {
      code, .codehilite {
        color: $warm-dark;
        background: $warm-light;
      }
    }

    // Remove the extra padding at the bottom of the box.
    // Note: this is a descendant selector, so it matches every last child at
    // any depth inside the box, not only the box's own last child.
    *:last-child {
      margin-bottom: 0;
    }
  }

  // Code blocks inside the boxes have a -12px vertical margin and no
  // horizontal margin.
  .challenges .codehilite,
  .design-note .codehilite {
    margin: -12px 0 -12px 0;
  }

  // Blue-tinted box, with a slightly darker tint behind code.
  .challenges {
    background: $lighter;

    code, .codehilite {
      background: hsl(195, 30%, 92%);
    }
  }

  // Green-tinted box, with a slightly darker tint behind code.
  .design-note {
    background: hsl(80, 30%, 96%);

    code, .codehilite {
      background: hsl(80, 20%, 93%);
    }
  }

  // Full-width tables with a thin rule under every cell.
  table {
    width: 100%;
    border-collapse: collapse;

    thead {
      font: 700 15px $serif;
    }

    td {
      border-bottom: solid 1px $light;
      line-height: 22px;
      padding: 3px 0 0 0;
      margin: 0;
    }

    // Gutter between adjacent columns.
    td + td {
      padding-left: 12px;
    }
  }
}

// Tablets and mobile go single column.
// The breakpoint is `$col * 20`, twenty layout columns wide.
@media only screen and (max-width: $col * 20) {
  article.chapter {

    // Now that the asides are inline, make them match the challenge/design-note
    // colors and font.
    .challenges, .design-note {
      aside {
        font: normal 15px/24px $nav;
        padding-bottom: 4px;
      }
    }

    // Code inside an aside in a challenges box gets the box's blue tint.
    .challenges {
      aside {
        code, .codehilite {
          background: hsl(195, 30%, 92%);
        }
      }
    }

    // Code inside an aside in a design-note box gets the box's green tint.
    .design-note {
      aside {
        code, .codehilite {
          background: hsl(80, 20%, 93%);
        }
      }
    }
  }
}

// Then bring the margins in some.
// The cut-off sizes here are based on trying to get 72 columns of code to fit.
@media only screen and (max-width: 630px) {
  article.chapter {
    // Shrink the slot that holds the "§" heading marker to half a column.
    // Written as a multiplication because `/` as a division operator is
    // deprecated in Dart Sass; `$col * 0.5` yields the same length as
    // `$col / 2`.
    h2 a::before, h3 a::before {
      left: -($col * 0.5);
      width: $col * 0.5;
    }
  }
}

// Finally start shrinking text.
// Below 580px the headings and the challenges/design-note boxes use smaller
// type on a 22px line height.
@media only screen and (max-width: 580px) {
  article.chapter {
    h2 {
      margin-top: 64px;
      padding-bottom: 2px;
      font-size: 22px;
      line-height: 22px;
    }

    h3 {
      margin-top: 64px;
      padding-bottom: 0;
      font-size: 20px;
    }

    // The boxes no longer use a negative horizontal margin at this width.
    .challenges, .design-note {
      padding: 11px 11px 8px 11px;
      margin: 25px 0 0 0;

      font-size: 15px;
      line-height: 22px;

      code, .codehilite {
        font-size: 14px;
      }

      h2 {
        padding: 5px 0 4px 6px;
        font-size: 17px;
        line-height: 22px;
      }

      aside {
        line-height: 22px;
      }
    }
  }
}


================================================
FILE: asset/sass/contents.scss
================================================
// Styles for the table-of-contents page.
article.contents {
  // Part heading: small uppercase label.
  h2 {
    margin: 22px 0 6px 0;
    font: 600 normal 18px/24px $nav;
    text-transform: uppercase;
    letter-spacing: 1px;

    // Fixed-width slot so the text after the number lines up.
    .num {
      display: inline-block;
      width: 36px;
    }
  }

  ul {
    margin: -12px 0 0 0;
    padding: 6px 0 14px 0;
  }

  // List entry without a bullet, indented by the width of a `.num` slot.
  li {
    padding: 12px 0 0 36px;
    font: normal 16px/24px $nav;
    color: $gray-4;

    list-style-type: none;

    .num {
      display: inline-block;
      letter-spacing: 1px;
      width: 36px;
    }

    a {
      font: 600 17px/24px $nav;
    }
  }

  // Design-note entries have no top padding and use a lighter link weight.
  li.design-note {
    padding-top: 0;

    a {
      font: 400 16px/23px $nav;
    }
  }

  // Format the chapter list in two columns.
  // Built with CSS table display: `.chapters` is the table, `.row` a row, and
  // `.first` / `.second` the two cells.
  .chapters {
    display: table;
    width: $col * 18;
  }

  .row {
    display: table-row;
  }

  .first, .second {
    display: table-cell;
    vertical-align: top;
  }

  // Gutter between the two columns.
  .second {
    padding-left: $col;
  }

  // The footer spans the same width as the two-column list.
  footer {
    width: $col * 18;
  }
}

// Go single-column with the chapter list.
// Replaces the table display with plain blocks so `.second` stacks under
// `.first`, and removes the fixed widths.
@media only screen and (max-width: 1344px) {
  article.contents {
    .chapters, .row, .first, .second {
      display: block;
      width: auto;
    }

    // No column gutter is needed once the cells are stacked.
    .second {
      padding-left: 0;
    }

    footer {
      width: inherit;
    }
  }
}

// Then bring the margins in some.
// Narrows the number slot and removes the list indentation.
@media only screen and (max-width: 630px) {
  article.contents {
    h2 .num, li .num {
      width: 28px;
    }

    ol, ul {
      margin-left: 0;
    }

    li {
      padding-left: 0;
    }
  }
}

// Finally start shrinking text.
// Everything on the contents page moves to a 22px line height.
@media only screen and (max-width: 580px) {
  article.contents {
    h2 {
      margin: 19px 0 6px 0;
      font-size: 17px;
      line-height: 22px;
    }

    h3 {
      padding: 1px 0 2px 0;
      font-size: 17px;
      line-height: 22px;
    }

    p {
      font-size: 15px;
      line-height: 22px;
    }

    ol, ul {
      padding-bottom: 8px;
    }

    // This shorthand also resets the left padding of list items to 0.
    li {
      font-size: 14px;
      line-height: 22px;
      padding: 4px 0 3px 0;
    }
  }
}


================================================
FILE: asset/sass/print.scss
================================================
@import 'shared';

// Print styles: force black text, hide the navigation and the sign-up form,
// and mark code with borders instead of background colors.
@media print {
  // Pure black text.
  body, a, code {
    color: #000 !important;
    background: none !important;
  }

  // Hide non-content stuff.
  nav, .sign-up {
    display: none;
  }

  // Get rid of extra margins. The page margin will handle this.
  .page {
    margin: 0 !important;
  }

  // Tweak how code is formatted since we don't want to use a background color.
  .codehilite {
    margin: 0 !important;

    // Borders on the left and right and no background.
    background: none !important;
    border-radius: 0 !important;
    border-left: solid 1px $warm-4;
    border-right: solid 1px $warm-4;

    // Kept below the plain declarations above: Sass deprecates declarations
    // that come after a nested rule in the same block.
    pre {
      color: #000 !important;
    }

    // Show thicker borders on the left and right instead of a background.
    .insert {
      border-left: solid 3px $warm-4 !important;
      border-right: solid 3px $warm-4 !important;
      background: none !important;
    }

    // Ask the browser to print deleted lines with their authored colors.
    // `print-color-adjust` is the standard property name; the prefixed and
    // `color-adjust` spellings are kept for browsers that only know those.
    .delete {
      -webkit-print-color-adjust: exact;
      color-adjust: exact;
      print-color-adjust: exact;
    }

    // Browsers don't honor the specific authored colors when printing if the
    // color is too close to the background. Tell the browser not to do that.
    .insert-before span, .insert-after span {
      -webkit-print-color-adjust: exact;
      color-adjust: exact;
      print-color-adjust: exact;
    }
  }
}


================================================
FILE: asset/sass/shared.scss
================================================
// Font stacks.
// Note that `$nav` is a font stack variable; it is unrelated to the `nav`
// element selector.
$serif:   "Crimson", Georgia, serif;
$mono:    "Source Code Pro", Menlo, Consolas, Monaco, monospace;
$nav:     "Source Sans Pro", sans-serif;

// The main intense primary accent color.
$primary:       hsl(200, 80%, 40%);
$primary-dark:  hsl(200, 100%, 20%);
$primary-light: hsl(200, 70%, 60%);

// A ramp of washed out blues from dark to light.
$dark:    hsl(215, 20%, 20%);
$gray-1:  hsl(212, 23%, 30%);
$gray-2:  hsl(209, 26%, 40%);
$gray-3:  hsl(206, 30%, 50%);
$gray-4:  hsl(203, 30%, 60%);
$light:   hsl(195, 30%, 90%);
$lighter: hsl(195, 35%, 95%);

// An opposing warm light color (code background).
// The weight passed to `mix()` is the share of its first argument, so
// `$warm-1` is mostly `$warm-dark` and `$warm-4` is mostly `$warm-light`.
$warm-dark:  hsl(40, 0%, 35%);
$warm-light: hsl(40, 30%, 97%);
$warm-1:     mix($warm-light, $warm-dark, 15%);
$warm-2:     mix($warm-light, $warm-dark, 40%);
$warm-3:     mix($warm-light, $warm-dark, 60%);
$warm-4:     mix($warm-light, $warm-dark, 80%);
$warm-5:     hsl(40, 20%, 95%);

// The full-size design is 28 units wide, in three columns:
// [][][][][][][][][][][][][][][][][][][][][][][][][][][][]
//   (   5    )    (          12          )  (    6     )
// They are asymmetric because the left column has a dark background, which
// requires a double margin.
// `$col` is the width of one of those units.
$col: 48px;

// The six faces of the "Crimson" family used by `$serif`: upright and italic
// at the default weight, at 600, and at bold. All are WOFF files under font/.

// Regular.
@font-face {
  font-family: 'Crimson';
  src: url('font/crimson-roman.woff') format('woff');
}

// Italic.
@font-face {
  font-family: 'Crimson';
  src: url('font/crimson-italic.woff') format('woff');
  font-style: italic;
}

// Semibold.
@font-face {
  font-family: 'Crimson';
  src: url('font/crimson-semibold.woff') format('woff');
  font-weight: 600;
}

// Semibold italic.
@font-face {
  font-family: 'Crimson';
  src: url('font/crimson-semibolditalic.woff') format('woff');
  font-style: italic;
  font-weight: 600;
}

// Bold.
@font-face {
  font-family: 'Crimson';
  src: url('font/crimson-bold.woff') format('woff');
  font-weight: bold;
}

// Bold italic.
@font-face {
  font-family: 'Crimson';
  src: url('font/crimson-bolditalic.woff') format('woff');
  font-style: italic;
  font-weight: bold;
}

// Reset stuff.

// Remove the default margins from the listed elements.
body, h1, h2, h3, h4, p, blockquote, code, ul, ol, dl, dd, img {
  margin: 0;
}

img {
  outline: none;
}

// Inline images sized by a fixed height; the width follows automatically.
img.arrow {
  width: auto;
  height: 11px;
}

img.dot {
  width: auto;
  height: 18px;
  vertical-align: text-bottom;
}

// Basic styles.

// Default text: dark gray serif, 16px on a 24px line height.
body {
  color: #222;
  font: normal 16px/24px $serif;
}


================================================
FILE: asset/sass/sign-up.scss
================================================
// Sign-up box: a warm-tinted rounded panel holding a one-row form.
.sign-up {
  padding: 12px;
  margin: 24px 0 24px 0;
  background: hsl(40, 80%, 95%);
  color: hsl(40, 50%, 50%);
  border-radius: 3px;

  // Lay the form's children out in a single flex row.
  form {
    display: flex;
  }

  // Shared look for every input in the box.
  input {
    padding: 4px;
    font: 16px $nav;
    outline: none;
    border-radius: 3px;
    border: solid 2px hsl(40, 100%, 75%);
    color: hsl(40, 70%, 30%);
    height: 32px;

    // The e-mail field is sized to 100% width, borders included.
    &.email {
      display: block;
      box-sizing: border-box;
      width: 100%;
    }

    // The submit button: borderless, uppercase, filled with a stronger tint.
    &.button {
      margin-left: 8px;
      padding: 4px 8px;
      font: 600 13px $nav;
      text-transform: uppercase;
      letter-spacing: 1px;
      background: hsl(40, 100%, 60%);
      border: none;

      transition: background-color 0.2s ease;

      &:hover {
        background: hsl(40, 100%, 75%);
      }
    }

    // Stronger border color while an input has focus.
    &:focus {
      border-color: hsl(40, 100%, 50%);
    }
  }
}

================================================
FILE: asset/style.scss
================================================
@import 'sass/shared';
@import 'sass/chapter';
@import 'sass/contents';
@import 'sass/sign-up';
@import 'sass/print';

// Make sure we don't split on the thin spaces around an em dash.
.emdash {
  white-space: nowrap;
}

// A very tall absolutely positioned layer with a background image.
// NOTE(review): looks like a layout-debugging overlay that draws a row or
// column grid over the page — confirm where `.scrim` is used.
.scrim {
  position: absolute;
  width: 100%;
  height: 10000px;

  z-index: 4;

  // background: url('columns.png');
  background: url('rows.png');
}

// Used for drawing the bitwise operators "AND", "OR", and "NOT" in small caps.
// This sets a smaller, semibold font; it does not change the letter case.
.small-caps {
  font-weight: 600;
  font-size: 13px;
}

// Links: accent-colored text with no underline. A 1px bottom border is always
// present; it is fully transparent at rest and fully opaque on hover, and the
// transition animates the change.
a {
  color: $primary;
  text-decoration: none;

  border-bottom: solid 1px transparentize($light, 1.0);

  transition: color 0.2s ease,
              border-color 0.4s ease;

  &:hover {
    color: $primary;
    border-bottom: solid 1px opacify($light, 1.0);
  }
}

// Styles shared by every navigation variant (wide, floating, narrow): dark
// background, light sans-serif text. In the `font` values below, `$nav` is the
// sans-serif font stack variable, not the element selector.
nav {
  font: 300 15px/24px $nav;
  background: $dark;
  color: $gray-2;

  // Navigation links drop the bottom border that regular links have.
  a, h2 a {
    color: $gray-4;
    text-decoration: none;
    border-bottom: none;
  }

  a:hover {
    color: $light;
    text-decoration: none;
    border-bottom: none;
  }

  // The logo fills the nav width, inset by one column on each side.
  img {
    box-sizing: border-box;
    width: 100%;
    padding: 55px $col 23px $col;
  }

  h2 {
    font: 400 16px/24px $nav;
    text-transform: uppercase;
    letter-spacing: 1px;
    color: $gray-4;
  }

  h3 {
    font: 400 18px/24px $nav;
    color: $gray-4;
  }

  // A <small> inside a heading is floated to the right in a dimmer color.
  h2 small, h3 small {
    float: right;
    font-size: 16px;
    color: $gray-2;
  }

  // Lists are framed by a thin rule above and below.
  ol, ul {
    margin: 6px 0 3px 0;
    padding: 6px 0 4px 24px;
    border-top: solid 1px $gray-1;
    border-bottom: solid 1px $gray-1;
  }

  // Unordered lists have no bullets and no indentation.
  ul {
    list-style-type: none;
    padding-left: 0;
  }

  // Draw <hr> as a single thin top border.
  hr {
    border: none;
    border-top: solid 1px $gray-1;
    margin: 6px 0 0 0;
    padding: 0 0 3px 0;
  }

  // A <small> inside a list item is floated to the right.
  li small {
    float: right;
    font-size: 14px;
    color: $gray-2;
  }

  // A list item rendered as a horizontal separator line.
  li.divider {
    margin: 5px 0 7px 0;
    border-top: solid 1px $gray-1;
  }

  // Small uppercase entry; its <small> label is set back to normal case.
  li.end-part {
    font-size: 12px;
    font-weight: 400;
    text-transform: uppercase;
    letter-spacing: 1px;

    small {
      font-weight: 300;
      text-transform: none;
      letter-spacing: 0;
    }
  }

  // The row of previous / up / next links.
  .prev-next {
    padding-top: 7px;
    font: 400 12px/18px $nav;
    text-align: center;
    text-transform: uppercase;
    letter-spacing: 1px;
  }
}

// Full-height sidebar, seven columns wide, fixed to the viewport.
nav.wide {
  position: fixed;
  width: $col * 7;
  height: 100%;

  .contents {
    margin: 24px $col;
  }
}

// This is needed to make the nav fixed (not scrolling with the content) but
// still positioned horizontally based on the page.
// See: http://stackoverflow.com/a/11833892/9457
.nav-wrapper {
  position: absolute;
  right: $col * 6;
}

// For medium-sized screens, the navigation floats over the same column as the
// asides.
nav.floating {
  // Only shown on narrower screens.
  display: none;

  z-index: 2;
  position: absolute;
  width: $col * 6;

  border-bottom-left-radius: 3px;
  border-bottom-right-radius: 3px;

  // The "≡" control at the bottom of the floating nav.
  #expand-nav {
    padding: 0 0 4px 0;
    display: block;
    font-size: 20px;
    text-align: center;
    color: $gray-2;
    cursor: pointer;

    transition: padding 0.2s ease,
                margin 0.2s ease,
                color 0.2s ease;
  }

  #expand-nav, #expand-nav:hover {
    border-bottom: none;
  }

  #expand-nav:hover {
    color: $light;
  }

  // The collapsible part of the nav. It is clipped to zero height until the
  // `shown` class is added.
  .expandable {
    overflow: hidden;
    padding: 0 12px;

    // Using max-height instead of height to allow the navigation to
    // automatically choose its height based on the size of the list while
    // still transitioning.
    // See: http://stackoverflow.com/a/8331169/9457
    max-height: 0;
    transition: margin 0.2s ease,
                max-height 1.0s ease;

    .prev-next {
      padding-bottom: 6px;
    }
  }

  .expandable.shown {
    // This should be as small as possible while still being large enough for
    // the worst case chapter.
    max-height: 550px;
  }

  // Half a column of horizontal padding around the logo. Written as a
  // multiplication because `/` as a division operator is deprecated in Dart
  // Sass; `$col * 0.5` yields the same length as `$col / 2`.
  img {
    padding: 110px ($col * 0.5) 23px ($col * 0.5);
  }
}

// Floating nav with the `pinned` class: fixed to the viewport and shifted up
// by 85px, with negative top margins on its contents.
// NOTE(review): presumably a script toggles `pinned` on scroll — confirm.
nav.floating.pinned {
  position: fixed;

  top: -85px;

  .expandable {
    margin-top: -13px;
  }

  #expand-nav {
    margin-top: -14px;
  }
}

// Compact top bar: a centered logo with previous / next arrows pinned to the
// left and right edges. Hidden by default.
nav.narrow {
  display: none;

  text-align: center;

  // The logo is sized by height here, replacing the 100% width from `nav img`.
  img {
    box-sizing: content-box;
    padding: 11px 0 3px 0;
    width: auto;
    height: 27px;
  }

  .prev, .next {
    font-size: 32px;
    position: absolute;
    top: 12px;
    padding: 0 $col;
  }

  .prev {
    left: 0;
  }

  .next {
    right: 0;
  }
}

// Float helpers.
.left {
  float: left;
}

.right {
  float: right;
}

// The content area: 19 columns wide with a left margin of 8 columns.
.page {
  position: relative;

  width: $col * 19;
  margin: 0 auto 0 $col * 8;
}

// Make em dashes look pretty. Goals:
//
// - Add a tiny bit of space on either side. Completely unspaced em dashes
//   look too tight to me.
// - Allow an em dash at the end of a line.
// - Prevent an em dash at the beginning of a line.
//
// Wrapping each `&mdash;` in a span with this class and consuming the
// preceding whitespace seems to accomplish that.
.em {
  padding: 0 .1em;
  white-space: nowrap;
}

// Make ellipses follow Chicago style. The `&hellip;` entity puts a tiny amount
// of space between each `.`, but not as much as Chicago style specifies. It
// also doesn't put any space before. Instead, the build system writes a span
// of this class with thin-space separated dots. This class here ensures there
// is no splitting between the dots.
.ellipse {
  white-space: nowrap;
}

// Inline code: monospace, never wrapped in the middle.
code {
  font: normal 16px $mono;
  color: $warm-1;
  white-space: nowrap;
  padding: 2px;
}

// Code inside bold text is bold and takes the surrounding text color.
strong code {
  font-weight: bold;
  color: inherit;
}

// Code inside a link takes the link color.
a code {
  color: $primary;
}

// Code block container. The -12px margin offsets the 12px padding.
.codehilite {
  color: $warm-dark;
  background: $warm-light;
  border-radius: 3px;
  padding: 12px;
  margin: -12px;
}

pre {
  font: normal 13px/20px $mono;
  margin: 0;
  padding: 0;

  // If the code doesn't fit, just force it to wrap instead of cropping it. It
  // doesn't look great, but it ensures the code is visible and can be correctly
  // copy-pasted.
  white-space: pre-wrap;
  overflow-wrap: anywhere;
}

// If the chapter ends with code, don't overlap the challenges box.
// Half a column of top margin, written as a multiplication because `/` as a
// division operator is deprecated in Dart Sass; `$col * 0.5` yields the same
// length as `$col / 2`.
div.codehilite + div.challenges {
  margin-top: $col * 0.5;
}

// The main text column, 12 columns wide. It is positioned so that absolutely
// positioned descendants are placed relative to it.
article {
  position: relative;
  width: $col * 12;

  // Page title. Its z-index (2) is higher than that of `.number` (1).
  h1 {
    position: relative;
    font: 48px/48px $serif;
    padding: 109px 0 19px 0;
    z-index: 2;
  }

  // Title variant used with the `part` class: uppercase sans-serif.
  h1.part {
    font: 600 36px/48px $nav;
    padding: 108px 0 20px 0;
    text-transform: uppercase;
    letter-spacing: 1px;
  }

  // Large, light number placed 13 columns from the article's left edge.
  .number {
    position: absolute;
    top: 50px;
    left: $col * 13;

    z-index: 1;

    font: 300 96px $nav;
    color: $light;
  }

  p {
    margin: 24px 0;
  }

  ol, ul {
    margin: 24px 0;
    padding: 0 0 0 24px;
  }

  // Images never exceed the text column...
  img {
    max-width: 100%;
  }

  // ...except `wide` ones, which are 19 columns wide.
  img.wide {
    max-width: none;
    width: $col * 19;
  }
}

// Side note: six columns wide, absolutely positioned seven columns past the
// right edge of its positioned ancestor, with a thin rule along the top.
aside {
  position: absolute;
  right: -$col * 7;
  width: $col * 6;

  font: normal 14px/20px $serif;

  border-top: solid 1px $light;

  p {
    margin: 20px 0;
  }

  // Tighten the gap next to the border at the start and end of the aside.
  p:first-child,
  img:first-child {
    margin-top: 4px;
  }

  p:last-child {
    margin-bottom: 4px;
  }

  code {
    font-size: 14px;
    border-radius: 2px;
    padding: 1px 2px;
  }

  // Code blocks in asides use less padding and no horizontal negative margin.
  .codehilite {
    padding: 6px;
    margin: -12px 0;
  }

  .codehilite:last-child {
    margin-bottom: 4px;
  }

  // An image with its bottom edge placed at the top of the aside, 16px above.
  img.above {
    position: absolute;
    bottom: 100%;
    margin-bottom: 16px;
  }

  // Quotes in asides are smaller and have no generated ::before / ::after
  // content.
  blockquote {
    margin: 20px 0;

    &::before, &::after {
      content: none;
    }

    p {
      margin: 0 12px;
      font: italic 15px/20px $serif;
      color: inherit;
    }
  }
}

// Sometimes there isn't room to hang the aside *down* next to the content it's
// annotating, so support asides where the bottom is aligned with the content.
// Only the rule is moved here, from the top edge to the bottom edge.
aside.bottom {
  border-top: none;
  border-bottom: solid 1px $light;
}

// Block quotes: large italic text framed by oversized curly quote marks.
blockquote {
  position: relative;

  margin: 29px 0 31px 0;

  // Shared styling for the generated opening and closing quote marks.
  &::before, &::after {
    position: absolute;
    top: -20px;
    font: italic 72px $serif;
    color: $light;
  }

  // Opening mark (U+201C) at the left.
  &::before {
    content: "\201c";
    left: -7px;
  }

  // Closing mark (U+201D) at the right.
  &::after {
    content: "\201d";
    right: 8px;
  }

  p {
    margin: 0 $col;
    font: italic 24px/36px $serif;
    color: $gray-3;
  
    // The quote text is italic, so emphasis inside it is set upright.
    em {
      font-style: normal;
    }
  }

  // Attribution line: right-aligned, upright, preceded by an em dash and a
  // non-breaking space (U+2014, U+00A0).
  cite {
    display: block;
    text-align: right;
    color: $gray-4;
    font-style: normal;
    font-size: 18px;

    &::before {
      content: "\2014\00a0";
      color: $light;
    }

    // The attribution is upright, so emphasis inside it is italic.
    em {
      font-style: italic;
    }
  }
}

// Page footer: centered text under a thin top rule.
footer {
  position: relative;
  border-top: solid 1px $light;
  color: $gray-4;
  font: 400 15px $nav;
  text-align: center;
  margin: 48px 0;
  padding-top: 48px;

  a, a:hover {
    border: none;
  }

  // The "next" link is positioned over the right end of the top rule; its
  // white background covers the part of the rule behind the text.
  .next {
    position: absolute;
    right: 0;
    top: -13px;

    padding-left: 4px;
    background: #fff;

    font: 400 17px/24px $nav;
    text-transform: uppercase;
    letter-spacing: 1px;
  }

  .next:hover {
    color: $primary-dark;
    border: none;
  }
}

// Centered block with generous vertical margins; its image takes half the
// available width.
.dedication {
  margin: 96px 0 128px 0;
  text-align: center;

  img {
    width: 50%;
  }
}

// Small monospace label in two variants: `.source-file` sits in the side
// column, and `.source-file-narrow` is the inline version for single-column
// layouts.
.source-file, .source-file-narrow {
  font: normal 11px/16px $mono;
  color: $warm-3;

  // Emphasis is shown by color instead of italics.
  em {
    color: $warm-2;
    font-style: normal;
  }
}

.source-file-narrow {
  // Don't show unless in single-column.
  display: none;

  margin: 0px -12px 0 0;
  padding: 14px 0 0 0;
  text-align: right;
}

// Positioned with the same offset and width as `aside`.
.source-file {
  position: absolute;
  right: -$col * 7;
  width: $col * 6;
  padding: 2px 0 0 0;

  // A "<<" marker centered in the gap to the left of the label.
  &::before {
    content: "<<";
    color: $warm-4;
    position: absolute;
    left: -($col - 12px);
    width: $col - 12px;
    text-align: center;
  }
}

// Syntax highlighting.
// Each single-letter class below colors one kind of token; the trailing
// comment on each line names the kind.
.codehilite {
  pre { color: mix($warm-light, $warm-dark, 20%); }

  .k { color: hsl(200, 100%,  45%); }               // Keyword.
  .n { color: hsl( 20,  70%,  55%); }               // Number.
  .s { color: hsl( 40,  70%,  45%); }               // String.
  .e { color: hsl( 45,  80%,  55%); }               // String escape.
  .c { color: mix($warm-light, $warm-dark, 50%); }  // Comment.
  .a { color: hsl(270,  50%,  60%); }               // Preprocessor, annotation.
  .i { color: hsl(200,  70%,  35%); }               // Identifier.
  .t { color: hsl(185, 100%,  35%); }               // Type name.

  // Highlighted span that reaches the block's edges: the -12px horizontal
  // margin offsets the block's 12px padding, with 2px side borders.
  .insert {
    margin: -2px -12px;
    padding: 2px 10px;
    border-left: solid 2px $warm-4;
    border-right: solid 2px $warm-4;
    background: $warm-5;
  }

  // Same geometry as `.insert`, with a hatched background and dimmed text.
  .delete {
    margin: -2px -12px;
    padding: 2px 10px;
    border-left: solid 2px $warm-4;
    border-right: solid 2px $warm-4;
    // Hatched lines.
    background: repeating-linear-gradient(
      -45deg,
      $warm-4,
      $warm-4 1px,
      rgba(0, 0, 0, 0.0) 1px,
      rgba(0, 0, 0, 0.0) 6px
    );

    span {
      color: $warm-3;
    }
  }

  // Snippets of code before and after real code to show where to insert it.
  .insert-before, .insert-after {
    color: $warm-3;
  }

  // When we just add a trailing comma to a line, highlight it specially.
  .insert-before .insert-comma {
    margin: -2px -1px;
    padding: 2px 1px;
    border-radius: 2px;

    background: $warm-5;
    color: $warm-dark;
  }
}

// On a not-entirely-large screen, don't show the fixed nav on the left.
// The floating nav is shown instead.
@media only screen and (max-width: 1344px) {
  nav.wide { display: none; }
  nav.floating { display: block; }

  body {
    margin: 0 24px;
  }

  // The page is centered and may shrink below its 19-column maximum.
  .page {
    position: relative;
    width: inherit;
    max-width: $col * 19;
    margin: 0 auto;
  }

  // A seven-column right margin is kept next to the article.
  article {
    width: inherit;
    margin-right: $col * 7;

    // Move the number over to not be hidden behind the navigation.
    .number {
      top: 73px;
      left: inherit;
      right: 0;
      font-size: 72px;
    }

    h1 {
      padding: 110px 0 18px 0;
      font-size: 44px;
    }
  }
}

// Tablets and mobile go single column.
// The breakpoint is `$col * 20`, twenty layout columns wide.
@media only screen and (max-width: $col * 20) {
  body {
    margin: 0;
  }

  // Swap the floating nav for the narrow top bar.
  nav.floating {
    display: none;
  }

  nav.narrow {
    display: block;
  }

  .page {
    margin: 0 $col;
    width: inherit;
  }

  article {
    margin: 0;

    // Size wide images to fit inside the column again.
    img.wide {
      width: inherit;
      max-width: 100%;
    }
  }

  // Asides stop being absolutely positioned side notes.
  // NOTE(review): `inherit` takes the parent's value instead of resetting to
  // the property's initial value, so this depends on what the parent has —
  // confirm that is intended.
  aside {
    position: inherit;
    right: inherit;
    width: inherit;

    border-bottom: solid 1px $light;

    p:first-child {
      margin-top: 8px;
    }

    p:last-child {
      margin-bottom: 8px;
    }

    // If an aside ends with code (like in "classes.html"), then make sure we
    // give it some margin.
    div.codehilite:last-child {
      margin-bottom: 12px;
    }

    // Make sure aside images don't get too big when the asides are inlined
    // in single column mode.
    img {
      display: block;
      max-width: $col * 6;
      margin: 0 auto;
    }

    // `above` images are no longer absolutely positioned.
    img.above {
      position: relative;
    }
  }

  // If aside is right before a code block (when the asides are inline), make
  // sure they don't overlap.
  aside + div.codehilite {
    margin-top: 12px;
  }

  div.codehilite + aside {
    margin-top: 24px;
  }

  // Swap the side-column file label for the inline one.
  .source-file {
    display: none;
  }

  .source-file-narrow {
    display: block;
  }
}

// Then bring the margins in some.
// The cut-off sizes here are based on trying to get 72 columns of code to fit.
// The half-column lengths are written as multiplications because `/` as a
// division operator is deprecated in Dart Sass; `$col * 0.5` yields the same
// length as `$col / 2`.
@media only screen and (max-width: 630px) {
  .page {
    margin: 0 ($col * 0.5);
    width: inherit;
  }

  nav.narrow {
    .prev, .next {
      padding: 0 ($col * 0.5);
    }
  }
}

// Finally, shrink the grid to 22px and shrink the text.
@media only screen and (max-width: 580px) {
  body {
    font-size: 15px;
    line-height: 22px;
  }

  .small-caps {
    font-size: 12px;
  }

  // Use the 22px variant of the rows image.
  .scrim {
    background: url('rows-22.png');
  }

  nav.narrow {
    // The height repeats the 27px from the base `nav.narrow img` rule; only
    // the padding differs.
    img {
      padding: 9px 0 1px 0;
      height: 27px;
    }

    .prev, .next {
      top: 11px;
    }
  }

  article {
    h1 {
      font-size: 36px;
      padding: 100px 0 14px 0;
    }

    h1.part {
      font-size: 30px;
      padding: 97px 0 17px 0;
    }

    .number {
      top: 61px;
      font-size: 72px;
    }

    // Paragraph and list spacing follow the 22px grid.
    p {
      margin: 22px 0;
    }

    ol, ul {
      margin: 22px 0;
      padding: 0 0 0 22px;
    }
  }

  blockquote {
    margin: 27px 0 28px 0;

    // Smaller quote marks.
    &::before, &::after {
      top: -17px;
      font-size: 52px;
    }

    p {
      margin: 0 22px;
      font-size: 20px;
      line-height: 33px;
    }
  }

  footer {
    .next {
      font-size: 15px;
    }
  }
}


================================================
FILE: book/a-bytecode-virtual-machine.md
================================================
Our Java interpreter, jlox, taught us many of the fundamentals of programming
languages, but we still have much to learn. First, if you run any interesting
Lox programs in jlox, you'll discover it's achingly slow. The style of
interpretation it uses -- walking the AST directly -- is good enough for *some*
real-world uses, but leaves a lot to be desired for a general-purpose scripting
language.

Also, we implicitly rely on runtime features of the JVM itself. We take for
granted that things like `instanceof` in Java work *somehow*. And we never for a
second worry about memory management because the JVM's garbage collector takes
care of it for us.

When we were focused on high-level concepts, it was fine to gloss over those.
But now that we know our way around an interpreter, it's time to dig down to
those lower layers and build our own virtual machine from scratch using nothing
more than the C standard library...


================================================
FILE: book/a-map-of-the-territory.md
================================================
> You must have a map, no matter how rough. Otherwise you wander all over the
> place. In *The Lord of the Rings* I never made anyone go farther than he could
> on a given day.
>
> <cite>J. R. R. Tolkien</cite>

We don't want to wander all over the place, so before we set off, let's scan
the territory charted by previous language implementers. It will help us
understand where we are going and the alternate routes others have taken.

First, let me establish a shorthand. Much of this book is about a language's
*implementation*, which is distinct from the *language itself* in some sort of
Platonic ideal form. Things like "stack", "bytecode", and "recursive descent"
are nuts and bolts one particular implementation might use. From the user's
perspective, as long as the resulting contraption faithfully follows the
language's specification, it's all implementation detail.

We're going to spend a lot of time on those details, so if I have to write
"language *implementation*" every single time I mention them, I'll wear my
fingers off. Instead, I'll use "language" to refer to either a language or an
implementation of it, or both, unless the distinction matters.

## The Parts of a Language

Engineers have been building programming languages since the Dark Ages of
computing. As soon as we could talk to computers, we discovered doing so was too
hard, and we enlisted their help. I find it fascinating that even though today's
machines are literally a million times faster and have orders of magnitude more
storage, the way we build programming languages is virtually unchanged.

Though the area explored by language designers is vast, the trails they've
carved through it are <span name="dead">few</span>. Not every language takes the
exact same path -- some take a shortcut or two -- but otherwise they are
reassuringly similar, from Rear Admiral Grace Hopper's first COBOL compiler all
the way to some hot, new, transpile-to-JavaScript language whose "documentation"
consists entirely of a single, poorly edited README in a Git repository
somewhere.

<aside name="dead">

There are certainly dead ends, sad little cul-de-sacs of CS papers with zero
citations and now-forgotten optimizations that only made sense when memory was
measured in individual bytes.

</aside>

I visualize the network of paths an implementation may choose as climbing a
mountain. You start off at the bottom with the program as raw source text,
literally just a string of characters. Each phase analyzes the program and
transforms it to some higher-level representation where the semantics -- what
the author wants the computer to do -- become more apparent.

Eventually we reach the peak. We have a bird's-eye view of the user's program
and can see what their code *means*. We begin our descent down the other side of
the mountain. We transform this highest-level representation down to
successively lower-level forms to get closer and closer to something we know how
to make the CPU actually execute.

<img src="image/a-map-of-the-territory/mountain.png" alt="The branching paths a language may take over the mountain." class="wide" />

Let's trace through each of those trails and points of interest. Our journey
begins on the left with the bare text of the user's source code:

<img src="image/a-map-of-the-territory/string.png" alt="var average = (min + max) / 2;" />

### Scanning

The first step is **scanning**, also known as **lexing**, or (if you're trying
to impress someone) **lexical analysis**. They all mean pretty much the same
thing. I like "lexing" because it sounds like something an evil supervillain
would do, but I'll use "scanning" because it seems to be marginally more
commonplace.

A **scanner** (or **lexer**) takes in the linear stream of characters and chunks
them together into a series of something more akin to <span
name="word">"words"</span>. In programming languages, each of these words is
called a **token**. Some tokens are single characters, like `(` and `,`. Others
may be several characters long, like numbers (`123`), string literals (`"hi!"`),
and identifiers (`min`).

<aside name="word">

"Lexical" comes from the Greek root "lex", meaning "word".

</aside>

Some characters in a source file don't actually mean anything. Whitespace is
often insignificant, and comments, by definition, are ignored by the language.
The scanner usually discards these, leaving a clean sequence of meaningful
tokens.

<img src="image/a-map-of-the-territory/tokens.png" alt="[var] [average] [=] [(] [min] [+] [max] [)] [/] [2] [;]" />

### Parsing

The next step is **parsing**. This is where our syntax gets a **grammar** -- the
ability to compose larger expressions and statements out of smaller parts. Did
you ever diagram sentences in English class? If so, you've done what a parser
does, except that English has thousands and thousands of "keywords" and an
overflowing cornucopia of ambiguity. Programming languages are much simpler.

A **parser** takes the flat sequence of tokens and builds a tree structure that
mirrors the nested nature of the grammar. These trees have a couple of different
names -- **parse tree** or **abstract syntax tree** -- depending on how
close to the bare syntactic structure of the source language they are. In
practice, language hackers usually call them **syntax trees**, **ASTs**, or
often just **trees**.

<img src="image/a-map-of-the-territory/ast.png" alt="An abstract syntax tree." />

Parsing has a long, rich history in computer science that is closely tied to the
artificial intelligence community. Many of the techniques used today to parse
programming languages were originally conceived to parse *human* languages by AI
researchers who were trying to get computers to talk to us.

It turns out human languages were too messy for the rigid grammars those parsers
could handle, but they were a perfect fit for the simpler artificial grammars of
programming languages. Alas, we flawed humans still manage to use those simple
grammars incorrectly, so the parser's job also includes letting us know when we
do by reporting **syntax errors**.

### Static analysis

The first two stages are pretty similar across all implementations. Now, the
individual characteristics of each language start coming into play. At this
point, we know the syntactic structure of the code -- things like which
expressions are nested in which -- but we don't know much more than that.

In an expression like `a + b`, we know we are adding `a` and `b`, but we don't
know what those names refer to. Are they local variables? Global? Where are they
defined?

The first bit of analysis that most languages do is called **binding** or
**resolution**. For each **identifier**, we find out where that name is defined
and wire the two together. This is where **scope** comes into play -- the region
of source code where a certain name can be used to refer to a certain
declaration.

If the language is <span name="type">statically typed</span>, this is when we
type check. Once we know where `a` and `b` are declared, we can also figure out
their types. Then if those types don't support being added to each other, we
report a **type error**.

<aside name="type">

The language we'll build in this book is dynamically typed, so it will do its
type checking later, at runtime.

</aside>

Take a deep breath. We have attained the summit of the mountain and a sweeping
view of the user's program. All this semantic insight that is visible to us from
analysis needs to be stored somewhere. There are a few places we can squirrel it
away:

* Often, it gets stored right back as **attributes** on the syntax tree
  itself -- extra fields in the nodes that aren't initialized during parsing
  but get filled in later.

* Other times, we may store data in a lookup table off to the side. Typically,
  the keys to this table are identifiers -- names of variables and declarations.
  In that case, we call it a **symbol table** and the values it associates with
  each key tell us what that identifier refers to.

* The most powerful bookkeeping tool is to transform the tree into an entirely
  new data structure that more directly expresses the semantics of the code.
  That's the next section.

Everything up to this point is considered the **front end** of the
implementation. You might guess everything after this is the **back end**, but
no. Back in the days of yore when "front end" and "back end" were coined,
compilers were much simpler. Later researchers invented new phases to stuff
between the two halves. Rather than discard the old terms, William Wulf and
company lumped those new phases into the charming but spatially paradoxical name
**middle end**.

### Intermediate representations

You can think of the compiler as a pipeline where each stage's job is to
organize the data representing the user's code in a way that makes the next
stage simpler to implement. The front end of the pipeline is specific to the
source language the program is written in. The back end is concerned with the
final architecture where the program will run.

In the middle, the code may be stored in some <span name="ir">**intermediate
representation**</span> (**IR**) that isn't tightly tied to either the source or
destination forms (hence "intermediate"). Instead, the IR acts as an interface
between these two languages.

<aside name="ir">

There are a few well-established styles of IRs out there. Hit your search engine
of choice and look for "control flow graph", "static single-assignment",
"continuation-passing style", and "three-address code".

</aside>

This lets you support multiple source languages and target platforms with less
effort. Say you want to implement Pascal, C, and Fortran compilers, and you want
to target x86, ARM, and, I dunno, SPARC. Normally, that means you're signing up
to write *nine* full compilers: Pascal&rarr;x86, C&rarr;ARM, and every other
combination.

A <span name="gcc">shared</span> intermediate representation reduces that
dramatically. You write *one* front end for each source language that produces
the IR. Then *one* back end for each target architecture. Now you can mix and
match those to get every combination.

<aside name="gcc">

If you've ever wondered how [GCC][] supports so many crazy languages and
architectures, like Modula-3 on Motorola 68k, now you know. Language front ends
target one of a handful of IRs, mainly [GIMPLE][] and [RTL][]. Target back ends
like the one for 68k then take those IRs and produce native code.

[gcc]: https://en.wikipedia.org/wiki/GNU_Compiler_Collection
[gimple]: https://gcc.gnu.org/onlinedocs/gccint/GIMPLE.html
[rtl]: https://gcc.gnu.org/onlinedocs/gccint/RTL.html

</aside>

There's another big reason we might want to transform the code into a form that
makes the semantics more apparent...

### Optimization

Once we understand what the user's program means, we are free to swap it out
with a different program that has the *same semantics* but implements them more
efficiently -- we can **optimize** it.

A simple example is **constant folding**: if some expression always evaluates to
the exact same value, we can do the evaluation at compile time and replace the
code for the expression with its result. If the user typed in this:

```java
pennyArea = 3.14159 * (0.75 / 2) * (0.75 / 2);
```

we could do all of that arithmetic in the compiler and change the code to:

```java
pennyArea = 0.4417860938;
```

Optimization is a huge part of the programming language business. Many language
hackers spend their entire careers here, squeezing every drop of performance
they can out of their compilers to get their benchmarks a fraction of a percent
faster. It can become a sort of obsession.

We're mostly going to <span name="rathole">hop over that rathole</span> in this
book. Many successful languages have surprisingly few compile-time
optimizations. For example, Lua and CPython generate relatively unoptimized
code, and focus most of their performance effort on the runtime.

<aside name="rathole">

If you can't resist poking your foot into that hole, some keywords to get you
started are "constant propagation", "common subexpression elimination", "loop
invariant code motion", "global value numbering", "strength reduction", "scalar
replacement of aggregates", "dead code elimination", and "loop unrolling".

</aside>

### Code generation

We have applied all of the optimizations we can think of to the user's program.
The last step is converting it to a form the machine can actually run. In other
words, **generating code** (or **code gen**), where "code" here usually refers to
the kind of primitive assembly-like instructions a CPU runs and not the kind of
"source code" a human might want to read.

Finally, we are in the **back end**, descending the other side of the mountain.
From here on out, our representation of the code becomes more and more
primitive, like evolution run in reverse, as we get closer to something our
simple-minded machine can understand.

We have a decision to make. Do we generate instructions for a real CPU or a
virtual one? If we generate real machine code, we get an executable that the OS
can load directly onto the chip. Native code is lightning fast, but generating
it is a lot of work. Today's architectures have piles of instructions, complex
pipelines, and enough <span name="aad">historical baggage</span> to fill a 747's
luggage bay.

Speaking the chip's language also means your compiler is tied to a specific
architecture. If your compiler targets [x86][] machine code, it's not going to
run on an [ARM][] device. All the way back in the '60s, during the
Cambrian explosion of computer architectures, that lack of portability was a
real obstacle.

<aside name="aad">

For example, the [AAD][] ("ASCII Adjust AX Before Division") instruction lets
you perform division, which sounds useful. Except that instruction takes, as
operands, two binary-coded decimal digits packed into a single 16-bit register.
When was the last time *you* needed BCD on a 16-bit machine?

[aad]: http://www.felixcloutier.com/x86/AAD.html

</aside>

[x86]: https://en.wikipedia.org/wiki/X86
[arm]: https://en.wikipedia.org/wiki/ARM_architecture

To get around that, hackers like Martin Richards and Niklaus Wirth, of BCPL and
Pascal fame, respectively, made their compilers produce *virtual* machine code.
Instead of instructions for some real chip, they produced code for a
hypothetical, idealized machine. Wirth called this **p-code** for *portable*,
but today, we generally call it **bytecode** because each instruction is often a
single byte long.

These synthetic instructions are designed to map a little more closely to the
language's semantics, and not be so tied to the peculiarities of any one
computer architecture and its accumulated historical cruft. You can think of it
like a dense, binary encoding of the language's low-level operations.

### Virtual machine

If your compiler produces bytecode, your work isn't over once that's done. Since
there is no chip that speaks that bytecode, it's your job to translate. Again,
you have two options. You can write a little mini-compiler for each target
architecture that converts the bytecode to native code for that machine. You
still have to do work for <span name="shared">each</span> chip you support, but
this last stage is pretty simple and you get to reuse the rest of the compiler
pipeline across all of the machines you support. You're basically using your
bytecode as an intermediate representation.

<aside name="shared" class="bottom">

The basic principle here is that the farther down the pipeline you push the
architecture-specific work, the more of the earlier phases you can share across
architectures.

There is a tension, though. Many optimizations, like register allocation and
instruction selection, work best when they know the strengths and capabilities
of a specific chip. Figuring out which parts of your compiler can be shared and
which should be target-specific is an art.

</aside>

Or you can write a <span name="vm">**virtual machine**</span> (**VM**), a
program that emulates a hypothetical chip supporting your virtual architecture
at runtime. Running bytecode in a VM is slower than translating it to native
code ahead of time because every instruction must be simulated at runtime each
time it executes. In return, you get simplicity and portability. Implement your
VM in, say, C, and you can run your language on any platform that has a C
compiler. This is how the second interpreter we build in this book works.

<aside name="vm">

The term "virtual machine" also refers to a different kind of abstraction. A
**system virtual machine** emulates an entire hardware platform and operating
system in software. This is how you can play Windows games on your Linux
machine, and how cloud providers give customers the user experience of
controlling their own "server" without needing to physically allocate separate
computers for each user.

The kind of VMs we'll talk about in this book are **language virtual machines**
or **process virtual machines** if you want to be unambiguous.

</aside>

### Runtime

We have finally hammered the user's program into a form that we can execute. The
last step is running it. If we compiled it to machine code, we simply tell the
operating system to load the executable and off it goes. If we compiled it to
bytecode, we need to start up the VM and load the program into that.

In both cases, for all but the basest of low-level languages, we usually need
some services that our language provides while the program is running. For
example, if the language automatically manages memory, we need a garbage
collector going in order to reclaim unused bits. If our language supports
"instance of" tests so you can see what kind of object you have, then we need
some representation to keep track of the type of each object during execution.

All of this stuff is going at runtime, so it's called, appropriately, the
**runtime**. In a fully compiled language, the code implementing the runtime
gets inserted directly into the resulting executable. In, say, [Go][], each
compiled application has its own copy of Go's runtime directly embedded in it.
If the language is run inside an interpreter or VM, then the runtime lives
there. This is how most implementations of languages like Java, Python, and
JavaScript work.

[go]: https://golang.org/

## Shortcuts and Alternate Routes

That's the long path covering every possible phase you might implement. Many
languages do walk the entire route, but there are a few shortcuts and alternate
paths.

### Single-pass compilers

Some simple compilers interleave parsing, analysis, and code generation so that
they produce output code directly in the parser, without ever allocating any
syntax trees or other IRs. These <span name="sdt">**single-pass
compilers**</span> restrict the design of the language. You have no intermediate
data structures to store global information about the program, and you don't
revisit any previously parsed part of the code. That means as soon as you see
some expression, you need to know enough to correctly compile it.

<aside name="sdt">

[**Syntax-directed translation**][pass] is a structured technique for building
these all-at-once compilers. You associate an *action* with each piece of the
grammar, usually one that generates output code. Then, whenever the parser
matches that chunk of syntax, it executes the action, building up the target
code one rule at a time.

[pass]: https://en.wikipedia.org/wiki/Syntax-directed_translation

</aside>

Pascal and C were designed around this limitation. At the time, memory was so
precious that a compiler might not even be able to hold an entire *source file*
in memory, much less the whole program. This is why Pascal's grammar requires
type declarations to appear first in a block. It's why in C you can't call a
function above the code that defines it unless you have an explicit forward
declaration that tells the compiler what it needs to know to generate code for a
call to the later function.

### Tree-walk interpreters

Some programming languages begin executing code right after parsing it to an AST
(with maybe a bit of static analysis applied). To run the program, the
interpreter traverses the syntax tree one branch and leaf at a time, evaluating
each node as it goes.

This implementation style is common for student projects and little languages,
but is not widely used for <span name="ruby">general-purpose</span> languages
since it tends to be slow. Some people use "interpreter" to mean only these
kinds of implementations, but others define that word more generally, so I'll
use the inarguably explicit **tree-walk interpreter** to refer to these. Our
first interpreter rolls this way.

<aside name="ruby">

A notable exception is early versions of Ruby, which were tree walkers. At 1.9,
the canonical implementation of Ruby switched from the original MRI (Matz's Ruby
Interpreter) to Koichi Sasada's YARV (Yet Another Ruby VM). YARV is a
bytecode virtual machine.

</aside>

### Transpilers

<span name="gary">Writing</span> a complete back end for a language can be a lot
of work. If you have some existing generic IR to target, you could bolt your
front end onto that. Otherwise, it seems like you're stuck. But what if you
treated some other *source language* as if it were an intermediate
representation?

You write a front end for your language. Then, in the back end, instead of doing
all the work to *lower* the semantics to some primitive target language, you
produce a string of valid source code for some other language that's about as
high level as yours. Then, you use the existing compilation tools for *that*
language as your escape route off the mountain and down to something you can
execute.

They used to call this a **source-to-source compiler** or a **transcompiler**.
After the rise of languages that compile to JavaScript in order to run in the
browser, they've affected the hipster sobriquet **transpiler**.

<aside name="gary">

The first transcompiler, XLT86, translated 8080 assembly into 8086 assembly.
That might seem straightforward, but keep in mind the 8080 was an 8-bit chip and
the 8086 a 16-bit chip that could use each register as a pair of 8-bit ones.
XLT86 did data flow analysis to track register usage in the source program and
then efficiently map it to the register set of the 8086.

It was written by Gary Kildall, a tragic hero of computer science if there
ever was one. One of the first people to recognize the promise of
microcomputers, he created PL/M and CP/M, the first high-level language and OS
for them.

He was a sea captain, business owner, licensed pilot, and motorcyclist. A TV
host with the Kris Kristofferson-esque look sported by dashing bearded dudes in
the '80s. He took on Bill Gates and, like many, lost, before meeting his end in
a biker bar under mysterious circumstances. He died too young, but sure as hell
lived before he did.

</aside>

While the first transcompiler translated one assembly language to another,
today, most transpilers work on higher-level languages. After the viral spread
of UNIX to machines various and sundry, there began a long tradition of
compilers that produced C as their output language. C compilers were available
everywhere UNIX was and produced efficient code, so targeting C was a good way
to get your language running on a lot of architectures.

Web browsers are the "machines" of today, and their "machine code" is
JavaScript, so these days it seems [almost every language out there][js] has a
compiler that targets JS since that's the <span name="js">main</span> way to get
your code running in a browser.

[js]: https://github.com/jashkenas/coffeescript/wiki/list-of-languages-that-compile-to-js

<aside name="js">

JS used to be the *only* way to execute code in a browser. Thanks to
[WebAssembly][], compilers now have a second, lower-level language they can
target that runs on the web.

[webassembly]: https://github.com/webassembly/

</aside>

The front end -- scanner and parser -- of a transpiler looks like other
compilers. Then, if the source language is only a simple syntactic skin over the
target language, it may skip analysis entirely and go straight to outputting the
analogous syntax in the destination language.

If the two languages are more semantically different, you'll see more of the
typical phases of a full compiler including analysis and possibly even
optimization. Then, when it comes to code generation, instead of outputting some
binary language like machine code, you produce a string of grammatically correct
source (well, destination) code in the target language.

Either way, you then run that resulting code through the output language's
existing compilation pipeline, and you're good to go.

### Just-in-time compilation

This last one is less a shortcut and more a dangerous alpine scramble best
reserved for experts. The fastest way to execute code is by compiling it to
machine code, but you might not know what architecture your end user's machine
supports. What to do?

You can do the same thing that the HotSpot Java Virtual Machine (JVM),
Microsoft's Common Language Runtime (CLR), and most JavaScript interpreters do.
On the end user's machine, when the program is loaded -- either from source in
the case of JS, or platform-independent bytecode for the JVM and CLR -- you
compile it to native code for the architecture their computer supports.
Naturally enough, this is called **just-in-time compilation**. Most hackers just
say "JIT", pronounced like it rhymes with "fit".

The most sophisticated JITs insert profiling hooks into the generated code to
see which regions are most performance critical and what kind of data is flowing
through them. Then, over time, they will automatically recompile those <span
name="hot">hot spots</span> with more advanced optimizations.

<aside name="hot">

This is, of course, exactly where the HotSpot JVM gets its name.

</aside>

## Compilers and Interpreters

Now that I've stuffed your head with a dictionary's worth of programming
language jargon, we can finally address a question that's plagued coders since
time immemorial: What's the difference between a compiler and an interpreter?

It turns out this is like asking the difference between a fruit and a vegetable.
That seems like a binary either-or choice, but actually "fruit" is a *botanical*
term and "vegetable" is *culinary*. One does not strictly imply the negation of
the other. There are fruits that aren't vegetables (apples) and vegetables that
aren't fruits (carrots), but also edible plants that are both fruits *and*
vegetables, like tomatoes.

<span name="veg"></span>

<img src="image/a-map-of-the-territory/plants.png" alt="A Venn diagram of edible plants" />

<aside name="veg">

Peanuts (which are not even nuts) and cereals like wheat are actually fruit, but
I got this drawing wrong. What can I say, I'm a software engineer, not a
botanist. I should probably erase the little peanut guy, but he's so cute that I
can't bear to.

Now *pine nuts*, on the other hand, are plant-based foods that are neither
fruits nor vegetables. At least as far as I can tell.

</aside>

So, back to languages:

* **Compiling** is an *implementation technique* that involves translating a
  source language to some other -- usually lower-level -- form. When you
  generate bytecode or machine code, you are compiling. When you transpile to
  another high-level language, you are compiling too.

* When we say a language implementation "is a **compiler**", we mean it
  translates source code to some other form but doesn't execute it. The user has
  to take the resulting output and run it themselves.

* Conversely, when we say an implementation "is an **interpreter**", we mean it
  takes in source code and executes it immediately. It runs programs "from
  source".

Like apples and oranges, some implementations are clearly compilers and *not*
interpreters. GCC and Clang take your C code and compile it to machine code. An
end user runs that executable directly and may never even know which tool was
used to compile it. So those are *compilers* for C.

In older versions of Matz's canonical implementation of Ruby, the user ran Ruby
from source. The implementation parsed it and executed it directly by traversing
the syntax tree. No other translation occurred, either internally or in any
user-visible form. So this was definitely an *interpreter* for Ruby.

But what of CPython? When you run your Python program using it, the code is
parsed and converted to an internal bytecode format, which is then executed
inside the VM. From the user's perspective, this is clearly an interpreter --
they run their program from source. But if you look under CPython's scaly skin,
you'll see that there is definitely some compiling going on.

The answer is that it is <span name="go">both</span>. CPython *is* an
interpreter, and it *has* a compiler. In practice, most scripting languages work
this way, as you can see:

<aside name="go">

The [Go tool][] is even more of a horticultural curiosity. If you run `go
build`, it compiles your Go source code to machine code and stops. If you type
`go run`, it does that, then immediately executes the generated executable.

So `go` *is* a compiler (you can use it as a tool to compile code without
running it), *is* an interpreter (you can invoke it to immediately run a program
from source), and also *has* a compiler (when you use it as an interpreter, it
is still compiling internally).

[go tool]: https://golang.org/cmd/go/

</aside>

<img src="image/a-map-of-the-territory/venn.png" alt="A Venn diagram of compilers and interpreters" />

That overlapping region in the center is where our second interpreter lives too,
since it internally compiles to bytecode. So while this book is nominally about
interpreters, we'll cover some compilation too.

## Our Journey

That's a lot to take in all at once. Don't worry. This isn't the chapter where
you're expected to *understand* all of these pieces and parts. I just want you
to know that they are out there and roughly how they fit together.

This map should serve you well as you explore the territory beyond the guided
path we take in this book. I want to leave you yearning to strike out on your
own and wander all over that mountain.

But, for now, it's time for our own journey to begin. Tighten your bootlaces,
cinch up your pack, and come along. From <span name="here">here</span> on out,
all you need to focus on is the path in front of you.

<aside name="here">

Henceforth, I promise to tone down the whole mountain metaphor thing.

</aside>

<div class="challenges">

## Challenges

1. Pick an open source implementation of a language you like. Download the
   source code and poke around in it. Try to find the code that implements the
   scanner and parser. Are they handwritten, or generated using tools like
   Lex and Yacc? (`.l` or `.y` files usually imply the latter.)

1. Just-in-time compilation tends to be the fastest way to implement dynamically
   typed languages, but not all of them use it. What reasons are there to *not*
   JIT?

1. Most Lisp implementations that compile to C also contain an interpreter that
   lets them execute Lisp code on the fly as well. Why?

</div>


================================================
FILE: book/a-tree-walk-interpreter.md
================================================
With this part, we begin jlox, the first of our two interpreters. Programming
languages are a huge topic with piles of concepts and terminology to cram into
your brain all at once. Programming language theory requires a level of mental
rigor that you probably haven't had to summon since your last calculus final.
(Fortunately, there isn't too much theory in this book.)

Implementing an interpreter uses a few architectural tricks and design
patterns uncommon in other kinds of applications, so we'll be getting used to
the engineering side of things too. Given all of that, we'll keep the code we
have to write as simple and plain as possible.

In less than two thousand lines of clean Java code, we'll build a complete
interpreter for Lox that implements every single feature of the language,
exactly as we've specified. The first few chapters work front-to-back through
the phases of the interpreter -- [scanning][], [parsing][], and
[evaluating code][]. After that, we add language features one at a time,
growing a simple calculator into a full-fledged scripting language.

[scanning]: scanning.html
[parsing]: parsing-expressions.html
[evaluating code]: evaluating-expressions.html


================================================
FILE: book/a-virtual-machine.md
================================================
> Magicians protect their secrets not because the secrets are large and
> important, but because they are so small and trivial. The wonderful effects
> created on stage are often the result of a secret so absurd that the magician
> would be embarrassed to admit that that was how it was done.
>
> <cite>Christopher Priest, <em>The Prestige</em></cite>

We've spent a lot of time talking about how to represent a program as a sequence
of bytecode instructions, but it feels like learning biology using only stuffed,
dead animals. We know what instructions are in theory, but we've never seen them
in action, so it's hard to really understand what they *do*. It would be hard to
write a compiler that outputs bytecode when we don't have a good understanding
of how that bytecode behaves.

So, before we go and build the front end of our new interpreter, we will begin
with the back end -- the virtual machine that executes instructions. It breathes
life into the bytecode. Watching the instructions prance around gives us a
clearer picture of how a compiler might translate the user's source code into a
series of them.

## An Instruction Execution Machine

The virtual machine is one part of our interpreter's internal architecture. You
hand it a chunk of code -- literally a Chunk -- and it runs it. The code and
data structures for the VM reside in a new module.

^code vm-h

As usual, we start simple. The VM will gradually acquire a whole pile of state
it needs to keep track of, so we define a struct now to stuff that all in.
Currently, all we store is the chunk that it executes.

Like we do with most of the data structures we create, we also define functions
to create and tear down a VM. Here's the implementation:

^code vm-c

OK, calling those functions "implementations" is a stretch. We don't have any
interesting state to initialize or free yet, so the functions are empty. Trust
me, we'll get there.

The slightly more interesting line here is that declaration of `vm`. This module
is eventually going to have a slew of functions and it would be a chore to pass
around a pointer to the VM to all of them. Instead, we declare a single global
VM object. We need only <span name="one">one</span> anyway, and this keeps the
code in the book a little lighter on the page.

<aside name="one">

The choice to have a static VM instance is a concession for the book, but not
necessarily a sound engineering choice for a real language implementation. If
you're building a VM that's designed to be embedded in other host applications,
it gives the host more flexibility if you *do* explicitly take a VM pointer
and pass it around.

That way, the host app can control when and where memory for the VM is
allocated, run multiple VMs in parallel, etc.

What I'm using here is a global variable, and [everything bad you've heard about
global variables][global] is still true when programming in the large. But when
keeping things small for a book...

[global]: http://gameprogrammingpatterns.com/singleton.html

</aside>

Before we start pumping fun code into our VM, let's go ahead and wire it up to
the interpreter's main entrypoint.

^code main-init-vm (1 before, 1 after)

We spin up the VM when the interpreter first starts. Then when we're about to
exit, we wind it down.

^code main-free-vm (1 before, 1 after)

One last ceremonial obligation:

^code main-include-vm (1 before, 2 after)

Now when you run clox, it starts up the VM before it creates that hand-authored
chunk from the [last chapter][]. The VM is ready and waiting, so let's teach it
to do something.

[last chapter]: chunks-of-bytecode.html#disassembling-chunks

### Executing instructions

The VM springs into action when we command it to interpret a chunk of bytecode.

^code main-interpret (1 before, 1 after)

This function is the main entrypoint into the VM. It's declared like so:

^code interpret-h (1 before, 2 after)

The VM runs the chunk and then responds with a value from this enum:

^code interpret-result (2 before, 2 after)

We aren't using the result yet, but when we have a compiler that reports static
errors and a VM that detects runtime errors, the interpreter will use this to
know how to set the exit code of the process.

We're inching towards some actual implementation.

^code interpret

First, we store the chunk being executed in the VM. Then we call `run()`, an
internal helper function that actually runs the bytecode instructions. Between
those two parts is an intriguing line. What is this `ip` business?

As the VM works its way through the bytecode, it keeps track of where it is --
the location of the instruction currently being executed. We don't use a <span
name="local">local</span> variable inside `run()` for this because eventually
other functions will need to access it. Instead, we store it as a field in VM.

<aside name="local">

If we were trying to squeeze every ounce of speed out of our bytecode
interpreter, we would store `ip` in a local variable. It gets modified so often
during execution that we want the C compiler to keep it in a register.

</aside>

^code ip (2 before, 1 after)

Its type is a byte pointer. We use an actual real C pointer pointing right into
the middle of the bytecode array instead of something like an integer index
because it's faster to dereference a pointer than look up an element in an array
by index.

The name "IP" is traditional, and -- unlike many traditional names in CS --
actually makes sense: it's an **[instruction pointer][ip]**. Almost every
instruction set in the <span name="ip">world</span>, real and virtual, has a
register or variable like this.

[ip]: https://en.wikipedia.org/wiki/Program_counter

<aside name="ip">

x86, x64, and the CLR call it "IP". 68k, PowerPC, ARM, p-code, and the JVM call
it "PC", for **program counter**.

</aside>

We initialize `ip` by pointing it at the first byte of code in the chunk. We
haven't executed that instruction yet, so `ip` points to the instruction *about
to be executed*. This will be true during the entire time the VM is running: the
IP always points to the next instruction, not the one currently being handled.

The real fun happens in `run()`.

^code run

This is the single most <span name="important">important</span> function in all
of clox, by far. When the interpreter executes a user's program, it will spend
something like 90% of its time inside `run()`. It is the beating heart of the
VM.

<aside name="important">

Or, at least, it *will* be in a few chapters when it has enough content to be
useful. Right now, it's not exactly a wonder of software wizardry.

</aside>

Despite that dramatic intro, it's conceptually pretty simple. We have an outer
loop that goes and goes. Each turn through that loop, we read and execute a
single bytecode instruction.

To process an instruction, we first figure out what kind of instruction we're
dealing with. The `READ_BYTE` macro reads the byte currently pointed at by `ip`
and then <span name="next">advances</span> the instruction pointer. The first
byte of any instruction is the opcode. Given a numeric opcode, we need to get to
the right C code that implements that instruction's semantics. This process is
called **decoding** or **dispatching** the instruction.

<aside name="next">

Note that `ip` advances as soon as we read the opcode, before we've actually
started executing the instruction. So, again, `ip` points to the *next*
byte of code to be used.

</aside>

We do that process for every single instruction, every single time one is
executed, so this is the most performance critical part of the entire virtual
machine. Programming language lore is filled with <span
name="dispatch">clever</span> techniques to do bytecode dispatch efficiently,
going all the way back to the early days of computers.

<aside name="dispatch">

If you want to learn some of these techniques, look up "direct threaded code",
"jump table", and "computed goto".

</aside>

Alas, the fastest solutions require either non-standard extensions to C, or
handwritten assembly code. For clox, we'll keep it simple. Just like our
disassembler, we have a single giant `switch` statement with a case for each
opcode. The body of each case implements that opcode's behavior.

So far, we handle only a single instruction, `OP_RETURN`, and the only thing it
does is exit the loop entirely. Eventually, that instruction will be used to
return from the current Lox function, but we don't have functions yet, so we'll
repurpose it temporarily to end the execution.

Let's go ahead and support our one other instruction.

^code op-constant (1 before, 1 after)

We don't have enough machinery in place yet to do anything useful with a
constant. For now, we'll just print it out so we interpreter hackers can see
what's going on inside our VM. That call to `printf()` necessitates an include.

^code vm-include-stdio (1 after)

We also have a new macro to define.

^code read-constant (1 before, 2 after)

`READ_CONSTANT()` reads the next byte from the bytecode, treats the resulting
number as an index, and looks up the corresponding Value in the chunk's constant
table. In later chapters, we'll add a few more instructions with operands that
refer to constants, so we're setting up this helper macro now.

Like the previous `READ_BYTE` macro, `READ_CONSTANT` is only used inside
`run()`. To make that scoping more explicit, the macro definitions themselves
are confined to that function. We <span name="macro">define</span> them at the
beginning and -- because we care -- undefine them at the end.

^code undef-read-constant (1 before, 1 after)

<aside name="macro">

Undefining these macros explicitly might seem needlessly fastidious, but C tends
to punish sloppy users, and the C preprocessor doubly so.

</aside>

### Execution tracing

If you run clox now, it executes the chunk we hand-authored in the last chapter
and spits out `1.2` to your terminal. We can see that it's working, but that's
only because our implementation of `OP_CONSTANT` has temporary code to log the
value. Once that instruction is doing what it's supposed to do and plumbing that
constant along to other operations that want to consume it, the VM will become a
black box. That makes our lives as VM implementers harder.

To help ourselves out, now is a good time to add some diagnostic logging to the
VM like we did with chunks themselves. In fact, we'll even reuse the same code.
We don't want this logging enabled all the time -- it's just for us VM hackers,
not Lox users -- so first we create a flag to hide it behind.

^code define-debug-trace (1 before, 2 after)

When this flag is defined, the VM disassembles and prints each instruction right
before executing it. Where our previous disassembler walked an entire chunk
once, statically, this disassembles instructions dynamically, on the fly.

^code trace-execution (1 before, 1 after)

Since `disassembleInstruction()` takes an integer byte *offset* and we store the
current instruction reference as a direct pointer, we first do a little pointer
math to convert `ip` back to a relative offset from the beginning of the
bytecode. Then we disassemble the instruction that begins at that byte.

As ever, we need to bring in the declaration of the function before we can call
it.

^code vm-include-debug (1 before, 1 after)

I know this code isn't super impressive so far -- it's literally a switch
statement wrapped in a `for` loop -- but, believe it or not, this is one of the two
major components of our VM. With this, we can imperatively execute instructions.
Its simplicity is a virtue -- the less work it does, the faster it can do it.
Contrast this with all of the complexity and overhead we had in jlox with the
Visitor pattern for walking the AST.

## A Value Stack Manipulator

In addition to imperative side effects, Lox has expressions that produce,
modify, and consume values. Thus, our compiled bytecode needs a way to shuttle
values around between the different instructions that need them. For example:

```lox
print 3 - 2;
```

We obviously need instructions for the constants 3 and 2, the `print` statement,
and the subtraction. But how does the subtraction instruction know that 3 is
the <span name="word">minuend</span> and 2 is the subtrahend? How does the print
instruction know to print the result of that?

<aside name="word">

Yes, I did have to look up "subtrahend" and "minuend" in a dictionary. But
aren't they delightful words? "Minuend" sounds like a kind of Elizabethan dance
and "subtrahend" might be some sort of underground Paleolithic monument.

</aside>

To put a finer point on it, look at this thing right here:

```lox
fun echo(n) {
  print n;
  return n;
}

print echo(echo(1) + echo(2)) + echo(echo(4) + echo(5));
```

I wrapped each subexpression in a call to `echo()` that prints and returns its
argument. That side effect means we can see the exact order of operations.

Don't worry about the VM for a minute. Think about just the semantics of Lox
itself. The operands to an arithmetic operator obviously need to be evaluated
before we can perform the operation itself. (It's pretty hard to add `a + b` if
you don't know what `a` and `b` are.) Also, when we implemented expressions in
jlox, we <span name="undefined">decided</span> that the left operand must be
evaluated before the right.

<aside name="undefined">

We could have left evaluation order unspecified and let each implementation
decide. That leaves the door open for optimizing compilers to reorder arithmetic
expressions for efficiency, even in cases where the operands have visible side
effects. C and Scheme leave evaluation order unspecified. Java specifies
left-to-right evaluation like we do for Lox.

I think nailing down stuff like this is generally better for users. When
expressions are not evaluated in the order users intuit -- possibly in different
orders across different implementations! -- it can be a burning hellscape of
pain to figure out what's going on.

</aside>

Here is the syntax tree for the `print` statement:

<img src="image/a-virtual-machine/ast.png" alt="The AST for the example
statement, with numbers marking the order that the nodes are evaluated." />

Given left-to-right evaluation, and the way the expressions are nested, any
correct Lox implementation *must* print these numbers in this order:

```text
1  // from echo(1)
2  // from echo(2)
3  // from echo(1 + 2)
4  // from echo(4)
5  // from echo(5)
9  // from echo(4 + 5)
12 // from print 3 + 9
```

Our old jlox interpreter accomplishes this by recursively traversing the AST. It
does a postorder traversal. First it recurses down the left operand branch,
then the right operand, then finally it evaluates the node itself.

After evaluating the left operand, jlox needs to store that result somewhere
temporarily while it's busy traversing down through the right operand tree. We
use a local variable in Java for that. Our recursive tree-walk interpreter
creates a unique Java call frame for each node being evaluated, so we could have
as many of these local variables as we needed.

In clox, our `run()` function is not recursive -- the nested expression tree is
flattened out into a linear series of instructions. We don't have the luxury of
using C local variables, so how and where should we store these temporary
values? You can probably <span name="guess">guess</span> already, but I want to
really drill into this because it's an aspect of programming that we take for
granted, but we rarely learn *why* computers are architected this way.

<aside name="guess">

Hint: it's in the name of this section, and it's how Java and C manage recursive
calls to functions.

</aside>

Let's do a weird exercise. We'll walk through the execution of the above program
a step at a time:

<img src="image/a-virtual-machine/bars.png" alt="The series of instructions with
bars showing which numbers need to be preserved across which instructions." />

On the left are the steps of code. On the right are the values we're tracking.
Each bar represents a number. It starts when the value is first produced --
either a constant or the result of an addition. The length of the bar tracks
when a previously produced value needs to be kept around, and it ends when that
value finally gets consumed by an operation.

As you step through, you see values appear and then later get eaten. The
longest-lived ones are the values produced from the left-hand side of an
addition. Those stick around while we work through the right-hand operand
expression.

In the above diagram, I gave each unique number its own visual column. Let's be
a little more parsimonious. Once a number is consumed, we allow its column to be
reused for another later value. In other words, we take all of those gaps
up there and fill them in, pushing in numbers from the right:

<img src="image/a-virtual-machine/bars-stacked.png" alt="Like the previous
diagram, but with number bars pushed to the left, forming a stack." />

There's some interesting stuff going on here. When we shift everything over,
each number still manages to stay in a single column for its entire life. Also,
there are no gaps left. In other words, whenever a number appears earlier than
another, then it will live at least as long as that second one. The first number
to appear is the last to be consumed. Hmm... last-in, first-out... why, that's a
<span name="pancakes">stack</span>!

<aside name="pancakes">

This is also a stack:

<img src="image/a-virtual-machine/pancakes.png" alt="A stack... of pancakes." />

</aside>

In the second diagram, each time we introduce a number, we push it onto the
stack from the right. When numbers are consumed, they are always popped off from
rightmost to left.

Since the temporary values we need to track naturally have stack-like behavior,
our VM will use a stack to manage them. When an instruction "produces" a value,
it pushes it onto the stack. When it needs to consume one or more values, it
gets them by popping them off the stack.

### The VM's Stack

Maybe this doesn't seem like a revelation, but I *love* stack-based VMs. When
you first see a magic trick, it feels like something actually magical. But then
you learn how it works -- usually some mechanical gimmick or misdirection -- and
the sense of wonder evaporates. There are a <span name="wonder">couple</span> of
ideas in computer science where even after I pulled them apart and learned all
the ins and outs, some of the initial sparkle remained. Stack-based VMs are one
of those.

<aside name="wonder">

Heaps -- [the data structure][heap], not [the memory management thing][heap mem]
-- are another. And Vaughan Pratt's top-down operator precedence parsing scheme,
which we'll learn about [in due time][pratt].

[heap]: https://en.wikipedia.org/wiki/Heap_(data_structure)
[heap mem]: https://en.wikipedia.org/wiki/Memory_management#HEAP
[pratt]: compiling-expressions.html

</aside>

As you'll see in this chapter, executing instructions in a stack-based VM is
dead <span name="cheat">simple</span>. In later chapters, you'll also discover
that compiling a source language to a stack-based instruction set is a piece of
cake. And yet, this architecture is fast enough to be used by production
language implementations. It almost feels like cheating at the programming
language game.

<aside name="cheat">

To take a bit of the sheen off: stack-based interpreters aren't a silver bullet.
They're often *adequate*, but modern implementations of the JVM, the CLR, and
JavaScript all use sophisticated [just-in-time compilation][jit] pipelines to
generate *much* faster native code on the fly.

[jit]: https://en.wikipedia.org/wiki/Just-in-time_compilation

</aside>

Alrighty, it's codin' time! Here's the stack:

^code vm-stack (3 before, 1 after)

We implement the stack semantics ourselves on top of a raw C array. The bottom
of the stack -- the first value pushed and the last to be popped -- is at
element zero in the array, and later pushed values follow it. If we push the
letters of "crepe" -- my favorite stackable breakfast item -- onto the stack, in
order, the resulting C array looks like this:

<img src="image/a-virtual-machine/array.png" alt="An array containing the
letters in 'crepe' in order starting at element 0." />

Since the stack grows and shrinks as values are pushed and popped, we need to
track where the top of the stack is in the array. As with `ip`, we use a direct
pointer instead of an integer index since it's faster to dereference the pointer
than calculate the offset from the index each time we need it.

The pointer points at the array element just *past* the element containing the
top value on the stack. That seems a little odd, but almost every implementation
does this. It means we can indicate that the stack is empty by pointing at
element zero in the array.

<img src="image/a-virtual-machine/stack-empty.png" alt="An empty array with
stackTop pointing at the first element." />

If we pointed to the top element, then for an empty stack we'd need to point at
element -1. That's <span name="defined">undefined</span> in C. As we push values
onto the stack...

<aside name="defined">

What about when the stack is *full*, you ask, Clever Reader? The C standard is
one step ahead of you. It *is* allowed and well-specified to have an array
pointer that points just past the end of an array.

</aside>

<img src="image/a-virtual-machine/stack-c.png" alt="An array with 'c' at element
zero." />

...`stackTop` always points just past the last item.

<img src="image/a-virtual-machine/stack-crepe.png" alt="An array with 'c', 'r',
'e', 'p', and 'e' in the first five elements." />

I remember it like this: `stackTop` points to where the next value to be pushed
will go. The maximum number of values we can store on the stack (for now, at
least) is:

^code stack-max (1 before, 2 after)

Giving our VM a fixed stack size means it's possible for some sequence of
instructions to push too many values and run out of stack space -- the classic
"stack overflow". We could grow the stack dynamically as needed, but for now
we'll keep it simple. Since VM uses Value, we need to include its declaration.

^code vm-include-value (1 before, 2 after)

Now that VM has some interesting state, we get to initialize it.

^code call-reset-stack (1 before, 1 after)

That uses this helper function:

^code reset-stack

Since the stack array is declared directly inline in the VM struct, we don't
need to allocate it. We don't even need to clear the unused cells in the
array -- we simply won't access them until after values have been stored in
them. The only initialization we need is to set `stackTop` to point to the
beginning of the array to indicate that the stack is empty.

The stack protocol supports two operations:

^code push-pop (1 before, 2 after)

You can push a new value onto the top of the stack, and you can pop the most
recently pushed value back off. Here's the first function:

^code push

If you're rusty on your C pointer syntax and operations, this is a good warm-up.
The first line stores `value` in the array element at the top of the stack.
Remember, `stackTop` points just *past* the last used element, at the next
available one. This stores the value in that slot. Then we increment the pointer
itself to point to the next unused slot in the array now that the previous slot
is occupied.

Popping is the mirror image.

^code pop

First, we move the stack pointer *back* to get to the most recent used slot in
the array. Then we look up the value at that index and return it. We don't need
to explicitly "remove" it from the array -- moving `stackTop` down is enough to
mark that slot as no longer in use.

### Stack tracing

We have a working stack, but it's hard to *see* that it's working. When we start
implementing more complex instructions and compiling and running larger pieces
of code, we'll end up with a lot of values crammed into that array. It would
make our lives as VM hackers easier if we had some visibility into the stack.

To that end, whenever we're tracing execution, we'll also show the current
contents of the stack before we interpret each instruction.

^code trace-stack (1 before, 1 after)

We loop, printing each value in the array, starting at the first (bottom of the
stack) and ending when we reach the top. This lets us observe the effect of each
instruction on the stack. The output is pretty verbose, but it's useful when
we're surgically extracting a nasty bug from the bowels of the interpreter.

Stack in hand, let's revisit our two instructions. First up:

^code push-constant (2 before, 1 after)

In the last chapter, I was hand-wavey about how the `OP_CONSTANT` instruction
"loads" a constant. Now that we have a stack, you know what it means to actually
produce a value: it gets pushed onto the stack.

^code print-return (1 before, 1 after)

Then we make `OP_RETURN` pop the stack and print the top value before exiting.
When we add support for real functions to clox, we'll change this code. But, for
now, it gives us a way to get the VM executing simple instruction sequences and
displaying the result.

## An Arithmetic Calculator

The heart and soul of our VM are in place now. The bytecode loop dispatches and
executes instructions. The stack grows and shrinks as values flow through it.
The two halves work, but it's hard to get a feel for how cleverly they interact
with only the two rudimentary instructions we have so far. So let's teach our
interpreter to do arithmetic.

We'll start with the simplest arithmetic operation, unary negation.

```lox
var a = 1.2;
print -a; // -1.2.
```

The prefix `-` operator takes one operand, the value to negate. It produces a
single result. We aren't fussing with a parser yet, but we can add the
bytecode instruction that the above syntax will compile to.

^code negate-op (1 before, 1 after)

We execute it like so:

^code op-negate (1 before, 1 after)

The instruction needs a value to operate on, which it gets by popping from the
stack. It negates that, then pushes the result back on for later instructions to
use. Doesn't get much easier than that. We can disassemble it too.

^code disassemble-negate (2 before, 1 after)

And we can try it out in our test chunk.

^code main-negate (1 before, 2 after)

After loading the constant, but before returning, we execute the negate
instruction. That replaces the constant on the stack with its negation. Then the
return instruction prints that out:

```text
-1.2
```

Magical!

### Binary operators

OK, unary operators aren't *that* impressive. We still only ever have a single
value on the stack. To really see some depth, we need binary operators. Lox has
four binary <span name="ops">arithmetic</span> operators: addition, subtraction,
multiplication, and division. We'll go ahead and implement them all at the same
time.

<aside name="ops">

Lox has some other binary operators -- comparison and equality -- but those
don't produce numbers as a result, so we aren't ready for them yet.

</aside>

^code binary-ops (1 before, 1 after)

Back in the bytecode loop, they are executed like this:

^code op-binary (1 before, 1 after)

The only difference between these four instructions is which underlying C
operator they ultimately use to combine the two operands. Surrounding that core
arithmetic expression is some boilerplate code to pull values off the stack and
push the result. When we later add dynamic typing, that boilerplate will grow.
To avoid repeating that code four times, I wrapped it up in a macro.

^code binary-op (1 before, 2 after)

I admit this is a fairly <span name="operator">adventurous</span> use of the C
preprocessor. I hesitated to do this, but you'll be glad in later chapters when
we need to add the type checking for each operand and stuff. It would be a chore
to walk you through the same code four times.

<aside name="operator">

Did you even know you can pass an *operator* as an argument to a macro? Now you
do. The preprocessor doesn't care that operators aren't first class in C. As far
as it's concerned, it's all just text tokens.

I know, you can just *feel* the temptation to abuse this, can't you?

</aside>

If you aren't familiar with the trick already, that outer `do while` loop
probably looks really weird. This macro needs to expand to a series of
statements. To be careful macro authors, we want to ensure those statements all
end up in the same scope when the macro is expanded. Imagine if you defined:

```c
#define WAKE_UP() makeCoffee(); drinkCoffee();
```

And then used it like:

```c
if (morning) WAKE_UP();
```

The intent is to execute both statements of the macro body only if `morning` is
true. But it expands to:

```c
if (morning) makeCoffee(); drinkCoffee();;
```

Oops. The `if` attaches only to the *first* statement. You might think you could
fix this using a block.

```c
#define WAKE_UP() { makeCoffee(); drinkCoffee(); }
```

That's better, but you still risk:

```c
if (morning)
  WAKE_UP();
else
  sleepIn();
```

Now you get a compile error on the `else` because of that trailing `;` after the
macro's block. Using a `do while` loop in the macro looks funny, but it gives
you a way to contain multiple statements inside a block that *also* permits a
semicolon at the end.

Where were we? Right, so what the body of that macro does is straightforward. A
binary operator takes two operands, so it pops twice. It performs the operation
on those two values and then pushes the result.

Pay close attention to the *order* of the two pops. Note that we assign the
first popped operand to `b`, not `a`. It looks backwards. When the operands
themselves are calculated, the left is evaluated first, then the right. That
means the left operand gets pushed before the right operand. So the right
operand will be on top of the stack. Thus, the first value we pop is `b`.

For example, if we compile `3 - 1`, the data flow between the instructions looks
like so:

<img src="image/a-virtual-machine/reverse.png" alt="A sequence of instructions
with the stack for each showing how pushing and then popping values reverses
their order." />

As we did with the other macros inside `run()`, we clean up after ourselves at
the end of the function.

^code undef-binary-op (1 before, 1 after)

Last is disassembler support.

^code disassemble-binary (2 before, 1 after)

The arithmetic instruction formats are simple, like `OP_RETURN`. Even though the
arithmetic *operators* take operands -- which are found on the stack -- the
arithmetic *bytecode instructions* do not.

Let's put some of our new instructions through their paces by evaluating a
larger expression:

<img src="image/a-virtual-machine/chunk.png" alt="The expression being
evaluated: -((1.2 + 3.4) / 5.6)" />

Building on our existing example chunk, here are the additional instructions we
need to hand-compile that AST to bytecode.

^code main-chunk (3 before, 3 after)

The addition goes first. The instruction for the left constant, 1.2, is already
there, so we add another for 3.4. Then we add those two using `OP_ADD`, leaving
it on the stack. That covers the left side of the division. Next we push the
5.6, and divide the result of the addition by it. Finally, we negate the result
of that.

Note how the output of the `OP_ADD` implicitly flows into being an operand of
`OP_DIVIDE` without either instruction being directly coupled to each other.
That's the magic of the stack. It lets us freely compose instructions without
them needing any complexity or awareness of the data flow. The stack acts like a
shared workspace that they all read from and write to.

In this tiny example chunk, the stack still only gets two values tall, but when
we start compiling Lox source to bytecode, we'll have chunks that use much more
of the stack. In the meantime, try playing around with this hand-authored chunk
to calculate different nested arithmetic expressions and see how values flow
through the instructions and stack.

You may as well get it out of your system now. This is the last chunk we'll
build by hand. When we next revisit bytecode, we will be writing a compiler to
generate it for us.

<div class="challenges">

## Challenges

1.  What bytecode instruction sequences would you generate for the following
    expressions:

    ```lox
    1 * 2 + 3
    1 + 2 * 3
    3 - 2 - 1
    1 + 2 * 3 - 4 / -5
    ```

    (Remember that Lox does not have a syntax for negative number literals, so
    the `-5` is negating the number 5.)

1.  If we really wanted a minimal instruction set, we could eliminate either
    `OP_NEGATE` or `OP_SUBTRACT`. Show the bytecode instruction sequence you
    would generate for:

    ```lox
    4 - 3 * -2
    ```

    First, without using `OP_NEGATE`. Then, without using `OP_SUBTRACT`.

    Given the above, do you think it makes sense to have both instructions? Why
    or why not? Are there any other redundant instructions you would consider
    including?

1.  Our VM's stack has a fixed size, and we don't check if pushing a value
    overflows it. This means the wrong series of instructions could cause our
    interpreter to crash or go into undefined behavior. Avoid that by
    dynamically growing the stack as needed.

    What are the costs and benefits of doing so?

1.  To interpret `OP_NEGATE`, we pop the operand, negate the value, and then
    push the result. That's a simple implementation, but it increments and
    decrements `stackTop` unnecessarily, since the stack ends up the same height
    in the end. It might be faster to simply negate the value in place on the
    stack and leave `stackTop` alone. Try that and see if you can measure a
    performance difference.

    Are there other instructions where you can do a similar optimization?

</div>

<div class="design-note">

## Design Note: Register-Based Bytecode

For the remainder of this book, we'll meticulously implement an interpreter
around a stack-based bytecode instruction set. There's another family of
bytecode architectures out there -- *register-based*. Despite the name, these
bytecode instructions aren't quite as difficult to work with as the registers in
an actual chip like <span name="x64">x64</span>. With real hardware registers,
you usually have only a handful for the entire program, so you spend a lot of
effort [trying to use them efficiently and shuttling stuff in and out of
them][register allocation].

[register allocation]: https://en.wikipedia.org/wiki/Register_allocation

<aside name="x64">

Register-based bytecode is a little closer to the [*register windows*][window]
supported by SPARC chips.

[window]: https://en.wikipedia.org/wiki/Register_window

</aside>

In a register-based VM, you still have a stack. Temporary values still get
pushed onto it and popped when no longer needed. The main difference is that
instructions can read their inputs from anywhere in the stack and can store
their outputs into specific stack slots.

Take this little Lox script:

```lox
var a = 1;
var b = 2;
var c = a + b;
```

In our stack-based VM, the last statement will get compiled to something like:

```lox
load <a>  // Read local variable a and push onto stack.
load <b>  // Read local variable b and push onto stack.
add       // Pop two values, add, push result.
store <c> // Pop value and store in local variable c.
```

(Don't worry if you don't fully understand the load and store instructions yet.
We'll go over them in much greater detail [when we implement
variables][variables].) We have four separate instructions. That means four
times through the bytecode interpret loop, four instructions to decode and
dispatch. It's at least seven bytes of code -- four for the opcodes and another
three for the operands identifying which locals to load and store. Three pushes
and three pops. A lot of work!

[variables]: global-variables.html

In a register-based instruction set, instructions can read from and store
directly into local variables. The bytecode for the last statement above looks
like:

```lox
add <a> <b> <c> // Read values from a and b, add, store in c.
```

The add instruction is bigger -- it has three instruction operands that define
where in the stack it reads its inputs from and writes the result to. But since
local variables live on the stack, it can read directly from `a` and `b` and
then store the result right into `c`.

There's only a single instruction to decode and dispatch, and the whole thing
fits in four bytes. Decoding is more complex because of the additional operands,
but it's still a net win. There's no pushing and popping or other stack
manipulation.

The main implementation of Lua used to be stack-based. For <span name="lua">Lua
5.0</span>, the implementers switched to a register instruction set and noted a
speed improvement. The amount of improvement, naturally, depends heavily on the
details of the language semantics, specific instruction set, and compiler
sophistication, but that should get your attention.

<aside name="lua">

The Lua dev team -- Roberto Ierusalimschy, Waldemar Celes, and Luiz Henrique de
Figueiredo -- wrote a *fantastic* paper on this, one of my all time favorite
computer science papers, "[The Implementation of Lua 5.0][lua]" (PDF).

[lua]: https://www.lua.org/doc/jucs05.pdf

</aside>

That raises the obvious question of why I'm going to spend the rest of the book
doing a stack-based bytecode. Register VMs are neat, but they are quite a bit
harder to write a compiler for. For what is likely to be your very first
compiler, I wanted to stick with an instruction set that's easy to generate and
easy to execute. Stack-based bytecode is marvelously simple.

It's also *much* better known in the literature and the community. Even though
you may eventually move to something more advanced, it's a good common ground to
share with the rest of your language hacker peers.

</div>


================================================
FILE: book/acknowledgements.md
================================================
When the first copy of "[Game Programming Patterns][gpp]" sold, I guess I had
the right to call myself an author. But it took time to feel comfortable with
that label. Thank you to everyone who bought copies of my first book, and to the
publishers and translators who brought it to other languages. You gave me the
confidence to believe I could tackle a project of this scope. Well, that, and
massively underestimating what I was getting myself into, but that's on me.

[gpp]: https://gameprogrammingpatterns.com/

A fear particular to technical writing is *getting stuff wrong*. Tests and
static analysis only get you so far. Once the code and prose are in ink on paper,
there's no fixing it. I am deeply grateful to the many people who filed issues
and pull requests on the [open source repo][repo] for the book. Special thanks
go to cm1776, who filed 145 tactfully worded issues pointing out hundreds of
code errors, typos, and unclear sentences. The book is more accurate and
readable because of you all.

[repo]: https://github.com/munificent/craftinginterpreters

I'm grateful to my copy editor Kari Somerton who braved a heap of computer
science jargon and an unfamiliar workflow in order to fix my many grammar errors
and stylistic inconsistencies.

When the pandemic turned everyone's life upside down, a number of people reached
out to tell me that my book provided a helpful distraction. This book that I
spent six years writing forms a chapter in my own life's story and I'm grateful
to the readers who contacted me and made that chapter more meaningful.

Finally, the deepest thanks go to my wife Megan and my daughters Lily and
Gretchen. You patiently endured the time I had to sink into the book, and my
stress while writing it. There's no one I'd rather be stuck at home with.


================================================
FILE: book/appendix-i.md
================================================
Here is a complete grammar for Lox. The chapters that introduce each part of the
language include the grammar rules there, but this collects them all into one
place.

## Syntax Grammar

The syntactic grammar is used to parse the linear sequence of tokens into the
nested syntax tree structure. It starts with the first rule that matches an
entire Lox program (or a single REPL entry).

```ebnf
program        → declaration* EOF ;
```

### Declarations

A program is a series of declarations, which are the statements that bind new
identifiers or any of the other statement types.

```ebnf
declaration    → classDecl
               | funDecl
               | varDecl
               | statement ;

classDecl      → "class" IDENTIFIER ( "<" IDENTIFIER )?
                 "{" function* "}" ;
funDecl        → "fun" function ;
varDecl        → "var" IDENTIFIER ( "=" expression )? ";" ;
```

### Statements

The remaining statement rules produce side effects, but do not introduce
bindings.

```ebnf
statement      → exprStmt
               | forStmt
               | ifStmt
               | printStmt
               | returnStmt
               | whileStmt
               | block ;

exprStmt       → expression ";" ;
forStmt        → "for" "(" ( varDecl | exprStmt | ";" )
                           expression? ";"
                           expression? ")" statement ;
ifStmt         → "if" "(" expression ")" statement
                 ( "else" statement )? ;
printStmt      → "print" expression ";" ;
returnStmt     → "return" expression? ";" ;
whileStmt      → "while" "(" expression ")" statement ;
block          → "{" declaration* "}" ;
```

Note that `block` is a statement rule, but is also used as a nonterminal in a
couple of other rules for things like function bodies.

### Expressions

Expressions produce values. Lox has a number of unary and binary operators with
different levels of precedence. Some grammars for languages do not directly
encode the precedence relationships and specify that elsewhere. Here, we use a
separate rule for each precedence level to make it explicit.

```ebnf
expression     → assignment ;

assignment     → ( call "." )? IDENTIFIER "=" assignment
               | logic_or ;

logic_or       → logic_and ( "or" logic_and )* ;
logic_and      → equality ( "and" equality )* ;
equality       → comparison ( ( "!=" | "==" ) comparison )* ;
comparison     → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
term           → factor ( ( "-" | "+" ) factor )* ;
factor         → unary ( ( "/" | "*" ) unary )* ;

unary          → ( "!" | "-" ) unary | call ;
call           → primary ( "(" arguments? ")" | "." IDENTIFIER )* ;
primary        → "true" | "false" | "nil" | "this"
               | NUMBER | STRING | IDENTIFIER | "(" expression ")"
               | "super" "." IDENTIFIER ;
```

### Utility rules

In order to keep the above rules a little cleaner, some of the grammar is
split out into a few reused helper rules.

```ebnf
function       → IDENTIFIER "(" parameters? ")" block ;
parameters     → IDENTIFIER ( "," IDENTIFIER )* ;
arguments      → expression ( "," expression )* ;
```

## Lexical Grammar

The lexical grammar is used by the scanner to group characters into tokens.
Where the syntax is [context free][], the lexical grammar is [regular][] -- note
that there are no recursive rules.

[context free]: https://en.wikipedia.org/wiki/Context-free_grammar
[regular]: https://en.wikipedia.org/wiki/Regular_grammar

```ebnf
NUMBER         → DIGIT+ ( "." DIGIT+ )? ;
STRING         → "\"" <any char except "\"">* "\"" ;
IDENTIFIER     → ALPHA ( ALPHA | DIGIT )* ;
ALPHA          → "a" ... "z" | "A" ... "Z" | "_" ;
DIGIT          → "0" ... "9" ;
```


================================================
FILE: book/appendix-ii.md
================================================
For your edification, here is the code produced by [the little script
we built][generator] to automate generating the syntax tree classes for jlox.

[generator]: representing-code.html#metaprogramming-the-trees

## Expressions

Expressions are the first syntax tree nodes we see, introduced in "[Representing
Code](representing-code.html)". The main Expr class defines the visitor
interface used to dispatch against the specific expression types, and contains
the other expression subclasses as nested classes.

^code expr

### Assign expression

Variable assignment is introduced in "[Statements and
State](statements-and-state.html#assignment)".

^code expr-assign

### Binary expression

Binary operators are introduced in "[Representing
Code](representing-code.html)".

^code expr-binary

### Call expression

Function call expressions are introduced in
"[Functions](functions.html#function-calls)".

^code expr-call

### Get expression

Property access, or "get" expressions, are introduced in
"[Classes](classes.html#properties-on-instances)".

^code expr-get

### Grouping expression

Using parentheses to group expressions is introduced in "[Representing
Code](representing-code.html)".

^code expr-grouping

### Literal expression

Literal value expressions are introduced in "[Representing
Code](representing-code.html)".

^code expr-literal

### Logical expression

The logical `and` and `or` operators are introduced in "[Control
Flow](control-flow.html#logical-operators)".

^code expr-logical

### Set expression

Property assignment, or "set" expressions, are introduced in
"[Classes](classes.html#properties-on-instances)".

^code expr-set

### Super expression

The `super` expression is introduced in
"[Inheritance](inheritance.html#calling-superclass-methods)".

^code expr-super

### This expression

The `this` expression is introduced in "[Classes](classes.html#this)".

^code expr-this

### Unary expression

Unary operators are introduced in "[Representing Code](representing-code.html)".

^code expr-unary

### Variable expression

Variable access expressions are introduced in "[Statements and
State](statements-and-state.html#variable-syntax)".

^code expr-variable

## Statements

Statements form a second hierarchy of syntax tree nodes independent of
expressions. We add the first couple of them in "[Statements and
State](statements-and-state.html)".

^code stmt

### Block statement

The curly-braced block statement that defines a local scope is introduced in
"[Statements and State](statements-and-state.html#block-syntax-and-semantics)".

^code stmt-block

### Class statement

Class declarations are introduced in, unsurprisingly,
"[Classes](classes.html#class-declarations)".

^code stmt-class

### Expression statement

The expression statement is introduced in "[Statements and
State](statements-and-state.html#statements)".

^code stmt-expression

### Function statement

Function declarations are introduced in, you guessed it,
"[Functions](functions.html#function-declarations)".

^code stmt-function

### If statement

The `if` statement is introduced in "[Control
Flow](control-flow.html#conditional-execution)".

^code stmt-if

### Print statement

The `print` statement is introduced in "[Statements and
State](statements-and-state.html#statements)".

^code stmt-print

### Return statement

You need a function to return from, so `return` statements are introduced in
"[Functions](functions.html#return-statements)".

^code stmt-return

### Variable statement

Variable declarations are introduced in "[Statements and
State](statements-and-state.html#variable-syntax)".

^code stmt-var

### While statement

The `while` statement is introduced in "[Control
Flow](control-flow.html#while-loops)".

^code stmt-while


================================================
FILE: book/backmatter.md
================================================
You've reached the end of the book! There are two pieces of supplementary
material you may find helpful:

* **[Appendix I][]** contains a complete grammar for Lox, all in one place.

* **[Appendix II][]** shows the Java classes produced by [the AST generator][]
  we use for jlox.

[appendix i]: appendix-i.html
[appendix ii]: appendix-ii.html
[the ast generator]: representing-code.html#metaprogramming-the-trees


================================================
FILE: book/calls-and-functions.md
================================================
> Any problem in computer science can be solved with another level of
> indirection. Except for the problem of too many layers of indirection.
>
> <cite>David Wheeler</cite>

This chapter is a beast. I try to break features into bite-sized pieces, but
sometimes you gotta swallow the whole <span name="eat">meal</span>. Our next
task is functions. We could start with only function declarations, but that's
not very useful when you can't call them. We could do calls, but there's nothing
to call. And all of the runtime support needed in the VM to support both of
those isn't very rewarding if it isn't hooked up to anything you can see. So
we're going to do it all. It's a lot, but we'll feel good when we're done.

<aside name="eat">

Eating -- consumption -- is a weird metaphor for a creative act. But most of the
biological processes that produce "output" are a little less, ahem, decorous.

</aside>

## Function Objects

The most interesting structural change in the VM is around the stack. We already
*have* a stack for local variables and temporaries, so we're partway there. But
we have no notion of a *call* stack. Before we can make much progress, we'll
have to fix that. But first, let's write some code. I always feel better once I
start moving. We can't do much without having some kind of representation for
functions, so we'll start there. From the VM's perspective, what is a function?

A function has a body that can be executed, so that means some bytecode. We
could compile the entire program and all of its function declarations into one
big monolithic Chunk. Each function would have a pointer to the first
instruction of its code inside the Chunk.

This is roughly how compilation to native code works where you end up with one
solid blob of machine code. But for our bytecode VM, we can do something a
little higher level. I think a cleaner model is to give each function its own
Chunk. We'll want some other metadata too, so let's go ahead and stuff it all in
a struct now.

^code obj-function (2 before, 2 after)

Functions are first class in Lox, so they need to be actual Lox objects. Thus
ObjFunction has the same Obj header that all object types share. The `arity`
field stores the number of parameters the function expects. Then, in addition to
the chunk, we store the function's <span name="name">name</span>. That will be
handy for reporting readable runtime errors.

<aside name="name">

Humans don't seem to find numeric bytecode offsets particularly illuminating in
crash dumps.

</aside>

This is the first time the "object" module has needed to reference Chunk, so we
get an include.

^code object-include-chunk (1 before, 1 after)

Like we did with strings, we define some accessories to make Lox functions
easier to work with in C. Sort of a poor man's object orientation. First, we'll
declare a C function to create a new Lox function.

^code new-function-h (3 before, 1 after)

The implementation is over here:

^code new-function

We use our friend `ALLOCATE_OBJ()` to allocate memory and initialize the
object's header so that the VM knows what type of object it is. Instead of
passing in arguments to initialize the function like we did with ObjString, we
set the function up in a sort of blank state -- zero arity, no name, and no
code. That will get filled in later after the function is created.

Since we have a new kind of object, we need a new object type in the enum.

^code obj-type-function (1 before, 2 after)

When we're done with a function object, we must return the bits it borrowed back
to the operating system.

^code free-function (1 before, 1 after)

This switch case is <span name="free-name">responsible</span> for freeing the
ObjFunction itself as well as any other memory it owns. Functions own their
chunk, so we call Chunk's destructor-like function.

<aside name="free-name">

We don't need to explicitly free the function's name because it's an ObjString.
That means we can let the garbage collector manage its lifetime for us. Or, at
least, we'll be able to once we [implement a garbage collector][gc].

[gc]: garbage-collection.html

</aside>

Lox lets you print any object, and functions are first-class objects, so we
need to handle them too.

^code print-function (1 before, 1 after)

This calls out to:

^code print-function-helper

Since a function knows its name, it may as well say it.

Finally, we have a couple of macros for converting values to functions. First,
make sure your value actually *is* a function.

^code is-function (2 before, 1 after)

Assuming that evaluates to true, you can then safely cast the Value to an
ObjFunction pointer using this:

^code as-function (2 before, 1 after)

With that, our object model knows how to represent functions. I'm feeling warmed
up now. You ready for something a little harder?

## Compiling to Function Objects

Right now, our compiler assumes it is always compiling to one single chunk. With
each function's code living in separate chunks, that gets more complex. When the
compiler reaches a function declaration, it needs to emit code into the
function's chunk when compiling its body. At the end of the function body, the
compiler needs to return to the previous chunk it was working with.

That's fine for code inside function bodies, but what about code that isn't? The
"top level" of a Lox program is also imperative code and we need a chunk to
compile that into. We can simplify the compiler and VM by placing that top-level
code inside an automatically defined function too. That way, the compiler is
always within some kind of function body, and the VM always runs code by
invoking a function. It's as if the entire program is <span
name="wrap">wrapped</span> inside an implicit `main()` function.

<aside name="wrap">

One semantic corner where that analogy breaks down is global variables. They
have special scoping rules different from local variables, so in that way, the
top level of a script isn't like a function body.

</aside>

Before we get to user-defined functions, then, let's do the reorganization to
support that implicit top-level function. It starts with the Compiler struct.
Instead of pointing directly to a Chunk that the compiler writes to, it instead
has a reference to the function object being built.

^code function-fields (1 before, 1 after)

We also have a little FunctionType enum. This lets the compiler tell when it's
compiling top-level code versus the body of a function. Most of the compiler
doesn't care about this -- that's why it's a useful abstraction -- but in one or
two places the distinction is meaningful. We'll get to one later.

^code function-type-enum

Every place in the compiler that was writing to the Chunk now needs to go
through that `function` pointer. Fortunately, many <span
name="current">chapters</span> ago, we encapsulated access to the chunk in the
`currentChunk()` function. We only need to fix that and the rest of the compiler
is happy.

<aside name="current">

It's almost like I had a crystal ball that could see into the future and knew
we'd need to change the code later. But, really, it's because I wrote all the
code for the book before any of the text.

</aside>

^code current-chunk (1 before, 2 after)

The current chunk is always the chunk owned by the function we're in the middle
of compiling. Next, we need to actually create that function. Previously, the VM
passed a Chunk to the compiler which filled it with code. Instead, the compiler
will create and return a function that contains the compiled top-level code --
which is all we support right now -- of the user's program.

### Creating functions at compile time

We start threading this through in `compile()`, which is the main entry point
into the compiler.

^code call-init-compiler (1 before, 2 after)

There are a bunch of changes in how the compiler is initialized. First, we
initialize the new Compiler fields.

^code init-compiler (1 after)

Then we allocate a new function object to compile into.

^code init-function (1 before, 1 after)

<span name="null"></span>

<aside name="null">

I know, it looks dumb to null the `function` field only to immediately assign it
a value a few lines later. More garbage collection-related paranoia.

</aside>

Creating an ObjFunction in the compiler might seem a little strange. A function
object is the *runtime* representation of a function, but here we are creating
it at compile time. The way to think of it is that a function is similar to a
string or number literal. It forms a bridge between the compile time and runtime
worlds. When we get to function *declarations*, those really *are* literals
-- they are a notation that produces values of a built-in type. So the <span
name="closure">compiler</span> creates function objects during compilation.
Then, at runtime, they are simply invoked.

<aside name="closure">

We can create functions at compile time because they contain only data available
at compile time. The function's code, name, and arity are all fixed. When we add
closures in the [next chapter][closures], which capture variables at runtime,
the story gets more complex.

[closures]: closures.html

</aside>

Here is another strange piece of code:

^code init-function-slot (1 before, 1 after)

Remember that the compiler's `locals` array keeps track of which stack slots are
associated with which local variables or temporaries. From now on, the compiler
implicitly claims stack slot zero for the VM's own internal use. We give it an
empty name so that the user can't write an identifier that refers to it. I'll
explain what this is about when it becomes useful.

That's the initialization side. We also need a couple of changes on the other
end when we finish compiling some code.

^code end-compiler (1 after)

Previously, when `interpret()` called into the compiler, it passed in a Chunk to
be written to. Now that the compiler creates the function object itself, we
return that function. We grab it from the current compiler here:

^code end-function (1 before, 1 after)

And then return it to `compile()` like so:

^code return-function (1 before, 1 after)

Now is a good time to make another tweak in this function. Earlier, we added
some diagnostic code to have the VM dump the disassembled bytecode so we could
debug the compiler. We should fix that to keep working now that the generated
chunk is wrapped in a function.

^code disassemble-end (2 before, 2 after)

Notice the check in here to see if the function's name is `NULL`? User-defined
functions have names, but the implicit function we create for the top-level code
does not, and we need to handle that gracefully even in our own diagnostic code.
Speaking of which:

^code print-script (1 before, 1 after)

There's no way for a *user* to get a reference to the top-level function and try
to print it, but our `DEBUG_TRACE_EXECUTION` <span
name="debug">diagnostic</span> code that prints the entire stack can and does.

<aside name="debug">

It is no fun if the diagnostic code we use to find bugs itself causes the VM to
segfault!

</aside>

Bumping up a level to `compile()`, we adjust its signature.

^code compile-h (2 before, 2 after)

Instead of taking a chunk, now it returns a function. Over in the
implementation:

^code compile-signature (1 after)

Finally we get to some actual code. We change the very end of the function to
this:

^code call-end-compiler (4 before, 1 after)

We get the function object from the compiler. If there were no compile errors,
we return it. Otherwise, we signal an error by returning `NULL`. This way, the
VM doesn't try to execute a function that may contain invalid bytecode.

Eventually, we will update `interpret()` to handle the new declaration of
`compile()`, but first we have some other changes to make.

## Call Frames

It's time for a big conceptual leap. Before we can implement function
declarations and calls, we need to get the VM ready to handle them. There are
two main problems we need to worry about:

### Allocating local variables

The compiler allocates stack slots for local variables. How should that work
when the set of local variables in a program is distributed across multiple
functions?

One option would be to keep them totally separate. Each function would get its
own dedicated set of slots in the VM stack that it would own <span
name="static">forever</span>, even when the function isn't being called. Each
local variable in the entire program would have a bit of memory in the VM that
it keeps to itself.

<aside name="static">

It's basically what you'd get if you declared every local variable in a C
program using `static`.

</aside>

Believe it or not, early programming language implementations worked this way.
The first Fortran compilers statically allocated memory for each variable. The
obvious problem is that it's really inefficient. Most functions are not in the
middle of being called at any point in time, so sitting on unused memory for
them is wasteful.

The more fundamental problem, though, is recursion. With recursion, you can be
"in" multiple calls to the same function at the same time. Each needs its <span
name="fortran">own</span> memory for its local variables. In jlox, we solved
this by dynamically allocating memory for an environment each time a function
was called or a block entered. In clox, we don't want that kind of performance
cost on every function call.

<aside name="fortran">

Fortran avoided this problem by disallowing recursion entirely. Recursion was
considered an advanced, esoteric feature at the time.

</aside>

Instead, our solution lies somewhere between Fortran's static allocation and
jlox's dynamic approach. The value stack in the VM works on the observation that
local variables and temporaries behave in a last-in first-out fashion.
Fortunately for us, that's still true even when you add function calls into the
mix. Here's an example:

```lox
fun first() {
  var a = 1;
  second();
  var b = 2;
}

fun second() {
  var c = 3;
  var d = 4;
}

first();
```

Step through the program and look at which variables are in memory at each point
in time:

<img src="image/calls-and-functions/calls.png" alt="Tracing through the execution of the previous program, showing the stack of variables at each step." />

As execution flows through the two calls, every local variable obeys the
principle that any variable declared after it will be discarded before the first
variable needs to be. This is true even across calls. We know we'll be done with
`c` and `d` before we are done with `a`. It seems we should be able to allocate
local variables on the VM's value stack.

Ideally, we still determine *where* on the stack each variable will go at
compile time. That keeps the bytecode instructions for working with variables
simple and fast. In the above example, we could <span
name="imagine">imagine</span> doing so in a straightforward way, but that
doesn't always work out. Consider:

<aside name="imagine">

I say "imagine" because the compiler can't actually figure this out. Because
functions are first class in Lox, we can't determine which functions call which
others at compile time.

</aside>

```lox
fun first() {
  var a = 1;
  second();
  var b = 2;
  second();
}

fun second() {
  var c = 3;
  var d = 4;
}

first();
```

In the first call to `second()`, `c` and `d` would go into slots 1 and 2. But in
the second call, we need to have made room for `b`, so `c` and `d` need to be in
slots 2 and 3. Thus the compiler can't pin down an exact slot for each local
variable across function calls. But *within* a given function, the *relative*
locations of each local variable are fixed. Variable `d` is always in the slot
right after `c`. This is the key insight.

When a function is called, we don't know where the top of the stack will be
because it can be called from different contexts. But, wherever that top happens
to be, we do know where all of the function's local variables will be relative
to that starting point. So, like many problems, we solve our allocation problem
with a level of indirection.

At the beginning of each function call, the VM records the location of the first
slot where that function's own locals begin. The instructions for working with
local variables access them by a slot index relative to that, instead of
relative to the bottom of the stack like they do today. At compile time, we
calculate those relative slots. At runtime, we convert that relative slot to an
absolute stack index by adding the function call's starting slot.

It's as if the function gets a "window" or "frame" within the larger stack where
it can store its locals. The position of the **call frame** is determined at
runtime, but within and relative to that region, we know where to find things.

<img src="image/calls-and-functions/window.png" alt="The stack at the two points when second() is called, with a window hovering over each one showing the pair of stack slots used by the function." />

The historical name for this recorded location where the function's locals start
is a **frame pointer** because it points to the beginning of the function's call
frame. Sometimes you hear **base pointer**, because it points to the base stack
slot on top of which all of the function's variables live.

That's the first piece of data we need to track. Every time we call a function,
the VM determines the first stack slot where that function's variables begin.

### Return addresses

Right now, the VM works its way through the instruction stream by incrementing
the `ip` field. The only interesting behavior is around control flow
instructions which offset the `ip` by larger amounts. *Calling* a function is
pretty straightforward -- simply set `ip` to point to the first instruction in
that function's chunk. But what about when the function is done?

The VM needs to <span name="return">return</span> back to the chunk where the
function was called from and resume execution at the instruction immediately
after the call. Thus, for each function call, we need to track where we jump
back to when the call completes. This is called a **return address** because
it's the address of the instruction that the VM returns to after the call.

Again, thanks to recursion, there may be multiple return addresses for a single
function, so this is a property of each *invocation* and not the function
itself.

<aside name="return">

The authors of early Fortran compilers had a clever trick for implementing
return addresses. Since they *didn't* support recursion, any given function
needed only a single return address at any point in time. So when a function was
called at runtime, the program would *modify its own code* to change a jump
instruction at the end of the function to jump back to its caller. Sometimes the
line between genius and madness is hair thin.

</aside>

### The call stack

So for each live function invocation -- each call that hasn't returned yet -- we
need to track where on the stack that function's locals begin, and where the
caller should resume. We'll put this, along with some other stuff, in a new
struct.

^code call-frame (1 before, 2 after)

A CallFrame represents a single ongoing function call. The `slots` field points
into the VM's value stack at the first slot that this function can use. I gave
it a plural name because -- thanks to C's weird "pointers are sort of arrays"
thing -- we'll treat it like an array.

The implementation of return addresses is a little different from what I
described above. Instead of storing the return address in the callee's frame,
the caller stores its own `ip`. When we return from a function, the VM will jump
to the `ip` of the caller's CallFrame and resume from there.

I also stuffed a pointer to the function being called in here. We'll use that to
look up constants and for a few other things.

Each time a function is called, we create one of these structs. We could <span
name="heap">dynamically</span> allocate them on the heap, but that's slow.
Function calls are a core operation, so they need to be as fast as possible.
Fortunately, we can make the same observation we made for variables: function
calls have stack semantics. If `first()` calls `second()`, the call to
`second()` will complete before `first()` does.

<aside name="heap">

Many Lisp implementations dynamically allocate stack frames because it
simplifies implementing [continuations][cont]. If your language supports
continuations, then function calls do *not* always have stack semantics.

[cont]: https://en.wikipedia.org/wiki/Continuation

</aside>

So over in the VM, we create an array of these CallFrame structs up front and
treat it as a stack, like we do with the value array.

^code frame-array (1 before, 1 after)

This array replaces the `chunk` and `ip` fields we used to have directly in the
VM. Now each CallFrame has its own `ip` and its own pointer to the ObjFunction
that it's executing. From there, we can get to the function's chunk.

The new `frameCount` field in the VM stores the current height of the CallFrame
stack -- the number of ongoing function calls. To keep clox simple, the array's
capacity is fixed. This means, as in many language implementations, there is a
maximum call depth we can handle. For clox, it's defined here:

^code frame-max (2 before, 2 after)

We also redefine the value stack's <span name="plenty">size</span> in terms of
that to make sure we have plenty of stack slots even in very deep call trees.
When the VM starts up, the CallFrame stack is empty.

<aside name="plenty">

It is still possible to overflow the stack if enough function calls use enough
temporaries in addition to locals. A robust implementation would guard against
this, but I'm trying to keep things simple.

</aside>

^code reset-frame-count (1 before, 1 after)

The "vm.h" header needs access to ObjFunction, so we add an include.

^code vm-include-object (2 before, 1 after)

Now we're ready to move over to the VM's implementation file. We've got some
grunt work ahead of us. We've moved `ip` out of the VM struct and into
CallFrame. We need to fix every line of code in the VM that touches `ip` to
handle that. Also, the instructions that access local variables by stack slot
need to be updated to do so relative to the current CallFrame's `slots` field.

We'll start at the top and plow through it.

^code run (1 before, 1 after)

First, we store the current topmost CallFrame in a <span
name="local">local</span> variable inside the main bytecode execution function.
Then we replace the bytecode access macros with versions that access `ip`
through that variable.

<aside name="local">

We could access the current frame by going through the CallFrame array every
time, but that's verbose. More importantly, storing the frame in a local
variable encourages the C compiler to keep that pointer in a register. That
speeds up access to the frame's `ip`. There's no *guarantee* that the compiler
will do this, but there's a good chance it will.

</aside>

Now onto each instruction that needs a little tender loving care.

^code push-local (2 before, 1 after)

Previously, `OP_GET_LOCAL` read the given local slot directly from the VM's
stack array, which meant it indexed the slot starting from the bottom of the
stack. Now, it accesses the current frame's `slots` array, which means it
accesses the given numbered slot relative to the beginning of that frame.

Setting a local variable works the same way.

^code set-local (2 before, 1 after)

The jump instructions used to modify the VM's `ip` field. Now, they do the same
for the current frame's `ip`.

^code jump (2 before, 1 after)

Same with the conditional jump:

^code jump-if-false (2 before, 1 after)

And our backward-jumping loop instruction:

^code loop (2 before, 1 after)

We have some diagnostic code that prints each instruction as it executes to help
us debug our VM. That needs to work with the new structure too.

^code trace-execution (1 before, 1 after)

Instead of passing in the VM's `chunk` and `ip` fields, now we read from the
current CallFrame.

You know, that wasn't too bad, actually. Most instructions just use the macros
so didn't need to be touched. Next, we jump up a level to the code that calls
`run()`.

^code interpret-stub (1 before, 2 after)

We finally get to wire up our earlier compiler changes to the back-end changes
we just made. First, we pass the source code to the compiler. It returns us a
new ObjFunction containing the compiled top-level code. If we get `NULL` back,
it means there was some compile-time error which the compiler has already
reported. In that case, we bail out since we can't run anything.

Otherwise, we store the function on the stack and prepare an initial CallFrame
to execute its code. Now you can see why the compiler sets aside stack slot zero
-- that stores the function being called. In the new CallFrame, we point to the
function, initialize its `ip` to point to the beginning of the function's
bytecode, and set up its stack window to start at the very bottom of the VM's
value stack.

This gets the interpreter ready to start executing code. After finishing, the VM
used to free the hardcoded chunk. Now that the ObjFunction owns that code, we
don't need to do that anymore, so the end of `interpret()` is simply this:

^code end-interpret (2 before, 1 after)

The last piece of code referring to the old VM fields is `runtimeError()`. We'll
revisit that later in the chapter, but for now let's change it to this:

^code runtime-error-temp (2 before, 1 after)

Instead of reading the chunk and `ip` directly from the VM, it pulls those from
the topmost CallFrame on the stack. That should get the function working again
and behaving as it did before.

Assuming we did all of that correctly, we got clox back to a runnable
state. Fire it up and it does... exactly what it did before. We haven't added
any new features yet, so this is kind of a letdown. But all of the
infrastructure is there and ready for us now. Let's take advantage of it.

## Function Declarations

Before we can do call expressions, we need something to call, so we'll do
function declarations first. The <span name="fun">fun</span> starts with a
keyword.

<aside name="fun">

Yes, I am going to make a dumb joke about the `fun` keyword every time it
comes up.

</aside>

^code match-fun (1 before, 1 after)

That passes control to here:

^code fun-declaration

Functions are first-class values, and a function declaration simply creates and
stores one in a newly declared variable. So we parse the name just like any
other variable declaration. A function declaration at the top level will bind
the function to a global variable. Inside a block or other function, a function
declaration creates a local variable.

In an earlier chapter, I explained how variables [get defined in two
stages][stage]. This ensures you can't access a variable's value inside the
variable's own initializer. That would be bad because the variable doesn't
*have* a value yet.

[stage]: local-variables.html#another-scope-edge-case

Functions don't suffer from this problem. It's safe for a function to refer to
its own name inside its body. You can't *call* the function and execute the body
until after it's fully defined, so you'll never see the variable in an
uninitialized state. Practically speaking, it's useful to allow this in order to
support recursive local functions.

To make that work, we mark the function declaration's variable "initialized" as
soon as we compile the name, before we compile the body. That way the name can
be referenced inside the body without generating an error.

We do need one check, though.

^code check-depth (1 before, 1 after)

Before, we called `markInitialized()` only when we already knew we were in a
local scope. Now, a top-level function declaration will also call this function.
When that happens, there is no local variable to mark initialized -- the
function is bound to a global variable.

Next, we compile the function itself -- its parameter list and block body. For
that, we use a separate helper function. That helper generates code that
leaves the resulting function object on top of the stack. After that, we call
`defineVariable()` to store that function back into the variable we declared for
it.

I split out the code to compile the parameters and body because we'll reuse it
later for parsing method declarations inside classes. Let's build it
incrementally, starting with this:

^code compile-function

<aside name="no-end-scope">

This `beginScope()` doesn't have a corresponding `endScope()` call. Because we
end the Compiler completely when we reach the end of the function body, there's
no need to close the lingering outermost scope.

</aside>

For now, we won't worry about parameters. We parse an empty pair of parentheses
followed by the body. The body starts with a left curly brace, which we parse
here. Then we call our existing `block()` function, which knows how to compile
the rest of a block including the closing brace.

### A stack of compilers

The interesting parts are the compiler stuff at the top and bottom. The Compiler
struct stores data like which slots are owned by which local variables, how many
blocks of nesting we're currently in, etc. All of that is specific to a single
function. But now the front end needs to handle compiling multiple functions
<span name="nested">nested</span> within each other.

<aside name="nested">

Remember that the compiler treats top-level code as the body of an implicit
function, so as soon as we add *any* function declarations, we're in a world of
nested functions.

</aside>

The trick for managing that is to create a separate Compiler for each function
being compiled. When we start compiling a function declaration, we create a new
Compiler on the C stack and initialize it. `initCompiler()` sets that Compiler
to be the current one. Then, as we compile the body, all of the functions that
emit bytecode write to the chunk owned by the new Compiler's function.

After we reach the end of the function's block body, we call `endCompiler()`.
That yields the newly compiled function object, which we store as a constant in
the *surrounding* function's constant table. But, wait, how do we get back to
the surrounding function? We lost it when `initCompiler()` overwrote the current
compiler pointer.

We fix that by treating the series of nested Compiler structs as a stack. Unlike
the Value and CallFrame stacks in the VM, we won't use an array. Instead, we use
a linked list. Each Compiler points back to the Compiler for the function that
encloses it, all the way back to the root Compiler for the top-level code.

^code enclosing-field (2 before, 1 after)

Inside the Compiler struct, we can't reference the Compiler *typedef* since that
declaration hasn't finished yet. Instead, we give a name to the struct itself
and use that for the field's type. C is weird.

When initializing a new Compiler, we capture the about-to-no-longer-be-current
one in that pointer.

^code store-enclosing (1 before, 1 after)

Then when a Compiler finishes, it pops itself off the stack by restoring the
previous compiler to be the new current one.

^code restore-enclosing (2 before, 1 after)

Note that we don't even need to <span name="compiler">dynamically</span>
allocate the Compiler structs. Each is stored as a local variable in the C stack
-- either in `compile()` or `function()`. The linked list of Compilers threads
through the C stack. The reason we can get an unbounded number of them is
because our compiler uses recursive descent, so `function()` ends up calling
itself recursively when you have nested function declarations.

<aside name="compiler">

Using the native stack for Compiler structs does mean our compiler has a
practical limit on how deeply nested function declarations can be. Go too far
and you could overflow the C stack. If we want the compiler to be more robust
against pathological or even malicious code -- a real concern for tools like
JavaScript VMs -- it would be good to have our compiler artificially limit the
amount of function nesting it permits.

</aside>

### Function parameters

Functions aren't very useful if you can't pass arguments to them, so let's do
parameters next.

^code parameters (1 before, 1 after)

Semantically, a parameter is simply a local variable declared in the outermost
lexical scope of the function body. We get to use the existing compiler support
for declaring named local variables to parse and compile parameters. Unlike
local variables, which have initializers, there's no code here to initialize the
parameter's value. We'll see how they are initialized later when we do argument
passing in function calls.

While we're at it, we note the function's arity by counting how many parameters
we parse. The other piece of metadata we store with a function is its name. When
compiling a function declaration, we call `initCompiler()` right after we parse
the function's name. That means we can grab the name right then from the
previous token.

^code init-function-name (1 before, 2 after)

Note that we're careful to create a copy of the name string. Remember, the
lexeme points directly into the original source code string. That string may get
freed once the code is finished compiling. The function object we create in the
compiler outlives the compiler and persists until runtime. So it needs its own
heap-allocated name string that it can keep around.

Rad. Now we can compile function declarations, like this:

```lox
fun areWeHavingItYet() {
  print "Yes we are!";
}

print areWeHavingItYet;
```

We just can't do anything <span name="useful">useful</span> with them.

<aside name="useful">

We can print them! I guess that's not very useful, though.

</aside>

## Function Calls

By the end of this section, we'll start to see some interesting behavior. The
next step is calling functions. We don't usually think of it this way, but a
function call expression is kind of an infix `(` operator. You have a
high-precedence expression on the left for the thing being called -- usually
just a single identifier. Then the `(` in the middle, followed by the argument
expressions separated by commas, and a final `)` to wrap it up at the end.

That odd grammatical perspective explains how to hook the syntax into our
parsing table.

^code infix-left-paren (1 before, 1 after)

When the parser encounters a left parenthesis following an expression, it
dispatches to a new parser function.

^code compile-call

We've already consumed the `(` token, so next we compile the arguments using a
separate `argumentList()` helper. That function returns the number of arguments
it compiled. Each argument expression generates code that leaves its value on
the stack in preparation for the call. After that, we emit a new `OP_CALL`
instruction to invoke the function, using the argument count as an operand.

We compile the arguments using this friend:

^code argument-list

That code should look familiar from jlox. We chew through arguments as long as
we find commas after each expression. Once we run out, we consume the final
closing parenthesis and we're done.

Well, almost. Back in jlox, we added a compile-time check that you don't pass
more than 255 arguments to a call. At the time, I said that was because clox
would need a similar limit. Now you can see why -- since we stuff the argument
count into the bytecode as a single-byte operand, we can only go up to 255. We
need to verify that in this compiler too.

^code arg-limit (1 before, 1 after)

That's the front end. Let's skip over to the back end, with a quick stop in the
middle to declare the new instruction.

^code op-call (1 before, 1 after)

### Binding arguments to parameters

Before we get to the implementation, we should think about what the stack looks
like at the point of a call and what we need to do from there. When we reach the
call instruction, we have already executed the expression for the function being
called, followed by its arguments. Say our program looks like this:

```lox
fun sum(a, b, c) {
  return a + b + c;
}

print 4 + sum(5, 6, 7);
```

If we pause the VM right on the `OP_CALL` instruction for that call to `sum()`,
the stack looks like this:

<img src="image/calls-and-functions/argument-stack.png" alt="Stack: 4, fn sum, 5, 6, 7." />

Picture this from the perspective of `sum()` itself. When the compiler compiled
`sum()`, it automatically allocated slot zero. Then, after that, it allocated
local slots for the parameters `a`, `b`, and `c`, in order. To perform a call to
`sum()`, we need a CallFrame initialized with the function being called and a
region of stack slots that it can use. Then we need to collect the arguments
passed to the function and get them into the corresponding slots for the
parameters.

When the VM starts executing the body of `sum()`, we want its stack window to
look like this:

<img src="image/calls-and-functions/parameter-window.png" alt="The same stack with the sum() function's call frame window surrounding fn sum, 5, 6, and 7." />

Do you notice how the argument slots that the caller sets up and the parameter
slots the callee needs are both in exactly the right order? How convenient! This
is no coincidence. When I talked about each CallFrame having its own window into
the stack, I never said those windows must be *disjoint*. There's nothing
preventing us from overlapping them, like this:

<img src="image/calls-and-functions/overlapping-windows.png" alt="The same stack with the top-level call frame covering the entire stack and the sum() function's call frame window surrounding fn sum, 5, 6, and 7." />

<span name="lua">The</span> top of the caller's stack contains the function
being called followed by the arguments in order. We know the caller doesn't have
any other slots above those in use because any temporaries needed when
evaluating argument expressions have been discarded by now. The bottom of the
callee's stack overlaps so that the parameter slots exactly line up with where
the argument values already live.

<aside name="lua">

Different bytecode VMs and real CPU architectures have different *calling
conventions* -- the specific mechanisms they use to pass arguments, store the
return address, etc. The mechanism I use here is based on Lua's clean, fast
virtual machine.

</aside>

This means that we don't need to do *any* work to "bind an argument to a
parameter". There's no copying values between slots or across environments. The
arguments are already exactly where they need to be. It's hard to beat that for
performance.

Time to implement the call instruction.

^code interpret-call (1 before, 1 after)

We need to know the function being called and the number of arguments passed to
it. We get the latter from the instruction's operand. That also tells us where
to find the function on the stack by counting past the argument slots from the
top of the stack. We hand that data off to a separate `callValue()` function. If
that returns `false`, it means the call caused some sort of runtime error. When
that happens, we abort the interpreter.

If `callValue()` is successful, there will be a new frame on the CallFrame stack
for the called function. The `run()` function has its own cached pointer to the
current frame, so we need to update that.

^code update-frame-after-call (2 before, 1 after)

Since the bytecode dispatch loop reads from that `frame` variable, when the VM
goes to execute the next instruction, it will read the `ip` from the newly
called function's CallFrame and jump to its code. The work for executing that
call begins here:

^code call-value

<aside name="switch">

Using a `switch` statement to check a single type is overkill now, but will make
sense when we add cases to handle other callable types.

</aside>

There's more going on here than just initializing a new CallFrame. Because Lox
is dynamically typed, there's nothing to prevent a user from writing bad code
like:

```lox
var notAFunction = 123;
notAFunction();
```

If that happens, the runtime needs to safely report an error and halt. So the
first thing we do is check the type of the value that we're trying to call. If
it's not a function, we error out. Otherwise, the actual call happens here:

^code call

This simply initializes the next CallFrame on the stack. It stores a pointer to
the function being called and points the frame's `ip` to the beginning of the
function's bytecode. Finally, it sets up the `slots` pointer to give the frame
its window into the stack. The arithmetic there ensures that the arguments
already on the stack line up with the function's parameters:

<img src="image/calls-and-functions/arithmetic.png" alt="The arithmetic to calculate frame-&gt;slots from stackTop and argCount." />

The funny little `- 1` is to account for stack slot zero which the compiler set
aside for when we add methods later. The parameters start at slot one so we
make the window start one slot earlier to align them with the arguments.

Before we move on, let's add the new instruction to our disassembler.

^code disassemble-call (1 before, 1 after)

And one more quick side trip. Now that we have a handy function for initiating a
CallFrame, we may as well use it to set up the first frame for executing the
top-level code.

^code interpret (1 before, 2 after)

OK, now back to calls...

### Runtime error checking

The overlapping stack windows work based on the assumption that a call passes
exactly one argument for each of the function's parameters. But, again, because
Lox ain't statically typed, a foolish user could pass too many or too few
arguments. In Lox, we've defined that to be a runtime error, which we report
like so:

^code check-arity (1 before, 1 after)

Pretty straightforward. This is why we store the arity of each function inside
the ObjFunction for it.

There's another error we need to report that's less to do with the user's
foolishness than our own. Because the CallFrame array has a fixed size, we need
to ensure a deep call chain doesn't overflow it.

^code check-overflow (2 before, 1 after)

In practice, if a program gets anywhere close to this limit, there's most likely
a bug in some runaway recursive code.

### Printing stack traces

While we're on the subject of runtime errors, let's spend a little time making
them more useful. Stopping on a runtime error is important to prevent the VM
from crashing and burning in some ill-defined way. But simply aborting doesn't
help the user fix their code that *caused* that error.

The classic tool to aid debugging runtime failures is a **stack trace** -- a
printout of each function that was still executing when the program died, and
where the execution was at the point that it died. Now that we have a call stack
and we've conveniently stored each function's name, we can show that entire
stack when a runtime error disrupts the harmony of the user's existence. It
looks like this:

^code runtime-error-stack (2 before, 2 after)

<aside name="minus">

The `- 1` is because the IP is already sitting on the next instruction to be
executed but we want the stack trace to point to the previous failed
instruction.

</aside>

After printing the error message itself, we walk the call stack from <span
name="top">top</span> (the most recently called function) to bottom (the
top-level code). For each frame, we find the line number that corresponds to the
current `ip` inside that frame's function. Then we print that line number along
with the function name.

<aside name="top">

There is some disagreement on which order stack frames should be shown in a
trace. Most put the innermost function as the first line and work their way
towards the bottom of the stack. Python prints them out in the opposite order.
So reading from top to bottom tells you how your program got to where it is, and
the last line is where the error actually occurred.

There's a logic to that style. It ensures you can always see the innermost
function even if the stack trace is too long to fit on one screen. On the other
hand, the "[inverted pyramid][]" from journalism tells us we should put the most
important information *first* in a block of text. In a stack trace, that's the
function where the error actually occurred. Most other language implementations
do that.

[inverted pyramid]: https://en.wikipedia.org/wiki/Inverted_pyramid_(journalism)

</aside>

For example, if you run this broken program:

```lox
fun a() { b(); }
fun b() { c(); }
fun c() {
  c("too", "many");
}

a();
```

It prints out:

```text
Expected 0 arguments but got 2.
[line 4] in c()
[line 2] in b()
[line 1] in a()
[line 7] in script
```

That doesn't look too bad, does it?

### Returning from functions

We're getting close. We can call functions, and the VM will execute them. But we
can't *return* from them yet. We've had an `OP_RETURN` instruction for quite
some time, but it's always had some kind of temporary code hanging out in it
just to get us out of the bytecode loop. The time has arrived for a real
implementation.

^code interpret-return (1 before, 1 after)

When a function returns a value, that value will be on top of the stack. We're
about to discard the called function's entire stack window, so we pop that
return value off and hang on to it. Then we discard the CallFrame for the
returning function. If that was the very last CallFrame, it means we've finished
executing the top-level code. The entire program is done, so we pop the main
script function from the stack and then exit the interpreter.

Otherwise, we discard all of the slots the callee was using for its parameters
and local variables. That includes the same slots the caller used to pass the
arguments. Now that the call is done, the caller doesn't need them anymore. This
means the top of the stack ends up right at the beginning of the returning
function's stack window.

We push the return value back onto the stack at that new, lower location. Then
we update the `run()` function's cached pointer to the current frame. Just like
when we began a call, on the next iteration of the bytecode dispatch loop, the
VM will read `ip` from that frame, and execution will jump back to the caller,
right where it left off, immediately after the `OP_CALL` instruction.

<img src="image/calls-and-functions/return.png" alt="Each step of the return process: popping the return value, discarding the call frame, pushing the return value." />

Note that we assume here that the function *did* actually return a value, but
a function can implicitly return by reaching the end of its body:

```lox
fun noReturn() {
  print "Do stuff";
  // No return here.
}

print noReturn(); // ???
```

We need to handle that correctly too. The language is specified to implicitly
return `nil` in that case. To make that happen, we add this:

^code return-nil (1 before, 2 after)

The compiler calls `emitReturn()` to write the `OP_RETURN` instruction at the
end of a function body. Now, before that, it emits an instruction to push `nil`
onto the stack. And with that, we have working function calls! They can even
take parameters! It almost looks like we know what we're doing here.

## Return Statements

If you want a function that returns something other than the implicit `nil`, you
need a `return` statement. Let's get that working.

^code match-return (1 before, 1 after)

When the compiler sees a `return` keyword, it goes here:

^code return-statement

The return value expression is optional, so the parser looks for a semicolon
token to tell if a value was provided. If there is no return value, the
statement implicitly returns `nil`. We implement that by calling `emitReturn()`,
which emits an `OP_NIL` instruction followed by an `OP_RETURN`. Otherwise, we
compile the return value expression and return it with an `OP_RETURN`
instruction.

This is the same `OP_RETURN` instruction we've already implemented -- we don't
need any new runtime code. This is quite a difference from jlox. There, we had
to use exceptions to unwind the stack when a `return` statement was executed.
That was because you could return from deep inside some nested blocks. Since
jlox recursively walks the AST, that meant there were a bunch of Java method
calls we needed to escape out of.

Our bytecode compiler flattens that all out. We do recursive descent during
parsing, but at runtime, the VM's bytecode dispatch loop is completely flat.
There is no recursion going on at the C level at all. So returning, even from
within some nested blocks, is as straightforward as returning from the end of
the function's body.

We're not totally done, though. The new `return` statement gives us a new
compile error to worry about. Returns are useful for returning from functions
but the top level of a Lox program is imperative code too. You shouldn't be able
to <span name="worst">return</span> from there.

```lox
return "What?!";
```

<aside name="worst">

Allowing `return` at the top level isn't the worst idea in the world. It would
give you a natural way to terminate a script early. You could maybe even use a
returned number to indicate the process's exit code.

</aside>

We've specified that it's a compile error to have a `return` statement outside
of any function, which we implement like so:

^code return-from-script (1 before, 1 after)

This is one of the reasons we added that FunctionType enum to the compiler.

## Native Functions

Our VM is getting more powerful. We've got functions, calls, parameters,
returns. You can define lots of different functions that can call each other in
interesting ways. But, ultimately, they can't really *do* anything. The only
user-visible thing a Lox program can do, regardless of its complexity, is print.
To add more capabilities, we need to expose them to the user.

A programming language implementation reaches out and touches the material world
through **native functions**. If you want to be able to write programs that
check the time, read user input, or access the file system, we need to add
native functions -- callable from Lox but implemented in C -- that expose those
capabilities.

At the language level, Lox is fairly complete -- it's got closures, classes,
inheritance, and other fun stuff. One reason it feels like a toy language is
because it has almost no native capabilities. We could turn it into a real
language by adding a long list of them.

However, grinding through a pile of OS operations isn't actually very
educational. Once you've seen how to bind one piece of C code to Lox, you get
the idea. But you do need to see *one*, and even a single native function
requires us to build out all the machinery for interfacing Lox with C. So we'll
go through that and do all the hard work. Then, when that's done, we'll add one
tiny native function just to prove that it works.

The reason we need new machinery is because, from the implementation's
perspective, native functions are different from Lox functions. When they are
called, they don't push a CallFrame, because there's no bytecode for that frame
to point to. They have no bytecode chunk. Instead, they somehow reference a
piece of native C code.

We handle this in clox by defining native functions as an entirely different
object type.

^code obj-native (1 before, 2 after)

The representation is simpler than ObjFunction -- merely an Obj header and a
pointer to the C function that implements the native behavior. The native
function takes the argument count and a pointer to the first argument on the
stack. It accesses the arguments through that pointer. Once it's done, it
returns the result value.

As always, a new object type carries some accoutrements with it. To create an
ObjNative, we declare a constructor-like function.

^code new-native-h (1 before, 1 after)

We implement that like so:

^code new-native

The constructor takes a C function pointer to wrap in an ObjNative. It sets up
the object header and stores the function. For the header, we need a new object
type.

^code obj-type-native (2 before, 2 after)

The VM also needs to know how to deallocate a native function object.

^code free-native (1 before, 1 after)

There isn't much here since ObjNative doesn't own any extra memory. The other
capability all Lox objects support is being printed.

^code print-native (1 before, 1 after)

In order to support dynamic typing, we have a macro to see if a value is a
native function.

^code is-native (1 before, 1 after)

Assuming that returns true, this macro extracts the C function pointer from a
Value representing a native function:

^code as-native (1 before, 1 after)

All of this baggage lets the VM treat native functions like any other object.
You can store them in variables, pass them around, throw them birthday parties,
etc. Of course, the operation we actually care about is *calling* them -- using
one as the left-hand operand in a call expression.

Over in `callValue()` we add another type case.

^code call-native (2 before, 1 after)

If the object being called is a native function, we invoke the C function right
then and there. There's no need to muck with CallFrames or anything. We just
hand off to C, get the result, and stuff it back in the stack. This makes native
functions as fast as we can get.

With this, users should be able to call native functions, but there aren't any
to call. Without something like a foreign function interface, users can't define
their own native functions. That's our job as VM implementers. We'll start with
a helper to define a new native function exposed to Lox programs.

^code define-native

It takes a pointer to a C function and the name it will be known as in Lox.
We wrap the function in an ObjNative and then store that in a global variable
with the given name.

You're probably wondering why we push and pop the name and function on the
stack. That looks weird, right? This is the kind of stuff you have to worry
about when <span name="worry">garbage</span> collection gets involved. Both
`copyString()` and `newNative()` dynamically allocate memory. That means once we
have a GC, they can potentially trigger a collection. If that happens, we need
to ensure the collector knows we're not done with the name and ObjNative so
that it doesn't free them out from under us. Storing them on the value stack
accomplishes that.

<aside name="worry">

Don't worry if you didn't follow all that. It will make a lot more sense once we
get around to [implementing the GC][gc].

[gc]: garbage-collection.html

</aside>

It feels silly, but after all of that work, we're going to add only one
little native function.

^code clock-native

This returns the elapsed time since the program started running, in seconds. It's
handy for benchmarking Lox programs. In Lox, we'll name it `clock()`.

^code define-native-clock (1 before, 1 after)

To get to the C standard library `clock()` function, the "vm" module needs an
include.

^code vm-include-time (1 before, 2 after)

That was a lot of material to work through, but we did it! Type this in and try
it out:

```lox
fun fib(n) {
  if (n < 2) return n;
  return fib(n - 2) + fib(n - 1);
}

var start = clock();
print fib(35);
print clock() - start;
```

We can write a really inefficient recursive Fibonacci function. Even better, we
can measure just <span name="faster">*how*</span> inefficient it is. This is, of
course, not the smartest way to calculate a Fibonacci number. But it is a good
way to stress test a language implementation's support for function calls. On my
machine, running this in clox is about five times faster than in jlox. That's
quite an improvement.

<aside name="faster">

It's a little slower than a comparable Ruby program run in Ruby 2.4.3p205, and
about 3x faster than one run in Python 3.7.3. And we still have a lot of simple
optimizations we can do in our VM.

</aside>

<div class="challenges">

## Challenges

1.  Reading and writing the `ip` field is one of the most frequent operations
    inside the bytecode loop. Right now, we access it through a pointer to the
    current CallFrame. That requires a pointer indirection which may force the
    CPU to bypass the cache and hit main memory. That can be a real performance
    sink.

    Ideally, we'd keep the `ip` in a native CPU register. C doesn't let us
    *require* that without dropping into inline assembly, but we can structure
    the code to encourage the compiler to make that optimization. If we store
    the `ip` directly in a C local variable and mark it `register`, there's a
    good chance the C compiler will accede to our polite request.

    This does mean we need to be careful to load and store the local `ip` back
    into the correct CallFrame when starting and ending function calls.
    Implement this optimization. Write a couple of benchmarks and see how it
    affects the performance. Do you think the extra code complexity is worth it?

2.  Native function calls are fast in part because we don't validate that the
    call passes as many arguments as the function expects. We really should, or
    an incorrect call to a native function without enough arguments could cause
    the function to read uninitialized memory. Add arity checking.

3.  Right now, there's no way for a native function to signal a runtime error.
    In a real implementation, this is something we'd need to support because
    native functions live in the statically typed world of C but are called
    from dynamically typed Lox land. If a user, say, tries to pass a string to
    `sqrt()`, that native function needs to report a runtime error.

    Extend the native function system to support that. How does this capability
    affect the performance of native calls?

4.  Add some more native functions to do things you find useful. Write some
    programs using those. What did you add? How do they affect the feel of the
    language and how practical it is?

</div>


================================================
FILE: book/chunks-of-bytecode.md
================================================
> If you find that you're spending almost all your time on theory, start turning
> some attention to practical things; it will improve your theories. If you find
> that you're spending almost all your time on practice, start turning some
> attention to theoretical things; it will improve your practice.
>
> <cite>Donald Knuth</cite>

We already have ourselves a complete implementation of Lox with jlox, so why
isn't the book over yet? Part of this is because jlox relies on the <span
name="metal">JVM</span> to do lots of things for us. If we want to understand
how an interpreter works all the way down to the metal, we need to build those
bits and pieces ourselves.

<aside name="metal">

Of course, our second interpreter relies on the C standard library for basics
like memory allocation, and the C compiler frees us from details of the
underlying machine code we're running it on. Heck, that machine code is probably
implemented in terms of microcode on the chip. And the C runtime relies on the
operating system to hand out pages of memory. But we have to stop *somewhere* if
this book is going to fit on your bookshelf.

</aside>

An even more fundamental reason that jlox isn't sufficient is that it's too damn
slow. A tree-walk interpreter is fine for some kinds of high-level, declarative
languages. But for a general-purpose, imperative language -- even a "scripting"
language like Lox -- it won't fly. Take this little script:

```lox
fun fib(n) {
  if (n < 2) return n;
  return fib(n - 1) + fib(n - 2); // [fib]
}

var before = clock();
print fib(40);
var after = clock();
print after - before;
```

<aside name="fib">

This is a comically inefficient way to actually calculate Fibonacci numbers.
Our goal is to see how fast the *interpreter* runs, not to see how fast of a
program we can write. A slow program that does a lot of work -- pointless or not
-- is a good test case for that.

</aside>

On my laptop, that takes jlox about 72 seconds to execute. An equivalent C
program finishes in half a second. Our dynamically typed scripting language is
never going to be as fast as a statically typed language with manual memory
management, but we don't need to settle for more than *two orders of magnitude*
slower.

We could take jlox and run it in a profiler and start tuning and tweaking
hotspots, but that will only get us so far. The execution model -- walking the
AST -- is fundamentally the wrong design. We can't micro-optimize that to the
performance we want any more than you can polish an AMC Gremlin into an SR-71
Blackbird.

We need to rethink the core model. This chapter introduces that model, bytecode,
and begins our new interpreter, clox.

## Bytecode?

In engineering, few choices are without trade-offs. To best understand why we're
going with bytecode, let's stack it up against a couple of alternatives.

### Why not walk the AST?

Our existing interpreter has a couple of things going for it:

*   Well, first, we already wrote it. It's done. And the main reason it's done
    is because this style of interpreter is *really simple to implement*. The
    runtime representation of the code directly maps to the syntax. It's
    virtually effortless to get from the parser to the data structures we need
    at runtime.

*   It's *portable*. Our current interpreter is written in Java and runs on any
    platform Java supports. We could write a new implementation in C using the
    same approach and compile and run our language on basically every platform
    under the sun.

Those are real advantages. But, on the other hand, it's *not memory-efficient*.
Each piece of syntax becomes an AST node. A tiny Lox expression like `1 + 2`
turns into a slew of objects with lots of pointers between them, something like:

<span name="header"></span>

<aside name="header">

The "(header)" parts are the bookkeeping information the Java virtual machine
uses to support memory management and store the object's type. Those take up
space too!

</aside>

<img src="image/chunks-of-bytecode/ast.png" alt="The tree of Java objects created to represent '1 + 2'." />

Each of those pointers adds an extra 32 or 64 bits of overhead to the object.
Worse, sprinkling our data across the heap in a loosely connected web of objects
does bad things for <span name="locality">*spatial locality*</span>.

<aside name="locality">

I wrote [an entire chapter][gpp locality] about this exact problem in my first
book, *Game Programming Patterns*, if you want to really dig in.

[gpp locality]: http://gameprogrammingpatterns.com/data-locality.html

</aside>

Modern CPUs process data way faster than they can pull it from RAM. To
compensate for that, chips have multiple layers of caching. If a piece of memory
the CPU needs is already in the cache, it can be loaded more quickly. We're
talking upwards of 100 *times* faster.

How does data get into that cache? The machine speculatively stuffs things in
there for you. Its heuristic is pretty simple. Whenever the CPU reads a bit of
data from RAM, it pulls in a whole little bundle of adjacent bytes and stuffs
them in the cache.

If our program next requests some data close enough to be inside that cache
line, our CPU runs like a well-oiled conveyor belt in a factory. We *really*
want to take advantage of this. To use the cache effectively, the way we
represent code in memory should be dense and ordered like it's read.

Now look up at that tree. Those sub-objects could be <span
name="anywhere">*anywhere*</span>. Every step the tree-walker takes where it
follows a reference to a child node may step outside the bounds of the cache and
force the CPU to stall until a new lump of data can be slurped in from RAM. Just
the *overhead* of those tree nodes with all of their pointer fields and object
headers tends to push objects away from each other and out of the cache.

<aside name="anywhere">

Even if the objects happened to be allocated in sequential memory when the
parser first produced them, after a couple of rounds of garbage collection --
which may move objects around in memory -- there's no telling where they'll be.

</aside>

Our AST walker has other overhead too around interface dispatch and the Visitor
pattern, but the locality issues alone are enough to justify a better code
representation.

### Why not compile to native code?

If you want to go *real* fast, you want to get all of those layers of
indirection out of the way. Right down to the metal. Machine code. It even
*sounds* fast. *Machine code.*

Compiling directly to the native instruction set the chip supports is what the
fastest languages do. Targeting native code has been the most efficient option
since way back in the early days when engineers actually <span
name="hand">handwrote</span> programs in machine code.

<aside name="hand">

Yes, they actually wrote machine code by hand. On punched cards. Which,
presumably, they punched *with their fists*.

</aside>

If you've never written any machine code, or its slightly more human-palatable
cousin assembly code before, I'll give you the gentlest of introductions. Native
code is a dense series of operations, encoded directly in binary. Each
instruction is between one and a few bytes long, and is almost mind-numbingly
low level. "Move a value from this address to this register." "Add the integers
in these two registers." Stuff like that.

The CPU cranks through the instructions, decoding and executing each one in
order. There is no tree structure like our AST, and control flow is handled by
jumping from one point in the code directly to another. No indirection, no
overhead, no unnecessary skipping around or chasing pointers.

Lightning fast, but that performance comes at a cost. First of all, compiling to
native code ain't easy. Most chips in wide use today have sprawling Byzantine
architectures with heaps of instructions that accreted over decades. They
require sophisticated register allocation, pipelining, and instruction
scheduling.

And, of course, you've thrown <span name="back">portability</span> out. Spend a
few years mastering some architecture and that still only gets you onto *one* of
the several popular instruction sets out there. To get your language on all of
them, you need to learn all of their instruction sets and write a separate back
end for each one.

<aside name="back">

The situation isn't entirely dire. A well-architected compiler lets you
share the front end and most of the middle layer optimization passes across the
different architectures you support. It's mainly the code generation and some of
the details around instruction selection that you'll need to write afresh each
time.

The [LLVM][] project gives you some of this out of the box. If your compiler
outputs LLVM's own special intermediate language, LLVM in turn compiles that to
native code for a plethora of architectures.

[llvm]: https://llvm.org/

</aside>

### What is bytecode?

Fix those two points in your mind. On one end, a tree-walk interpreter is
simple, portable, and slow. On the other, native code is complex and
platform-specific but fast. Bytecode sits in the middle. It retains the
portability of a tree-walker -- we won't be getting our hands dirty with
assembly code in this book. It sacrifices *some* simplicity to get a performance
boost in return, though not as fast as going fully native.

Structurally, bytecode resembles machine code. It's a dense, linear sequence of
binary instructions. That keeps overhead low and plays nice with the cache.
However, it's a much simpler, higher-level instruction set than any real chip
out there. (In many bytecode formats, each instruction is only a single byte
long, hence "bytecode".)

Imagine you're writing a native compiler from some source language and you're
given carte blanche to define the easiest possible architecture to target.
Bytecode is kind of like that. It's an idealized fantasy instruction set that
makes your life as the compiler writer easier.

The problem with a fantasy architecture, of course, is that it doesn't exist. We
solve that by writing an *emulator* -- a simulated chip written in software that
interprets the bytecode one instruction at a time. A *virtual machine (VM)*, if
you will.

That emulation layer adds <span name="p-code">overhead</span>, which is a key
reason bytecode is slower than native code. But in return, it gives us
portability. Write our VM in a language like C that is already supported on all
the machines we care about, and we can run our emulator on top of any hardware
we like.

<aside name="p-code">

One of the first bytecode formats was [p-code][], developed for Niklaus Wirth's
Pascal language. You might think a PDP-11 running at 15MHz couldn't afford the
overhead of emulating a virtual machine. But back then, computers were in their
Cambrian explosion and new architectures appeared every day. Keeping up with the
latest chips was worth more than squeezing the maximum performance from each
one. That's why the "p" in p-code doesn't stand for "Pascal", but "portable".

[p-code]: https://en.wikipedia.org/wiki/P-code_machine

</aside>

This is the path we'll take with our new interpreter, clox. We'll follow in the
footsteps of the main implementations of Python, Ruby, Lua, OCaml, Erlang, and
others. In many ways, our VM's design will parallel the structure of our
previous interpreter:

<img src="image/chunks-of-bytecode/phases.png" alt="Phases of the two
implementations. jlox is Parser to Syntax Trees to Interpreter. clox is Compiler
to Bytecode to Virtual Machine." />

Of course, we won't implement the phases strictly in order. Like our previous
interpreter, we'll bounce around, building up the implementation one language
feature at a time. In this chapter, we'll get the skeleton of the application in
place and create the data structures needed to store and represent a chunk of
bytecode.

## Getting Started

Where else to begin, but at `main()`? <span name="ready">Fire</span> up your
trusty text editor and start typing.

<aside name="ready">

Now is a good time to stretch, maybe crack your knuckles. A little montage music
wouldn't hurt either.

</aside>

^code main-c

From this tiny seed, we will grow our entire VM. Since C provides us with so
little, we first need to spend some time amending the soil. Some of that goes
into this header:

^code common-h

There are a handful of types and constants we'll use throughout the interpreter,
and this is a convenient place to put them. For now, it's the venerable `NULL`,
`size_t`, the nice C99 Boolean `bool`, and explicit-sized integer types --
`uint8_t` and friends.

## Chunks of Instructions

Next, we need a module to define our code representation. I've been using
"chunk" to refer to sequences of bytecode, so let's make that the official name
for that module.

^code chunk-h

In our bytecode format, each instruction has a one-byte **operation code**
(universally shortened to **opcode**). That number controls what kind of
instruction we're dealing with -- add, subtract, look up variable, etc. We
define those here:

^code op-enum (1 before, 2 after)

For now, we start with a single instruction, `OP_RETURN`. When we have a
full-featured VM, this instruction will mean "return from the current function".
I admit this isn't exactly useful yet, but we have to start somewhere, and this
is a particularly simple instruction, for reasons we'll get to later.

### A dynamic array of instructions

Bytecode is a series of instructions. Eventually, we'll store some other data
along with the instructions, so let's go ahead and create a struct to hold it
all.

^code chunk-struct (1 before, 2 after)

At the moment, this is simply a wrapper around an array of bytes. Since we don't
know how big the array needs to be before we start compiling a chunk, it must be
dynamic. Dynamic arrays are one of my favorite data structures. That sounds like
claiming vanilla is my favorite ice cream <span name="flavor">flavor</span>, but
hear me out. Dynamic arrays provide:

<aside name="flavor">

Butter pecan is actually my favorite.

</aside>

* Cache-friendly, dense storage

* Constant-time indexed element lookup

* Constant-time appending to the end of the array

Those features are exactly why we used dynamic arrays all the time in jlox under
the guise of Java's ArrayList class. Now that we're in C, we get to roll our
own. If you're rusty on dynamic arrays, the idea is pretty simple. In addition
to the array itself, we keep two numbers: the number of elements in the array we
have allocated ("capacity") and how many of those allocated entries are actually
in use ("count").

^code count-and-capacity (1 before, 2 after)

When we add an element, if the count is less than the capacity, then there is
already available space in the array. We store the new element right in there
and bump the count.

<img src="image/chunks-of-bytecode/insert.png" alt="Storing an element in an
array that has enough capacity." />

If we have no spare capacity, then the process is a little more involved.

<img src="image/chunks-of-bytecode/grow.png" alt="Growing the dynamic array
before storing an element." class="wide" />

1.  <span name="amortized">Allocate</span> a new array with more capacity.
2.  Copy the existing elements from the old array to the new one.
3.  Store the new `capacity`.
4.  Delete the old array.
5.  Update `code` to point to the new array.
6.  Store the element in the new array now that there is room.
7.  Update the `count`.

<aside name="amortized">

Copying the existing elements when you grow the array makes it seem like
appending an element is *O(n)*, not *O(1)* like I said above. However, you need
to do this copy step only on *some* of the appends. Most of the time, there is
already extra capacity, so you don't need to copy.

To understand how this works, we need [**amortized
analysis**](https://en.wikipedia.org/wiki/Amortized_analysis). That shows us
that as long as we grow the array by a multiple of its current size, when we
average out the cost of a *sequence* of appends, each append is *O(1)*.

</aside>

We have our struct ready, so let's implement the functions to work with it. C
doesn't have constructors, so we declare a function to initialize a new chunk.

^code init-chunk-h (1 before, 2 after)

And implement it thusly:

^code chunk-c

The dynamic array starts off completely empty. We don't even allocate a raw
array yet. To append a byte to the end of the chunk, we use a new function.

^code write-chunk-h (1 before, 2 after)

This is where the interesting work happens.

^code write-chunk

The first thing we need to do is see if the current array already has capacity
for the new byte. If it doesn't, then we first need to grow the array to make
room. (We also hit this case on the very first write when the array is `NULL`
and `capacity` is 0.)

To grow the array, first we figure out the new capacity and grow the array to
that size. Both of those lower-level memory operations are defined in a new
module.

^code chunk-c-include-memory (1 before, 2 after)

This is enough to get us started.

^code memory-h

This macro calculates a new capacity based on a given current capacity. In order
to get the performance we want, the important part is that it *scales* based on
the old size. We grow by a factor of two, which is pretty typical. 1.5&times; is
another common choice.

We also handle when the current capacity is zero. In that case, we jump straight
to eight elements instead of starting at one. That <span
name="profile">avoids</span> a little extra memory churn when the array is very
small, at the expense of wasting a few bytes on very small chunks.

<aside name="profile">

I picked the number eight somewhat arbitrarily for the book. Most dynamic array
implementations have a minimum threshold like this. The right way to pick a
value for this is to profile against real-world usage and see which constant
makes the best performance trade-off between extra grows versus wasted space.

</aside>

Once we know the desired capacity, we create or grow the array to that size
using `GROW_ARRAY()`.

^code grow-array (2 before, 2 after)

This macro pretties up a function call to `reallocate()` where the real work
happens. The macro itself takes care of getting the size of the array's element
type and casting the resulting `void*` back to a pointer of the right type.

This `reallocate()` function is the single function we'll use for all dynamic
memory management in clox -- allocating memory, freeing it, and changing the
size of an existing allocation. Routing all of those operations through a single
function will be important later when we add a garbage collector that needs to
keep track of how much memory is in use.

The two size arguments passed to `reallocate()` control which operation to
perform:

<table>
  <thead>
    <tr>
      <td>oldSize</td>
      <td>newSize</td>
      <td>Operation</td>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>0</td>
      <td>Non&#8209;zero</td>
      <td>Allocate new block.</td>
    </tr>
    <tr>
      <td>Non&#8209;zero</td>
      <td>0</td>
      <td>Free allocation.</td>
    </tr>
    <tr>
      <td>Non&#8209;zero</td>
      <td>Smaller&nbsp;than&nbsp;<code>oldSize</code></td>
      <td>Shrink existing allocation.</td>
    </tr>
    <tr>
      <td>Non&#8209;zero</td>
      <td>Larger&nbsp;than&nbsp;<code>oldSize</code></td>
      <td>Grow existing allocation.</td>
    </tr>
  </tbody>
</table>

That sounds like a lot of cases to handle, but here's the implementation:

^code memory-c

When `newSize` is zero, we handle the deallocation case ourselves by calling
`free()`. Otherwise, we rely on the C standard library's `realloc()` function.
That function conveniently supports the other three aspects of our policy. When
`oldSize` is zero, `realloc()` is equivalent to calling `malloc()`.

The interesting cases are when both `oldSize` and `newSize` are not zero. Those
tell `realloc()` to resize the previously allocated block. If the new size is
smaller than the existing block of memory, it simply <span
name="shrink">updates</span> the size of the block and returns the same pointer
you gave it. If the new size is larger, it attempts to grow the existing block
of memory.

It can do that only if the memory after that block isn't already in use. If
there isn't room to grow the block, `realloc()` instead allocates a *new* block
of memory of the desired size, copies over the old bytes, frees the old block,
and then returns a pointer to the new block. Remember, that's exactly the
behavior we want for our dynamic array.

Because computers are finite lumps of matter and not the perfect mathematical
abstractions computer science theory would have us believe, allocation can fail
if there isn't enough memory and `realloc()` will return `NULL`. We should
handle that.

^code out-of-memory (1 before, 1 after)

There's not really anything *useful* that our VM can do if it can't get the
memory it needs, but we at least detect that and abort the process immediately
instead of returning a `NULL` pointer and letting it go off the rails later.

<aside name="shrink">

Since all we passed in was a bare pointer to the first byte of memory, what does
it mean to "update" the block's size? Under the hood, the memory allocator
maintains additional bookkeeping information for each block of heap-allocated
memory, including its size.

Given a pointer to some previously allocated memory, it can find this
bookkeeping information, which is necessary to be able to cleanly free it. It's
this size metadata that `realloc()` updates.

Many implementations of `malloc()` store the allocated size in memory right
*before* the returned address.

</aside>

OK, we can create new chunks and write instructions to them. Are we done? Nope!
We're in C now, remember, we have to manage memory ourselves, like in Ye Olden
Times, and that means *freeing* it too.

^code free-chunk-h (1 before, 1 after)

The implementation is:

^code free-chunk

We deallocate all of the memory and then call `initChunk()` to zero out the
fields leaving the chunk in a well-defined empty state. To free the memory, we
add one more macro.

^code free-array (3 before, 2 after)

Like `GROW_ARRAY()`, this is a wrapper around a call to `reallocate()`. This one
frees the memory by passing in zero for the new size. I know, this is a lot of
boring low-level stuff. Don't worry, we'll get a lot of use out of these in
later chapters and will get to program at a higher level. Before we can do that,
though, we gotta lay our own foundation.

## Disassembling Chunks

Now we have a little module for creating chunks of bytecode. Let's try it out by
hand-building a sample chunk.

^code main-chunk (1 before, 1 after)

Don't forget the include.

^code main-include-chunk (1 before, 2 after)

Run that and give it a try. Did it work? Uh... who knows? All we've done is push
some bytes around in memory. We have no human-friendly way to see what's
actually inside that chunk we made.

To fix this, we're going to create a **disassembler**. An **assembler** is an
old-school program that takes a file containing human-readable mnemonic names
for CPU instructions like "ADD" and "MULT" and translates them to their binary
machine code equivalent. A *dis*assembler goes in the other direction -- given a
blob of machine code, it spits out a textual listing of the instructions.

We'll implement something <span name="printer">similar</span>. Given a chunk, it
will print out all of the instructions in it. A Lox *user* won't use this, but
we Lox *maintainers* will certainly benefit since it gives us a window into the
interpreter's internal representation of code.

<aside name="printer">

In jlox, our analogous tool was the [AstPrinter class][].

[astprinter class]: representing-code.html#a-not-very-pretty-printer

</aside>

In `main()`, after we create the chunk, we pass it to the disassembler.

^code main-disassemble-chunk (2 before, 1 after)

Again, we whip up <span name="module">yet another</span> module.

<aside name="module">

I promise you we won't be creating this many new files in later chapters.

</aside>

^code main-include-debug (1 before, 2 after)

Here's that header:

^code debug-h

In `main()`, we call `disassembleChunk()` to disassemble all of the instructions
in the entire chunk. That's implemented in terms of the other function, which
just disassembles a single instruction. It shows up here in the header because
we'll call it from the VM in later chapters.

Here's a start at the implementation file:

^code debug-c

To disassemble a chunk, we print a little header (so we can tell *which* chunk
we're looking at) and then crank through the bytecode, disassembling each
instruction. The way we iterate through the code is a little odd. Instead of
incrementing `offset` in the loop, we let `disassembleInstruction()` do it for
us. When we call that function, after disassembling the instruction at the given
offset, it returns the offset of the *next* instruction. This is because, as
we'll see later, instructions can have different sizes.

The core of the "debug" module is this function:

^code disassemble-instruction

First, it prints the byte offset of the given instruction -- that tells us where
in the chunk this instruction is. This will be a helpful signpost when we start
doing control flow and jumping around in the bytecode.

Next, it reads a single byte from the bytecode at the given offset. That's our
opcode. We <span name="switch">switch</span> on that. For each kind of
instruction, we dispatch to a little utility function for displaying it. On the
off chance that the given byte doesn't look like an instruction at all -- a bug
in our compiler -- we print that too. For the one instruction we do have,
`OP_RETURN`, the display function is:

<aside name="switch">

We have only one instruction right now, but this switch will grow throughout the
rest of the book.

</aside>

^code simple-instruction

There isn't much to a return instruction, so all it does is print the name of
the opcode, then return the next byte offset past this instruction. Other
instructions will have more going on.

If we run our nascent interpreter now, it actually prints something:

```text
== test chunk ==
0000 OP_RETURN
```

It worked! This is sort of the "Hello, world!" of our code representation. We
can create a chunk, write an instruction to it, and then extract that
instruction back out. Our encoding and decoding of the binary bytecode is
working.

## Constants

Now that we have a rudimentary chunk structure working, let's start making it
more useful. We can store *code* in chunks, but what about *data*? Many values
the interpreter works with are created at runtime as the result of operations.

```lox
1 + 2;
```

The value 3 appears nowhere in the code here. However, the literals `1` and `2`
do. To compile that statement to bytecode, we need some sort of instruction that
means "produce a constant" and those literal values need to get stored in the
chunk somewhere. In jlox, the Expr.Literal AST node held the value. We need a
different solution now that we don't have a syntax tree.

### Representing values

We won't be *running* any code in this chapter, but since constants have a foot
in both the static and dynamic worlds of our interpreter, they force us to start
thinking at least a little bit about how our VM should represent values.

For now, we're going to start as simple as possible -- we'll support only
double-precision, floating-point numbers. This will obviously expand over time,
so we'll set up a new module to give ourselves room to grow.

^code value-h

This typedef abstracts how Lox values are concretely represented in C. That way,
we can change that representation without needing to go back and fix existing
code that passes around values.

Back to the question of where to store constants in a chunk. For small
fixed-size values like integers, many instruction sets store the value directly
in the code stream right after the opcode. These are called **immediate
instructions** because the bits for the value are immediately after the opcode.

That doesn't work well for large or variable-sized constants like strings. In a
native compiler to machine code, those bigger constants get stored in a separate
"constant data" region in the binary executable. Then, the instruction to load a
constant has an address or offset pointing to where the value is stored in that
section.

Most virtual machines do something similar. For example, the Java Virtual
Machine [associates a **constant pool**][jvm const] with each compiled class.
That sounds good enough for clox to me. Each chunk will carry with it a list of
the values that appear as literals in the program. To keep things <span
name="immediate">simpler</span>, we'll put *all* constants in there, even simple
integers.

[jvm const]: https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.4

<aside name="immediate">

In addition to needing two kinds of constant instructions -- one for immediate
values and one for constants in the constant table -- immediates also force us
to worry about alignment, padding, and endianness. Some architectures aren't
happy if you try to, say, stuff a 4-byte integer at an odd address.

</aside>

### Value arrays

The constant pool is an array of values. The instruction to load a constant
looks up the value by index in that array. As with our <span
name="generic">bytecode</span> array, the compiler doesn't know how big the
array needs to be ahead of time. So, again, we need a dynamic one. Since C
doesn't have generic data structures, we'll write another dynamic array data
structure, this time for Value.

<aside name="generic">

Defining a new struct and manipulation functions each time we need a dynamic
array of a different type is a chore. We could cobble together some preprocessor
macros to fake generics, but that's overkill for clox. We won't need many more
of these.

</aside>

^code value-array (1 before, 2 after)

As with the bytecode array in Chunk, this struct wraps a pointer to an array
along with its allocated capacity and the number of elements in use. We also
need the same three functions to work with value arrays.

^code array-fns-h (1 before, 2 after)

The implementations will probably give you déjà vu. First, to create a new one:

^code value-c

Once we have an initialized array, we can start <span name="add">adding</span>
values to it.

<aside name="add">

Fortunately, we don't need other operations like insertion and removal.

</aside>

^code write-value-array

The memory-management macros we wrote earlier do let us reuse some of the logic
from the code array, so this isn't too bad. Finally, to release all memory used
by the array:

^code free-value-array

Now that we have growable arrays of values, we can add one to Chunk to store the
chunk's constants.

^code chunk-constants (1 before, 1 after)

Don't forget the include.

^code chunk-h-include-value (1 before, 2 after)

Ah, C, and its Stone Age modularity story. Where were we? Right. When we
initialize a new chunk, we initialize its constant list too.

^code chunk-init-constant-array (1 before, 1 after)

Likewise, we free the constants when we free the chunk.

^code chunk-free-constants (1 before, 1 after)

Next, we define a convenience method to add a new constant to the chunk. Our
yet-to-be-written compiler could write to the constant array inside Chunk
directly -- it's not like C has private fields or anything -- but it's a little
nicer to add an explicit function.

^code add-constant-h (1 before, 2 after)

Then we implement it.

^code add-constant

After we add the constant, we return the index where the constant was appended
so that we can locate that same constant later.

### Constant instructions

We can *store* constants in chunks, but we also need to *execute* them. In a
piece of code like:

```lox
print 1;
print 2;
```

The compiled chunk needs to not only contain the values 1 and 2, but know *when*
to produce them so that they are printed in the right order. Thus, we need an
instruction that produces a particular constant.

^code op-constant (1 before, 1 after)

When the VM executes a constant instruction, it <span name="load">"loads"</span>
the constant for use. This new instruction is a little more complex than
`OP_RETURN`. In the above example, we load two different constants. A single
bare opcode isn't enough to know *which* constant to load.

<aside name="load">

I'm being vague about what it means to "load" or "produce" a constant because we
haven't learned how the virtual machine actually executes code at runtime yet.
For that, you'll have to wait until you get to (or skip ahead to, I suppose) the
[next chapter][vm].

[vm]: a-virtual-machine.html

</aside>

To handle cases like this, our bytecode -- like most others -- allows
instructions to have <span name="operand">**operands**</span>. These are stored
as binary data immediately after the opcode in the instruction stream and let us
parameterize what the instruction does.

<img src="image/chunks-of-bytecode/format.png" alt="OP_CONSTANT is a byte for
the opcode followed by a byte for the constant index." />

Each opcode determines how many operand bytes it has and what they mean. For
example, a simple operation like "return" may have no operands, where an
instruction for "load local variable" needs an operand to identify which
variable to load. Each time we add a new opcode to clox, we specify what its
operands look like -- its **instruction format**.

<aside name="operand">

Bytecode instruction operands are *not* the same as the operands passed to an
arithmetic operator. You'll see when we get to expressions that arithmetic
operand values are tracked separately. Instruction operands are a lower-level
notion that modifies how the bytecode instruction itself behaves.

</aside>

In this case, `OP_CONSTANT` takes a single byte operand that specifies which
constant to load from the chunk's constant array. Since we don't have a compiler
yet, we "hand-compile" an instruction in our test chunk.

^code main-constant (1 before, 1 after)

We add the constant value itself to the chunk's constant pool. That returns the
index of the constant in the array. Then we write the constant instruction,
starting with its opcode. After that, we write the one-byte constant index
operand. Note that `writeChunk()` can write opcodes or operands. It's all raw
bytes as far as that function is concerned.

If we try to run this now, the disassembler is going to yell at us because it
doesn't know how to decode the new instruction. Let's fix that.

^code disassemble-constant (1 before, 1 after)

This instruction has a different instruction format, so we write a new helper
function to disassemble it.

^code constant-instruction

There's more going on here. As with `OP_RETURN`, we print out the name of the
opcode. Then we pull out the constant index from the subsequent byte in the
chunk. We print that index, but that isn't super useful to us human readers. So
we also look up the actual constant value -- since constants *are* known at
compile time after all -- and display the value itself too.

This requires some way to print a clox Value. That function will live in the
"value" module, so we include that.

^code debug-include-value (1 before, 2 after)

Over in that header, we declare:

^code print-value-h (1 before, 2 after)

And here's an implementation:

^code print-value

Magnificent, right? As you can imagine, this is going to get more complex once
we add dynamic typing to Lox and have values of different types.

Back in `constantInstruction()`, the only remaining piece is the return value.

^code return-after-operand (1 before, 1 after)

Remember that `disassembleInstruction()` also returns a number to tell the
caller the offset of the beginning of the *next* instruction. Where `OP_RETURN`
was only a single byte, `OP_CONSTANT` is two -- one for the opcode and one for
the operand.

## Line Information

Chunks contain almost all of the information that the runtime needs from the
user's source code. It's kind of crazy to think that we can reduce all of the
different AST classes that we created in jlox down to an array of bytes and an
array of constants. There's only one piece of data we're missing. We need it,
even though the user hopes to never see it.

When a runtime error occurs, we show the user the line number of the offending
source code. In jlox, those numbers live in tokens, which we in turn store in
the AST nodes. We need a different solution for clox now that we've ditched
syntax trees in favor of bytecode. Given any bytecode instruction, we need to be
able to determine the line of the user's source program that it was compiled
from.

There are a lot of clever ways we could encode this. I took the absolute <span
name="side">simplest</span> approach I could come up with, even though it's
embarrassingly inefficient with memory. In the chunk, we store a separate array
of integers that parallels the bytecode. Each number in the array is the line
number for the corresponding byte in the bytecode. When a runtime error occurs,
we look up the line number at the same index as the current instruction's offset
in the code array.

<aside name="side">

This braindead encoding does do one thing right: it keeps the line information
in a *separate* array instead of interleaving it in the bytecode itself. Since
line information is only used when a runtime error occurs, we don't want it
between the instructions, taking up precious space in the CPU cache and causing
more cache misses as the interpreter skips past it to get to the opcodes and
operands it cares about.

</aside>

To implement this, we add another array to Chunk.

^code chunk-lines (1 before, 1 after)

Since it exactly parallels the bytecode array, we don't need a separate count or
capacity. Every time we touch the code array, we make a corresponding change to
the line number array, starting with initialization.

^code chunk-null-lines (1 before, 1 after)

And likewise deallocation:

^code chunk-free-lines (1 before, 1 after)

When we write a byte of code to the chunk, we need to know what source line it
came from, so we add an extra parameter in the declaration of `writeChunk()`.

^code write-chunk-with-line-h (1 before, 1 after)

And in the implementation:

^code write-chunk-with-line (1 after)

When we allocate or grow the code array, we do the same for the line info too.

^code write-chunk-line (2 before, 1 after)

Finally, we store the line number in the array.

^code chunk-write-line (1 before, 1 after)

### Disassembling line information

Alright, let's try this out with our little, uh, artisanal chunk. First, since
we added a new parameter to `writeChunk()`, we need to fix those calls to pass
in some -- arbitrary at this point -- line number.

^code main-chunk-line (1 before, 2 after)

Once we have a real front end, of course, the compiler will track the current
line as it parses and pass that in.

Now that we have line information for every instruction, let's put it to good
use. In our disassembler, it's helpful to show which source line each
instruction was compiled from. That gives us a way to map back to the original
code when we're trying to figure out what some blob of bytecode is supposed to
do. After printing the offset of the instruction -- the number of bytes from the
beginning of the chunk -- we show its source line.

^code show-location (2 before, 2 after)

Bytecode instructions tend to be pretty fine-grained. A single line of source
code often compiles to a whole sequence of instructions. To make that more
visually clear, we show a `|` for any instruction that comes from the same
source line as the preceding one. The resulting output for our handwritten
chunk looks like:

```text
== test chunk ==
0000  123 OP_CONSTANT         0 '1.2'
0002    | OP_RETURN
```

We have a three-byte chunk. The first two bytes are a constant instruction that
loads 1.2 from the chunk's constant pool. The first byte is the `OP_CONSTANT`
opcode and the second is the index in the constant pool. The third byte (at
offset 2) is a single-byte return instruction.

In the remaining chapters, we will flesh this out with lots more kinds of
instructions. But the basic structure is here, and we have everything we need
now to completely represent an executable piece of code at runtime in our
virtual machine. Remember that whole family of AST classes we defined in jlox?
In clox, we've reduced that down to three arrays: bytes of code, constant
values, and line information for debugging.

This reduction is a key reason why our new interpreter will be faster than jlox.
You can think of bytecode as a sort of compact serialization of the AST, highly
optimized for how the interpreter will deserialize it in the order it needs as
it executes. In the [next chapter][vm], we will see how the virtual machine does
exactly that.

<div class="challenges">

## Challenges

1.  Our encoding of line information is hilariously wasteful of memory. Given
    that a series of instructions often correspond to the same source line, a
    natural solution is something akin to [run-length encoding][rle] of the line
    numbers.

    Devise an encoding that compresses the line information for a
    series of instructions on the same line. Change `writeChunk()` to write this
    compressed form, and implement a `getLine()` function that, given the index
    of an instruction, determines the line where the instruction occurs.

    *Hint: It's not necessary for `getLine()` to be particularly efficient.
    Since it is called only when a runtime error occurs, it is well off the
    critical path where performance matters.*

2.  Because `OP_CONSTANT` uses only a single byte for its operand, a chunk may
    only contain up to 256 different constants. That's small enough that people
    writing real-world code will hit that limit. We could use two or more bytes
    to store the operand, but that makes *every* constant instruction take up
    more space. Most chunks won't need that many unique constants, so that
    wastes space and sacrifices some locality in the common case to support the
    rare case.

    To balance those two competing aims, many instruction sets feature multiple
    instructions that perform the same operation but with operands of different
    sizes. Leave our existing one-byte `OP_CONSTANT` instruction alone, and
    define a second `OP_CONSTANT_LONG` instruction. It stores the operand as a
    24-bit number, which should be plenty.

    Implement this function:

    ```c
    void writeConstant(Chunk* chunk, Value value, int line) {
      // Implement me...
    }
    ```

    It adds `value` to `chunk`'s constant array and then writes an appropriate
    instruction to load the constant. Also add support to the disassembler for
    `OP_CONSTANT_LONG` instructions.

    Defining two instructions seems to be the best of both worlds. What
    sacrifices, if any, does it force on us?

3.  Our `reallocate()` function relies on the C standard library for dynamic
    memory allocation and freeing. `malloc()` and `free()` aren't magic. Find
    a couple of open source implementations of them and explain how they work.
    How do they keep track of which bytes are allocated and which are free?
    What is required to allocate a block of memory? Free it? How do they make
    that efficient? What do they do about fragmentation?

    *Hardcore mode:* Implement `reallocate()` without calling `realloc()`,
    `malloc()`, or `free()`. You are allowed to call `malloc()` *once*, at the
    beginning of the interpreter's execution, to allocate a single big block of
    memory, which your `reallocate()` function has access to. It parcels out
    blobs of memory from that single region, your own personal heap. It's your
    job to define how it does that.

</div>

[rle]: https://en.wikipedia.org/wiki/Run-length_encoding

<div class="design-note">

## Design Note: Test Your Language

We're almost halfway through the book and one thing we haven't talked about is
*testing* your language implementation. That's not because testing isn't
important. I can't possibly stress enough how vital it is to have a good,
comprehensive test suite for your language.

I wrote a [test suite for Lox][tests] (which you are welcome to use on your own
Lox implementation) before I wrote a single word of this book. Those tests found
countless bugs in my implementations.

[tests]: https://github.com/munificent/craftinginterpreters/tree/master/test

Tests are important in all software, but they're even more important for a
programming language for at least a couple of reasons:

*   **Users expect their programming languages to be rock solid.** We are so
    used to mature, stable compilers and interpreters that "It's your code, not
    the compiler" is [an ingrained part of software culture][fault]. If there
    are bugs in your language implementation, users will go through the full
    five stages of grief before they can figure out what's going on, and you
    don't want to put them through all that.

*   **A language implementation is a deeply interconnected piece of software.**
    Some codebases are broad and shallow. If the file loading code is broken in
    your text editor, it -- hopefully! -- won't cause failures in the text
    rendering on screen. Language implementations are narrower and deeper,
    especially the core of the interpreter that handles the language's actual
    semantics. That makes it easy for subtle bugs to creep in caused by weird
    interactions between various parts of the system. It takes good tests to
    flush those out.

*   **The input to a language implementation is, by design, combinatorial.**
    There are an infinite number of possible programs a user could write, and
    your implementation needs to run them all correctly. You obviously can't
    test that exhaustively, but you need to work hard to cover as much of the
    input space as you can.

*   **Language implementations are often complex, constantly changing, and full
    of optimizations.** That leads to gnarly code with lots of dark corners
    where bugs can hide.

[fault]: https://blog.codinghorror.com/the-first-rule-of-programming-its-always-your-fault/

All of that means you're gonna want a lot of tests. But *what* tests? Projects
I've seen focus mostly on end-to-end "language tests". Each test is a program
written in the language along with the output or errors it is expected to
produce. Then you have a test runner that pushes the test program through your
language implementation and validates that it does what it's supposed to.
Writing your tests in the language itself has a few nice advantages:

*   The tests aren't coupled to any particular API or internal architecture
    decisions of the implementation. This frees you to reorganize or rewrite
    parts of your interpreter or compiler without needing to update a slew of
    tests.

*   You can use the same tests for multiple implementations of the language.

*   Tests can often be terse and easy to read and maintain since they are
    simply scripts in your language.

It's not all rosy, though:

*   End-to-end tests help you determine *if* there is a bug, but not *where* the
    bug is. It can be harder to figure out where the erroneous code in the
    implementation is because all the test tells you is that the right output
    didn't appear.

*   It can be a chore to craft a valid program that tickles some obscure corner
    of the implementation. This is particularly true for highly optimized
    compilers where you may need to write convoluted code to ensure that you
    end up on just the right optimization path where a bug may be hiding.

*   The overhead can be high to fire up the interpreter, parse, compile, and
    run each test script. With a big suite of tests -- which you *do* want,
    remember -- that can mean a lot of time spent waiting for the tests to
    finish running.

I could go on, but I don't want this to turn into a sermon. Also, I don't
pretend to be an expert on *how* to test languages. I just want you to
internalize how important it is *that* you test yours. Seriously. Test your
language. You'll thank me for it.

</div>


================================================
FILE: book/classes-and-instances.md
================================================
> Caring too much for objects can destroy you. Only -- if you care for a thing
> enough, it takes on a life of its own, doesn't it? And isn't the whole point
> of things -- beautiful things -- that they connect you to some larger beauty?
>
> <cite>Donna Tartt, <em>The Goldfinch</em></cite>

The last area left to implement in clox is object-oriented programming. <span
name="oop">OOP</span> is a bundle of intertwined features: classes, instances,
fields, methods, initializers, and inheritance. Using relatively high-level
Java, we packed all that into two chapters. Now that we're coding in C, which
feels like building a model of the Eiffel Tower out of toothpicks, we'll devote
three chapters to covering the same territory. This makes for a leisurely stroll
through the implementation. After strenuous chapters like [closures][] and the
[garbage collector][], you have earned a rest. In fact, the book should be easy
from here on out.

<aside name="oop">

People who have strong opinions about object-oriented programming -- read
"everyone" -- tend to assume OOP means some very specific list of language
features, but really there's a whole space to explore, and each language has its
own ingredients and recipes.

Self has objects but no classes. CLOS has methods but doesn't attach them to
specific classes. C++ initially had no runtime polymorphism -- no virtual
methods. Python has multiple inheritance, but Java does not. Ruby attaches
methods to classes, but you can also define methods on a single object.

</aside>

In this chapter, we cover the first three features: classes, instances, and
fields. This is the stateful side of object orientation. Then in the next two
chapters, we will hang behavior and code reuse off of those objects.

[closures]: closures.html
[garbage collector]: garbage-collection.html

## Class Objects

In a class-based object-oriented language, everything begins with classes. They
define what sorts of objects exist in the program and are the factories used to
produce new instances. Going bottom-up, we'll start with their runtime
representation and then hook that into the language.

By this point, we're well-acquainted with the process of adding a new object
type to the VM. We start with a struct.

^code obj-class (1 before, 2 after)

After the Obj header, we store the class's name. This isn't strictly needed for
the user's program, but it lets us show the name at runtime for things like
stack traces.

The new type needs a corresponding case in the ObjType enum.

^code obj-type-class (1 before, 1 after)

And that type gets a corresponding pair of macros. First, for testing an
object's type:

^code is-class (2 before, 1 after)

And then for casting a Value to an ObjClass pointer:

^code as-class (2 before, 1 after)

The VM creates new class objects using this function:

^code new-class-h (2 before, 1 after)

The implementation lives over here:

^code new-class

Pretty much all boilerplate. It takes in the class's name as a string and stores
it. Every time the user declares a new class, the VM will create a new one of
these ObjClass structs to represent it.

<aside name="klass">

<img src="image/classes-and-instances/klass.png" alt="'Klass' in a zany kidz font."/>

I named the variable "klass" not just to give the VM a zany preschool "Kidz
Korner" feel. It makes it easier to get clox compiling as C++ where "class" is
a reserved word.

</aside>

When the VM no longer needs a class, it frees it like so:

^code free-class (1 before, 1 after)

<aside name="braces">

The braces here are pointless now, but will be useful in the next chapter when
we add some more code to the switch case.

</aside>

We have a memory manager now, so we also need to support tracing through class
objects.

^code blacken-class (1 before, 1 after)

When the GC reaches a class object, it marks the class's name to keep that
string alive too.

The last operation the VM can perform on a class is printing it.

^code print-class (1 before, 1 after)

A class simply says its own name.

## Class Declarations

Runtime representation in hand, we are ready to add support for classes to the
language. Next, we move into the parser.

^code match-class (1 before, 1 after)

Class declarations are statements, and the parser recognizes one by the leading
`class` keyword. The rest of the compilation happens over here:

^code class-declaration

Immediately after the `class` keyword is the class's name. We take that
identifier and add it to the surrounding function's constant table as a string.
As you just saw, printing a class shows its name, so the compiler needs to stuff
the name string somewhere that the runtime can find. The constant table is the
way to do that.

The class's <span name="variable">name</span> is also used to bind the class
object to a variable of the same name. So we declare a variable with that
identifier right after consuming its token.

<aside name="variable">

We could have made class declarations be *expressions* instead of statements --
they are essentially a literal that produces a value after all. Then users would
have to explicitly bind the class to a variable themselves like:

```lox
var Pie = class {}
```

Sort of like lambda functions but for classes. But since we generally want
classes to be named anyway, it makes sense to treat them as declarations.

</aside>

Next, we emit a new instruction to actually create the class object at runtime.
That instruction takes the constant table index of the class's name as an
operand.

After that, but before compiling the body of the class, we define the variable
for the class's name. *Declaring* the variable adds it to the scope, but recall
from [a previous chapter][scope] that we can't *use* the variable until it's
*defined*. For classes, we define the variable before the body. That way, users
can refer to the containing class inside the bodies of its own methods. That's
useful for things like factory methods that produce new instances of the class.

[scope]: local-variables.html#another-scope-edge-case

Finally, we compile the body. We don't have methods yet, so right now it's
simply an empty pair of braces. Lox doesn't require fields to be declared in the
class, so we're done with the body -- and the parser -- for now.

The compiler is emitting a new instruction, so let's define that.

^code class-op (1 before, 1 after)

And add it to the disassembler:

^code disassemble-class (2 before, 1 after)

For such a large-seeming feature, the interpreter support is minimal.

^code interpret-class (2 before, 1 after)

We load the string for the class's name from the constant table and pass that to
`newClass()`. That creates a new class object with the given name. We push that
onto the stack and we're good. If the class is bound to a global variable, then
the compiler's call to `defineVariable()` will emit code to store that object
from the stack into the global variable table. Otherwise, it's right where it
needs to be on the stack for a new <span name="local">local</span> variable.

<aside name="local">

"Local" classes -- classes declared inside the body of a function or block, are
an unusual concept. Many languages don't allow them at all. But since Lox is a
dynamically typed scripting language, it treats the top level of a program and
the bodies of functions and blocks uniformly. Classes are just another kind of
declaration, and since you can declare variables and functions inside blocks,
you can declare classes in there too.

</aside>

There you have it, our VM supports classes now. You can run this:

```lox
class Brioche {}
print Brioche;
```

Unfortunately, printing is about *all* you can do with classes, so next is
making them more useful.

## Instances of Classes

Classes serve two main purposes in a language:

*   **They are how you create new instances.** Sometimes this involves a `new`
    keyword, other times it's a method call on the class object, but you usually
    mention the class by name *somehow* to get a new instance.

*   **They contain methods.** These define how all instances of the class
    behave.

We won't get to methods until the next chapter, so for now we will only worry
about the first part. Before classes can create instances, we need a
representation for them.

^code obj-instance (1 before, 2 after)

Instances know their class -- each instance has a pointer to the class that it
is an instance of. We won't use this much in this chapter, but it will become
critical when we add methods.

More important to this chapter is how instances store their state. Lox lets
users freely add fields to an instance at runtime. This means we need a storage
mechanism that can grow. We could use a dynamic array, but we also want to look
up fields by name as quickly as possible. There's a data structure that's just
perfect for quickly accessing a set of values by name and
-- even more conveniently -- we've already implemented it. Each instance stores
its fields using a hash table.

<aside name="fields">

Being able to freely add fields to an object at runtime is a big practical
difference between most dynamic and static languages. Statically typed languages
usually require fields to be explicitly declared. This way, the compiler knows
exactly what fields each instance has. It can use that to determine the precise
amount of memory needed for each instance and the offsets in that memory where
each field can be found.

In Lox and other dynamic languages, accessing a field is usually a hash table
lookup. Constant time, but still pretty heavyweight. In a language like C++,
accessing a field is as fast as offsetting a pointer by an integer constant.

</aside>

We only need to add an include, and we've got it.

^code object-include-table (1 before, 1 after)

This new struct gets a new object type.

^code obj-type-instance (1 before, 1 after)

I want to slow down a bit here because the Lox *language's* notion of "type" and
the VM *implementation's* notion of "type" brush against each other in ways that
can be confusing. Inside the C code that makes clox, there are a number of
different types of Obj -- ObjString, ObjClosure, etc. Each has its own internal
representation and semantics.

In the Lox *language*, users can define their own classes -- say Cake and Pie --
and then create instances of those classes. From the user's perspective, an
instance of Cake is a different type of object than an instance of Pie. But,
from the VM's perspective, every class the user defines is simply another value
of type ObjClass. Likewise, each instance in the user's program, no matter what
class it is an instance of, is an ObjInstance. That one VM object type covers
instances of all classes. The two worlds map to each other something like this:

<img src="image/classes-and-instances/lox-clox.png" alt="A set of class declarations and instances, and the runtime representations each maps to."/>

Got it? OK, back to the implementation. We also get our usual macros.

^code is-instance (1 before, 1 after)

And:

^code as-instance (1 before, 1 after)

Since fields are added after the instance is created, the "constructor" function
only needs to know the class.

^code new-instance-h (1 before, 1 after)

We implement that function here:

^code new-instance

We store a reference to the instance's class. Then we initialize the field
table to an empty hash table. A new baby object is born!

At the sadder end of the instance's lifespan, it gets freed.

^code free-instance (3 before, 1 after)

The instance owns its field table so when freeing the instance, we also free the
table. We don't explicitly free the entries *in* the table, because there may
be other references to those objects. The garbage collector will take care of
those for us. Here we free only the entry array of the table itself.

Speaking of the garbage collector, it needs support for tracing through
instances.

^code blacken-instance (3 before, 1 after)

If the instance is alive, we need to keep its class around. Also, we need to
keep every object referenced by the instance's fields. Most live objects that
are not roots are reachable because some instance refers to the object in a
field. Fortunately, we already have a nice `markTable()` function to make
tracing them easy.

Less critical but still important is printing.

^code print-instance (1 before, 1 after)

<span name="print">An</span> instance prints the name of its class followed by
"instance". (The "instance" part is mainly so that classes and instances don't
print the same.)

<aside name="print">

Most object-oriented languages let a class define some sort of `toString()`
method that lets the class specify how its instances are converted to a string
and printed. If Lox was less of a toy language, I would want to support that
too.

</aside>

The real fun happens over in the interpreter. Lox has no special `new` keyword.
The way to create an instance of a class is to invoke the class itself as if it
were a function. The runtime already supports function calls, and it checks the
type of object being called to make sure the user doesn't try to invoke a number
or other invalid type.

We extend that runtime checking with a new case.

^code call-class (1 before, 1 after)

If the value being called -- the object that results when evaluating the
expression to the left of the opening parenthesis -- is a class, then we treat
it as a constructor call. We <span name="args">create</span> a new instance of
the called class and store the result on the stack.

<aside name="args">

We ignore any arguments passed to the call for now. We'll revisit this code in
the [next chapter][next] when we add support for initializers.

[next]: methods-and-initializers.html

</aside>

We're one step farther. Now we can define classes and create instances of them.

```lox
class Brioche {}
print Brioche();
```

Note the parentheses after `Brioche` on the second line now. This prints
"Brioche instance".

## Get and Set Expressions

Our object representation for instances can already store state, so all that
remains is exposing that functionality to the user. Fields are accessed and
modified using get and set expressions. Not one to break with tradition, Lox
uses the classic "dot" syntax:

```lox
eclair.filling = "pastry creme";
print eclair.filling;
```

The period -- full stop for my English friends -- works <span
name="sort">sort</span> of like an infix operator. There is an expression to the
left that is evaluated first and produces an instance. After that is the `.`
followed by a field name. Since there is a preceding operand, we hook this into
the parse table as an infix expression.

<aside name="sort">

I say "sort of" because the right-hand side after the `.` is not an expression,
but a single identifier whose semantics are handled by the get or set expression
itself. It's really closer to a postfix expression.

</aside>

^code table-dot (1 before, 1 after)

As in other languages, the `.` operator binds tightly, with precedence as high
as the parentheses in a function call. After the parser consumes the dot token,
it dispatches to a new parse function.

^code compile-dot

The parser expects to find a <span name="prop">property</span> name immediately
after the dot. We load that token's lexeme into the constant table as a string
so that the name is available at runtime.

<aside name="prop">

The compiler uses "property" instead of "field" here because, remember, Lox also
lets you use dot syntax to access a method without calling it. "Property" is the
general term we use to refer to any named entity you can access on an instance.
Fields are the subset of properties that are backed by the instance's state.

</aside>

We have two new expression forms -- getters and setters -- that this one
function handles. If we see an equals sign after the field name, it must be a
set expression that is assigning to a field. But we don't *always* allow an
equals sign after the field to be compiled. Consider:

```lox
a + b.c = 3
```

This is syntactically invalid according to Lox's grammar, which means our Lox
implementation is obligated to detect and report the error. If `dot()` silently
parsed the `= 3` part, we would incorrectly interpret the code as if the user
had written:

```lox
a + (b.c = 3)
```

The problem is that the `=` side of a set expression has much lower precedence
than the `.` part. The parser may call `dot()` in a context that is too high
precedence to permit a setter to appear. To avoid incorrectly allowing that, we
parse and compile the equals part only when `canAssign` is true. If an equals
token appears when `canAssign` is false, `dot()` leaves it alone and returns. In
that case, the compiler will eventually unwind up to `parsePrecedence()`, which
stops at the unexpected `=` still sitting as the next token and reports an
error.

If we find an `=` in a context where it *is* allowed, then we compile the
expression that follows. After that, we emit a new <span
name="set">`OP_SET_PROPERTY`</span> instruction. That takes a single operand for
the index of the property name in the constant table. If we didn't compile a set
expression, we assume it's a getter and emit an `OP_GET_PROPERTY` instruction,
which also takes an operand for the property name.

<aside name="set">

You can't *set* a non-field property, so I suppose that instruction could have
been `OP_SET_FIELD`, but I thought it looked nicer to be consistent with the get
instruction.

</aside>

Now is a good time to define these two new instructions.

^code property-ops (1 before, 1 after)

And add support for disassembling them:

^code disassemble-property-ops (1 before, 1 after)

### Interpreting getter and setter expressions

Sliding over to the runtime, we'll start with get expressions since those are a
little simpler.

^code interpret-get-property (1 before, 1 after)

When the interpreter reaches this instruction, the expression to the left of the
dot has already been executed and the resulting instance is on top of the stack.
We read the field name from the constant pool and look it up in the instance's
field table. If the hash table contains an entry with that name, we pop the
instance and push the entry's value as the result.

Of course, the field might not exist. In Lox, we've defined that to be a runtime
error. So we add a check for that and abort if it happens.

^code get-undefined (3 before, 2 after)

<span name="field">There</span> is another failure mode to handle which you've
probably noticed. The above code assumes the expression to the left of the dot
did evaluate to an ObjInstance. But there's nothing preventing a user from
writing this:

```lox
var obj = "not an instance";
print obj.field;
```

The user's program is wrong, but the VM still has to handle it with some grace.
Right now, it will misinterpret the bits of the ObjString as an ObjInstance and,
I don't know, catch on fire or something definitely not graceful.

In Lox, only instances are allowed to have fields. You can't stuff a field onto
a string or number. So we need to check that the value is an instance before
accessing any fields on it.

<aside name="field">

Lox *could* support adding fields to values of other types. It's our language
and we can do what we want. But it's likely a bad idea. It significantly
complicates the implementation in ways that hurt performance -- for example,
string interning gets a lot harder.

Also, it raises gnarly semantic questions around the equality and identity of
values. If I attach a field to the number `3`, does the result of `1 + 2` have
that field as well? If so, how does the implementation track that? If not, are
those two resulting "threes" still considered equal?

</aside>

^code get-not-instance (1 before, 1 after)

If the value on the stack isn't an instance, we report a runtime error and
safely exit.

Of course, get expressions are not very useful when no instances have any
fields. For that we need setters.

^code interpret-set-property (2 before, 1 after)

This is a little more complex than `OP_GET_PROPERTY`. When this executes, the
top of the stack has the instance whose field is being set and, above that, the
value to be stored. Like before, we read the instruction's operand and find the
field name string. Using that, we store the value on top of the stack into the
instance's field table.

After that is a little <span name="stack">stack</span> juggling. We pop the
stored value off, then pop the instance, and finally push the value back on. In
other words, we remove the *second* element from the stack while leaving the top
alone. A setter is itself an expression whose result is the assigned value, so
we need to leave that value on the stack. Here's what I mean:

<aside name="stack">

The stack operations go like this:

<img src="image/classes-and-instances/stack.png" alt="Popping two values and then pushing the first value back on the stack."/>

</aside>

```lox
class Toast {}
var toast = Toast();
print toast.jam = "grape"; // Prints "grape".
```

Unlike when reading a field, we don't need to worry about the hash table not
containing the field. A setter implicitly creates the field if needed. We do
need to handle the user incorrectly trying to store a field on a value that
isn't an instance.

^code set-not-instance (1 before, 1 after)

Exactly like with get expressions, we check the value's type and report a
runtime error if it's invalid. And, with that, the stateful side of Lox's
support for object-oriented programming is in place. Give it a try:

```lox
class Pair {}

var pair = Pair();
pair.first = 1;
pair.second = 2;
print pair.first + pair.second; // 3.
```

This doesn't really feel very *object*-oriented. It's more like a strange,
dynamically typed variant of C where objects are loose struct-like bags of data.
Sort of a dynamic procedural language. But this is a big step in expressiveness.
Our Lox implementation now lets users freely aggregate data into bigger units.
In the next chapter, we will breathe life into those inert blobs.

<div class="challenges">

## Challenges

1.  Trying to access a non-existent field on an object immediately aborts the
    entire VM. The user has no way to recover from this runtime error, nor is
    there any way to see if a field exists *before* trying to access it. It's up
    to the user to ensure on their own that only valid fields are read.

    How do other dynamically typed languages handle missing fields? What do you
    think Lox should do? Implement your solution.

2.  Fields are accessed at runtime by their *string* name. But that name must
    always appear directly in the source code as an *identifier token*. A user
    program cannot imperatively build a string value and then use that as the
    name of a field. Do you think they should be able to? Devise a language
    feature that enables that and implement it.

3.  Conversely, Lox offers no way to *remove* a field from an instance. You can
    set a field's value to `nil`, but the entry in the hash table is still
    there. How do other languages handle this? Choose and implement a strategy
    for Lox.

4.  Because fields are accessed by name at runtime, working with instance state
    is slow. It's technically a constant-time operation -- thanks, hash tables
    -- but the constant factors are relatively large. This is a major component
    of why dynamic languages are slower than statically typed ones.

    How do sophisticated implementations of dynamically typed languages cope
    with and optimize this?

</div>


================================================
FILE: book/classes.md
================================================
> One has no right to love or hate anything if one has not acquired a thorough
> knowledge of its nature. Great love springs from great knowledge of the
> beloved object, and if you know it but little you will be able to love it only
> a little or not at all.
>
> <cite>Leonardo da Vinci</cite>

We're eleven chapters in, and the interpreter sitting on your machine is nearly
a complete scripting language. It could use a couple of built-in data structures
like lists and maps, and it certainly needs a core library for file I/O, user
input, etc. But the language itself is sufficient. We've got a little procedural
language in the same vein as BASIC, Tcl, Scheme (minus macros), and early
versions of Python and Lua.

If this were the '80s, we'd stop here. But today, many popular languages support
"object-oriented programming". Adding that to Lox will give users a familiar set
of tools for writing larger programs. Even if you personally don't <span
name="hate">like</span> OOP, this chapter and [the next][inheritance] will help
you understand how others design and build object systems.

[inheritance]: inheritance.html

<aside name="hate">

If you *really* hate classes, though, you can skip these two chapters. They are
fairly isolated from the rest of the book. Personally, I find it's good to learn
more about the things I dislike. Things look simple at a distance, but as I get
closer, details emerge and I gain a more nuanced perspective.

</aside>

## OOP and Classes

There are three broad paths to object-oriented programming: classes,
[prototypes][], and <span name="multimethods">[multimethods][]</span>. Classes
came first and are the most popular style. With the rise of JavaScript (and to a
lesser extent [Lua][]), prototypes are more widely known than they used to be.
I'll talk more about those [later][]. For Lox, we're taking the, ahem, classic
approach.

[prototypes]: http://gameprogrammingpatterns.com/prototype.html
[multimethods]: https://en.wikipedia.org/wiki/Multiple_dispatch
[lua]: https://www.lua.org/pil/13.4.1.html
[later]: #design-note

<aside name="multimethods">

Multimethods are the approach you're least likely to be familiar with. I'd love
to talk more about them -- I designed [a hobby language][magpie] around them
once and they are *super rad* -- but there are only so many pages I can fit in.
If you'd like to learn more, take a look at [CLOS][] (the object system in
Common Lisp), [Dylan][], [Julia][], or [Raku][].

[clos]: https://en.wikipedia.org/wiki/Common_Lisp_Object_System
[magpie]: http://magpie-lang.org/
[dylan]: https://opendylan.org/
[julia]: https://julialang.org/
[raku]: https://docs.raku.org/language/functions#Multi-dispatch

</aside>

Since you've written about a thousand lines of Java code with me already, I'm
assuming you don't need a detailed introduction to object orientation. The main
goal is to bundle data with the code that acts on it. Users do that by declaring
a *class* that:

<span name="circle"></span>

1. Exposes a *constructor* to create and initialize new *instances* of the
   class.

1. Provides a way to store and access *fields* on instances.

1. Defines a set of *methods* shared by all instances of the class that
   operate on each instance's state.

That's about as minimal as it gets. Most object-oriented languages, all the way
back to Simula, also do inheritance to reuse behavior across classes. We'll add
that in the [next chapter][inheritance]. Even kicking that out, we still have a
lot to get through. This is a big chapter and everything doesn't quite come
together until we have all of the above pieces, so gather your stamina.

<aside name="circle">

<img src="image/classes/circle.png" alt="The relationships between classes, methods, instances, constructors, and fields." />

It's like the circle of life, *sans* Sir Elton John.

</aside>

[inheritance]: inheritance.html

## Class Declarations

Like we do, we're gonna start with syntax. A `class` statement introduces a new
name, so it lives in the `declaration` grammar rule.

```ebnf
declaration    → classDecl
               | funDecl
               | varDecl
               | statement ;

classDecl      → "class" IDENTIFIER "{" function* "}" ;
```

The new `classDecl` rule relies on the `function` rule we defined
[earlier][function rule]. To refresh your memory:

[function rule]: functions.html#function-declarations

```ebnf
function       → IDENTIFIER "(" parameters? ")" block ;
parameters     → IDENTIFIER ( "," IDENTIFIER )* ;
```

In plain English, a class declaration is the `class` keyword, followed by the
class's name, then a curly-braced body. Inside that body is a list of method
declarations. Unlike function declarations, methods don't have a leading <span
name="fun">`fun`</span> keyword. Each method is a name, parameter list, and
body. Here's an example:

<aside name="fun">

Not that I'm trying to say methods aren't fun or anything.

</aside>

```lox
class Breakfast {
  cook() {
    print "Eggs a-fryin'!";
  }

  serve(who) {
    print "Enjoy your breakfast, " + who + ".";
  }
}
```

As in most dynamically typed languages, fields are not explicitly listed in the
class declaration. Instances are loose bags of data and you can freely add
fields to them as you see fit using normal imperative code.

Over in our AST generator, the `classDecl` grammar rule gets its own statement
<span name="class-ast">node</span>.

^code class-ast (1 before, 1 after)

<aside name="class-ast">

The generated code for the new node is in [Appendix II][appendix-class].

[appendix-class]: appendix-ii.html#class-statement

</aside>

It stores the class's name and the methods inside its body. Methods are
represented by the existing Stmt.Function class that we use for function
declaration AST nodes. That gives us all the bits of state that we need for a
method: name, parameter list, and body.

A class can appear anywhere a named declaration is allowed, triggered by the
leading `class` keyword.

^code match-class (1 before, 1 after)

That calls out to:

^code parse-class-declaration

There's more meat to this than most of the other parsing methods, but it roughly
follows the grammar. We've already consumed the `class` keyword, so we look for
the expected class name next, followed by the opening curly brace. Once inside
the body, we keep parsing method declarations until we hit the closing brace.
Each method declaration is parsed by a call to `function()`, which we defined
back in the [chapter where functions were introduced][functions].

[functions]: functions.html

Like we do in any open-ended loop in the parser, we also check for hitting the
end of the file. That won't happen in correct code since a class should have a
closing brace at the end, but it ensures the parser doesn't get stuck in an
infinite loop if the user has a syntax error and forgets to correctly end the
class body.

We wrap the name and list of methods into a Stmt.Class node and we're done.
Previously, we would jump straight into the interpreter, but now we need to
plumb the node through the resolver first.

^code resolver-visit-class

We aren't going to worry about resolving the methods themselves yet, so for now
all we need to do is declare the class using its name. It's not common to
declare a class as a local variable, but Lox permits it, so we need to handle it
correctly.

Now we interpret the class declaration.

^code interpreter-visit-class

This looks similar to how we execute function declarations. We declare the
class's name in the current environment. Then we turn the class *syntax node*
into a LoxClass, the *runtime* representation of a class. We circle back and
store the class object in the variable we previously declared. That two-stage
variable binding process allows references to the class inside its own methods.

We will refine it throughout the chapter, but the first draft of LoxClass looks
like this:

^code lox-class

Literally a wrapper around a name. We don't even store the methods yet. Not
super useful, but it does have a `toString()` method so we can write a trivial
script and test that class objects are actually being parsed and executed.

```lox
class DevonshireCream {
  serveOn() {
    return "Scones";
  }
}

print DevonshireCream; // Prints "DevonshireCream".
```

## Creating Instances

We have classes, but they don't do anything yet. Lox doesn't have "static"
methods that you can call right on the class itself, so without actual
instances, classes are useless. Thus instances are the next step.

While some syntax and semantics are fairly standard across OOP languages, the
way you create new instances isn't. Ruby, following Smalltalk, creates instances
by calling a method on the class object itself, a <span
name="turtles">recursively</span> graceful approach. Some, like C++ and Java,
have a `new` keyword dedicated to birthing a new object. Python has you "call"
the class itself like a function. (JavaScript, ever weird, sort of does both.)

<aside name="turtles">

In Smalltalk, even *classes* are created by calling methods on an existing
object, usually the desired superclass. It's sort of a turtles-all-the-way-down
thing. It ultimately bottoms out on a few magical classes like Object and
Metaclass that the runtime conjures into being *ex nihilo*.

</aside>

I took a minimal approach with Lox. We already have class objects, and we
already have function calls, so we'll use call expressions on class objects to
create new instances. It's as if a class is a factory function that generates
instances of itself. This feels elegant to me, and also spares us the need to
introduce syntax like `new`. Therefore, we can skip past the front end straight
into the runtime.

Right now, if you try this:

```lox
class Bagel {}
Bagel();
```

You get a runtime error. `visitCallExpr()` checks to see if the called object
implements `LoxCallable` and reports an error since LoxClass doesn't. Not *yet*,
that is.

^code lox-class-callable (2 before, 1 after)

Implementing that interface requires two methods.

^code lox-class-call-arity

The interesting one is `call()`. When you "call" a class, it instantiates a new
LoxInstance for the called class and returns it. The `arity()` method is how the
interpreter validates that you passed the right number of arguments to a
callable. For now, we'll say you can't pass any. When we get to user-defined
constructors, we'll revisit this.

That leads us to LoxInstance, the runtime representation of an instance of a Lox
class. Again, our first implementation starts small.

^code lox-instance

Like LoxClass, it's pretty bare bones, but we're only getting started. If you
want to give it a try, here's a script to run:

```lox
class Bagel {}
var bagel = Bagel();
print bagel; // Prints "Bagel instance".
```

This program doesn't do much, but it's starting to do *something*.

## Properties on Instances

We have instances, so we should make them useful. We're at a fork in the road.
We could add behavior first -- methods -- or we could start with state --
properties. We're going to take the latter because, as we'll see, the two get
entangled in an interesting way and it will be easier to make sense of them if
we get properties working first.

Lox follows JavaScript and Python in how it handles state. Every instance is an
open collection of named values. Methods on the instance's class can access and
modify properties, but so can <span name="outside">outside</span> code.
Properties are accessed using a `.` syntax.

<aside name="outside">

Allowing code outside of the class to directly modify an object's fields goes
against the object-oriented credo that a class *encapsulates* state. Some
languages take a more principled stance. In Smalltalk, fields are accessed using
simple identifiers -- essentially, variables that are only in scope inside a
class's methods. Ruby uses `@` followed by a name to access a field in an
object. That syntax is only meaningful inside a method and always accesses state
on the current object.

Lox, for better or worse, isn't quite so pious about its OOP faith.

</aside>

```lox
someObject.someProperty
```

An expression followed by `.` and an identifier reads the property with that
name from the object the expression evaluates to. That dot has the same
precedence as the parentheses in a function call expression, so we slot it into
the grammar by replacing the existing `call` rule with:

```ebnf
call           → primary ( "(" arguments? ")" | "." IDENTIFIER )* ;
```

After a primary expression, we allow a series of any mixture of parenthesized
calls and dotted property accesses. "Property access" is a mouthful, so from
here on out, we'll call these "get expressions".

### Get expressions

The <span name="get-ast">syntax tree node</span> is:

^code get-ast (1 before, 1 after)

<aside name="get-ast">

The generated code for the new node is in [Appendix II][appendix-get].

[appendix-get]: appendix-ii.html#get-expression

</aside>

Following the grammar, the new parsing code goes in our existing `call()`
method.

^code parse-property (3 before, 4 after)

The outer `while` loop there corresponds to the `*` in the grammar rule. We zip
along the tokens building up a chain of calls and gets as we find parentheses
and dots, like so:

<img src="image/classes/zip.png" alt="Parsing a series of '.' and '()' expressions to an AST." />

Instances of the new Expr.Get node feed into the resolver.

^code resolver-visit-get

OK, not much to that. Since properties are looked up <span
name="dispatch">dynamically</span>, they don't get resolved. During resolution,
we recurse only into the expression to the left of the dot. The actual property
access happens in the interpreter.

<aside name="dispatch">

You can literally see that property dispatch in Lox is dynamic since we don't
process the property name during the static resolution pass.

</aside>

^code interpreter-visit-get

First, we evaluate the expression whose property is being accessed. In Lox, only
instances of classes have properties. If the object is some other type like a
number, invoking a getter on it is a runtime error.

If the object is a LoxInstance, then we ask it to look up the property. It must
be time to give LoxInstance some actual state. A map will do fine.

^code lox-instance-fields (1 before, 2 after)

Each key in the map is a property name and the corresponding value is the
property's value. To look up a property on an instance:

^code lox-instance-get-property

<aside name="hidden">

Doing a hash table lookup for every field access is fast enough for many
language implementations, but not ideal. High performance VMs for languages like
JavaScript use sophisticated optimizations like "[hidden classes][]" to avoid
that overhead.

Paradoxically, many of the optimizations invented to make dynamic languages fast
rest on the observation that -- even in those languages -- most code is fairly
static in terms of the types of objects it works with and their fields.

[hidden classes]: http://richardartoul.github.io/jekyll/update/2015/04/26/hidden-classes.html

</aside>

An interesting edge case we need to handle is what happens if the instance
doesn't *have* a property with the given name. We could silently return some
dummy value like `nil`, but my experience with languages like JavaScript is that
this behavior masks bugs more often than it does anything useful. Instead, we'll
make it a runtime error.

So the first thing we do is see if the instance actually has a field with the
given name. Only then do we return it. Otherwise, we raise an error.

Note how I switched from talking about "properties" to "fields". There is a
subtle difference between the two. Fields are named bits of state stored
directly in an instance. Properties are the named, uh, *things*, that a get
expression may return. Every field is a property, but as we'll see <span
name="foreshadowing">later</span>, not every property is a field.

<aside name="foreshadowing">

Ooh, foreshadowing. Spooky!

</aside>

In theory, we can now read properties on objects. But since there's no way to
actually stuff any state into an instance, there are no fields to access. Before
we can test out reading, we must support writing.

### Set expressions

Setters use the same syntax as getters, except they appear on the left side of
an assignment.

```lox
someObject.someProperty = value;
```

In grammar land, we extend the rule for assignment to allow dotted identifiers
on the left-hand side.

```ebnf
assignment     → ( call "." )? IDENTIFIER "=" assignment
               | logic_or ;
```

Unlike getters, setters don't chain. However, the reference to `call` allows any
high-precedence expression before the last dot, including any number of
*getters*, as in:

<img src="image/classes/setter.png" alt="breakfast.omelette.filling.meat = ham" />

Note here that only the *last* part, the `.meat`, is the *setter*. The
`.omelette` and `.filling` parts are both *get* expressions.

Just as we have two separate AST nodes for variable access and variable
assignment, we need a <span name="set-ast">second setter node</span> to
complement our getter node.

^code set-ast (1 before, 1 after)

<aside name="set-ast">

The generated code for the new node is in [Appendix II][appendix-set].

[appendix-set]: appendix-ii.html#set-expression

</aside>

In case you don't remember, the way we handle assignment in the parser is a
little funny. We can't easily tell that a series of tokens is the left-hand side
of an assignment until we reach the `=`. Now that our assignment grammar rule
has `call` on the left side, which can expand to arbitrarily large expressions,
that final `=` may be many tokens away from the point where we need to know
we're parsing an assignment.

Instead, the trick we do is parse the left-hand side as a normal expression.
Then, when we stumble onto the equal sign after it, we take the expression we
already parsed and transform it into the correct syntax tree node for the
assignment.

We add another clause to that transformation to handle turning an Expr.Get
expression on the left into the corresponding Expr.Set.

^code assign-set (1 before, 1 after)

That's parsing our syntax. We push that node through into the resolver.

^code resolver-visit-set

Again, like Expr.Get, the property itself is dynamically evaluated, so there's
nothing to resolve there. All we need to do is recurse into the two
subexpressions of Expr.Set, the object whose property is being set, and the
value it's being set to.

That leads us to the interpreter.

^code interpreter-visit-set

We evaluate the object whose property is being set and check to see if it's a
LoxInstance. If not, that's a runtime error. Otherwise, we evaluate the value
being set and store it on the instance. That relies on a new method in
LoxInstance.

<aside name="order">

This is another semantic edge case. There are three distinct operations:

1. Evaluate the object.

2. Raise a runtime error if it's not an instance of a class.

3. Evaluate the value.

The order that those are performed in could be user visible, which means we need
to carefully specify it and ensure our implementations do these in the same
order.

</aside>

^code lox-instance-set-property

No real magic here. We stuff the values straight into the Java map where fields
live. Since Lox allows freely creating new fields on instances, there's no need
to see if the key is already present.

## Methods on Classes

You can create instances of classes and stuff data into them, but the class
itself doesn't really *do* anything. Instances are just maps and all instances
are more or less the same. To make them feel like instances *of classes*, we
need behavior -- methods.

Our helpful parser already parses method declarations, so we're good there. We
also don't need to add any new parser support for method *calls*. We already
have `.` (getters) and `()` (function calls). A "method call" simply chains
those together.

<img src="image/classes/method.png" alt="The syntax tree for 'object.method(argument)'" />

That raises an interesting question. What happens when those two expressions are
pulled apart? Assuming that `method` in this example is a method on the class of
`object` and not a field on the instance, what should the following piece of
code do?

```lox
var m = object.method;
m(argument);
```

This program "looks up" the method and stores the result -- whatever that is --
in a variable and then calls that object later. Is this allowed? Can you treat a
method like it's a function on the instance?

What about the other direction?

```lox
class Box {}

fun notMethod(argument) {
  print "called function with " + argument;
}

var box = Box();
box.function = notMethod;
box.function("argument");
```

This program creates an instance and then stores a function in a field on it.
Then it calls that function using the same syntax as a method call. Does that
work?

Different languages have different answers to these questions. One could write a
treatise on it. For Lox, we'll say the answer to both of these is yes, it does
work. We have a couple of reasons to justify that. For the second example --
calling a function stored in a field -- we want to support that because
first-class functions are useful and storing them in fields is a perfectly
normal thing to do.

The first example is more obscure. One motivation is that users generally expect
to be able to hoist a subexpression out into a local variable without changing
the meaning of the program. You can take this:

```lox
breakfast(omelette.filledWith(cheese), sausage);
```

And turn it into this:

```lox
var eggs = omelette.filledWith(cheese);
breakfast(eggs, sausage);
```

And it does the same thing. Likewise, since the `.` and the `()` in a method
call *are* two separate expressions, it seems you should be able to hoist the
*lookup* part into a variable and then call it <span
name="callback">later</span>. We need to think carefully about what the *thing*
you get when you look up a method is, and how it behaves, even in weird cases
like:

<aside name="callback">

A motivating use for this is callbacks. Often, you want to pass a callback whose
body simply invokes a method on some object. Being able to look up the method and
pass it directly saves you the chore of manually declaring a function to wrap
it. Compare this:

```lox
fun callback(a, b, c) {
  object.method(a, b, c);
}

takeCallback(callback);
```

With this:

```lox
takeCallback(object.method);
```

</aside>

```lox
class Person {
  sayName() {
    print this.name;
  }
}

var jane = Person();
jane.name = "Jane";

var method = jane.sayName;
method(); // ?
```

If you grab a handle to a method on some instance and call it later, does it
"remember" the instance it was pulled off from? Does `this` inside the method
still refer to that original object?

Here's a more pathological example to bend your brain:

```lox
class Person {
  sayName() {
    print this.name;
  }
}

var jane = Person();
jane.name = "Jane";

var bill = Person();
bill.name = "Bill";

bill.sayName = jane.sayName;
bill.sayName(); // ?
```

Does that last line print "Bill" because that's the instance that we *called*
the method through, or "Jane" because it's the instance where we first grabbed
the method?

Equivalent code in Lua and JavaScript would print "Bill". Those languages don't
really have a notion of "methods". Everything is sort of functions-in-fields, so
it's not clear that `jane` "owns" `sayName` any more than `bill` does.

Lox, though, has real class syntax so we do know which callable things are
methods and which are functions. Thus, like Python, C#, and others, we will have
methods "bind" `this` to the original instance when the method is first grabbed.
Python calls <span name="bound">these</span> **bound methods**.

<aside name="bound">

I know, imaginative name, right?

</aside>

In practice, that's usually what you want. If you take a reference to a method
on some object so you can use it as a callback later, you want to remember the
instance it belonged to, even if that callback happens to be stored in a field
on some other object.

OK, that's a lot of semantics to load into your head. Forget about the edge
cases for a bit. We'll get back to those. For now, let's get basic method calls
working. We're already parsing the method declarations inside the class body, so
the next step is to resolve them.

^code resolve-methods (1 before, 1 after)

<aside name="local">

Storing the function type in a local variable is pointless right now, but we'll
expand this code before too long and it will make more sense.

</aside>

We iterate through the methods in the class body and call the
`resolveFunction()` method we wrote for handling function declarations already.
The only difference is that we pass in a new FunctionType enum value.

^code function-type-method (1 before, 1 after)

That's going to be important when we resolve `this` expressions. For now, don't
worry about it. The interesting stuff is in the interpreter.

^code interpret-methods (1 before, 1 after)

When we interpret a class declaration statement, we turn the syntactic
representation of the class -- its AST node -- into its runtime representation.
Now, we need to do that for the methods contained in the class as well. Each
method declaration blossoms into a LoxFunction object.

We take all of those and wrap them up into a map, keyed by the method names.
That gets stored in LoxClass.

^code lox-class-methods (1 before, 3 after)

Where an instance stores state, the class stores behavior. LoxInstance has its
map of fields, and LoxClass gets a map of methods. Even though methods are
owned by the class, they are still accessed through instances of that class.

^code lox-instance-get-method (5 before, 2 after)

When looking up a property on an instance, if we don't <span
name="shadow">find</span> a matching field, we look for a method with that name
on the instance's class. If found, we return that. This is where the distinction
between "field" and "property" becomes meaningful. When accessing a property,
you might get a field -- a bit of state stored on the instance -- or you could
hit a method defined on the instance's class.

The method is looked up using this:

<aside name="shadow">

Looking for a field first implies that fields shadow methods, a subtle but
important semantic point.

</aside>

^code lox-class-find-method

You can probably guess this method is going to get more interesting later. For
now, a simple map lookup on the class's method table is enough to get us
started. Give it a try:

<span name="crunch"></span>

```lox
class Bacon {
  eat() {
    print "Crunch crunch crunch!";
  }
}

Bacon().eat(); // Prints "Crunch crunch crunch!".
```

<aside name="crunch">

Apologies if you prefer chewy bacon over crunchy. Feel free to adjust the script
to your taste.

</aside>

## This

We can define both behavior and state on objects, but they aren't tied together
yet. Inside a method, we have no way to access the fields of the "current"
object -- the instance that the method was called on -- nor can we call other
methods on that same object.

To get at that instance, it needs a <span name="i">name</span>. Smalltalk,
Ruby, and Swift use "self". Simula, C++, Java, and others use "this". Python
uses "self" by convention, but you can technically call it whatever you like.

<aside name="i">

"I" would have been a great choice, but using "i" for loop variables predates
OOP and goes all the way back to Fortran. We are victims of the incidental
choices of our forebears.

</aside>

For Lox, since we generally hew to Java-ish style, we'll go with "this". Inside
a method body, a `this` expression evaluates to the instance that the method was
called on. Or, more specifically, since methods are accessed and then invoked as
two steps, it will refer to the object that the method was *accessed* from.

That makes our job harder. Peep at:

```lox
class Egotist {
  speak() {
    print this;
  }
}

var method = Egotist().speak;
method();
```

On the second-to-last line, we grab a reference to the `speak()` method off an
instance of the class. That returns a function, and that function needs to
remember the instance it was pulled off of so that *later*, on the last line, it
can still find it when the function is called.

We need to take `this` at the point that the method is accessed and attach it to
the function somehow so that it stays around as long as we need it to. Hmm... a
way to store some extra data that hangs around a function, eh? That sounds an
awful lot like a *closure*, doesn't it?

If we defined `this` as a sort of hidden variable in an environment that
surrounds the function returned when looking up a method, then uses of `this` in
the body would be able to find it later. LoxFunction already has the ability to
hold on to a surrounding environment, so we have the machinery we need.

Let's walk through an example to see how it works:

```lox
class Cake {
  taste() {
    var adjective = "delicious";
    print "The " + this.flavor + " cake is " + adjective + "!";
  }
}

var cake = Cake();
cake.flavor = "German chocolate";
cake.taste(); // Prints "The German chocolate cake is delicious!".
```

When we first evaluate the class definition, we create a LoxFunction for
`taste()`. Its closure is the environment surrounding the class, in this case
the global one. So the LoxFunction we store in the class's method map looks
like so:

<img src="image/classes/closure.png" alt="The initial closure for the method." />

When we evaluate the `cake.taste` get expression, we create a new environment
that binds `this` to the object the method is accessed from (here, `cake`). Then
we make a *new* LoxFunction with the same code as the original one but using
that new environment as its closure.

<img src="image/classes/bound-method.png" alt="The new closure that binds 'this'." />

This is the LoxFunction that gets returned when evaluating the get expression
for the method name. When that function is later called by a `()` expression,
we create an environment for the method body as usual.

<img src="image/classes/call.png" alt="Calling the bound method and creating a new environment for the method body." />

The parent of the body environment is the environment we created earlier to bind
`this` to the current object. Thus any use of `this` inside the body
successfully resolves to that instance.

Reusing our environment code for implementing `this` also takes care of
interesting cases where methods and functions interact, like:

```lox
class Thing {
  getCallback() {
    fun localFunction() {
      print this;
    }

    return localFunction;
  }
}

var callback = Thing().getCallback();
callback();
```

In, say, JavaScript, it's common to return a callback from inside a method. That
callback may want to hang on to and retain access to the original object -- the
`this` value -- that the method was associated with. Our existing support for
closures and environment chains should do all this correctly.

Let's code it up. The first step is adding <span name="this-ast">new
syntax</span> for `this`.

^code this-ast (1 before, 1 after)

<aside name="this-ast">

The generated code for the new node is in [Appendix II][appendix-this].

[appendix-this]: appendix-ii.html#this-expression

</aside>

Parsing is simple since it's a single token which our lexer already
recognizes as a reserved word.

^code parse-this (2 before, 2 after)

You can start to see how `this` works like a variable when we get to the
resolver.

^code resolver-visit-this

We resolve it exactly like any other local variable using "this" as the name for
the "variable". Of course, that's not going to work right now, because "this"
*isn't* declared in any scope. Let's fix that over in `visitClassStmt()`.

^code resolver-begin-this-scope (2 before, 1 after)

Before we step in and start resolving the method bodies, we push a new scope and
define "this" in it as if it were a variable. Then, when we're done, we discard
that surrounding scope.

^code resolver-end-this-scope (2 before, 1 after)

Now, whenever a `this` expression is encountered (at least inside a method) it
will resolve to a "local variable" defined in an implicit scope just outside of
the block for the method body.

The resolver has a new *scope* for `this`, so the interpreter needs to create a
corresponding *environment* for it. Remember, we always have to keep the
resolver's scope chains and the interpreter's linked environments in sync with
each other. At runtime, we create the environment after we find the method on
the instance. We replace the previous line of code that simply returned the
method's LoxFunction with this:

^code lox-instance-bind-method (1 before, 3 after)

Note the new call to `bind()`. That looks like so:

^code bind-instance

There isn't much to it. We create a new environment nestled inside the method's
original closure. Sort of a closure-within-a-closure. When the method is called,
that will become the parent of the method body's environment.

We declare "this" as a variable in that environment and bind it to the given
instance, the instance that the method is being accessed from. *Et voilà*, the
returned LoxFunction now carries around its own little persistent world where
"this" is bound to the object.

The remaining task is interpreting those `this` expressions. Similar to the
resolver, it is the same as interpreting a variable expression.

^code interpreter-visit-this

Go ahead and give it a try using that cake example from earlier. With fewer than
twenty lines of code, our interpreter handles `this` inside methods even in all
of the weird ways it can interact with nested classes, functions inside methods,
handles to methods, etc.

### Invalid uses of this

Wait a minute. What happens if you try to use `this` *outside* of a method? What
about:

```lox
print this;
```

Or:

```lox
fun notAMethod() {
  print this;
}
```

There is no instance for `this` to point to if you're not in a method. We could
give it some default value like `nil` or make it a runtime error, but the user
has clearly made a mistake. The sooner they find and fix that mistake, the
happier they'll be.

Our resolution pass is a fine place to detect this error statically. It already
detects `return` statements outside of functions. We'll do something similar for
`this`. In the vein of our existing FunctionType enum, we define a new ClassType
one.

^code class-type (1 before, 1 after)

Yes, it could be a Boolean. When we get to inheritance, it will get a third
value, hence the enum right now. We also add a corresponding field,
`currentClass`. Its value tells us if we are currently inside a class
declaration while traversing the syntax tree. It starts out `NONE` which means
we aren't in one.

When we begin to resolve a class declaration, we change that.

^code set-current-class (1 before, 1 after)

As with `currentFunction`, we store the previous value of the field in a local
variable. This lets us piggyback onto the JVM to keep a stack of `currentClass`
values. That way we don't lose track of the previous value if one class nests
inside another.

Once the methods have been resolved, we "pop" that stack by restoring the old
value.

^code restore-current-class (2 before, 1 after)

When we resolve a `this` expression, the `currentClass` field gives us the bit
of data we need to report an error if the expression doesn't occur nestled
inside a method body.

^code this-outside-of-class (1 before, 1 after)

That should help users use `this` correctly, and it saves us from having to
handle misuse at runtime in the interpreter.

## Constructors and Initializers

We can do almost everything with classes now, and as we near the end of the
chapter we find ourselves strangely focused on a beginning. Methods and fields
let us encapsulate state and behavior together so that an object always *stays*
in a valid configuration. But how do we ensure a brand new object *starts* in a
good state?

For that, we need constructors. I find them one of the trickiest parts of a
language to design, and if you peer closely at most other languages, you'll see
<span name="cracks">cracks</span> around object construction where the seams of
the design don't quite fit together perfectly. Maybe there's something
intrinsically messy about the moment of birth.

<aside name="cracks">

A few examples: In Java, even though final fields must be initialized, it is
still possible to read one *before* it has been. Exceptions -- a huge, complex
feature -- were added to C++ mainly as a way to emit errors from constructors.

</aside>

"Constructing" an object is actually a pair of operations:

1.  The runtime <span name="allocate">*allocates*</span> the memory required for
    a fresh instance. In most languages, this operation is at a fundamental
    level beneath what user code is able to access.

    <aside name="allocate">

    C++'s "[placement new][]" is a rare example where the bowels of allocation
    are laid bare for the programmer to prod.

    </aside>

2.  Then, a user-provided chunk of code is called which *initializes* the
    unformed object.

[placement new]: https://en.wikipedia.org/wiki/Placement_syntax

The latter is what we tend to think of when we hear "constructor", but the
language itself has usually done some groundwork for us before we get to that
point. In fact, our Lox interpreter already has that covered when it creates a
new LoxInstance object.

We'll do the remaining part -- user-defined initialization -- now. Languages
have a variety of notations for the chunk of code that sets up a new object for
a class. C++, Java, and C# use a method whose name matches the class name. Ruby
calls it `initialize()` and Python calls it `__init__()`. A shortened `init()` is
nice and brief, so we'll do that.

In LoxClass's implementation of LoxCallable, we add a few more lines.

^code lox-class-call-initializer (2 before, 1 after)

When a class is called, after the LoxInstance is created, we look for an "init"
method. If we find one, we immediately bind and invoke it just like a normal
method call. The argument list is forwarded along.

That argument list means we also need to tweak how a class declares its arity.

^code lox-initializer-arity (1 before, 1 after)

If there is an initializer, that method's arity determines how many arguments
you must pass when you call the class itself. We don't *require* a class to
define an initializer, though, as a convenience. If you don't have an
initializer, the arity is still zero.

That's basically it. Since we bind the `init()` method before we call it, it has
access to `this` inside its body. That, along with the arguments passed to the
class, is all you need to be able to set up the new instance however you
desire.

### Invoking init() directly

As usual, exploring this new semantic territory rustles up a few weird
creatures. Consider:

```lox
class Foo {
  init() {
    print this;
  }
}

var foo = Foo();
print foo.init();
```

Can you "re-initialize" an object by directly calling its `init()` method? If
you do, what does it return? A <span name="compromise">reasonable</span> answer
would be `nil` since that's what it appears the body returns.

However -- and I generally dislike compromising to satisfy the
implementation -- it will make clox's implementation of constructors much
easier if we say that `init()` methods always return `this`, even when
directly called. In order to keep jlox compatible with that, we add a little
special case code in LoxFunction.

<aside name="compromise">

Maybe "dislike" is too strong a claim. It's reasonable to have the constraints
and resources of your implementation affect the design of the language. There
are only so many hours in the day, and if a cut corner here or there lets you get
more features to users in less time, it may very well be a net win for their
happiness and productivity. The trick is figuring out *which* corners to cut
that won't cause your users and future self to curse your shortsightedness.

</aside>

^code return-this (2 before, 1 after)

If the function is an initializer, we override the actual return value and
forcibly return `this`. That relies on a new `isInitializer` field.

^code is-initializer-field (2 before, 2 after)

We can't simply see if the name of the LoxFunction is "init" because the user
could have defined a *function* with that name. In that case, there *is* no
`this` to return. To avoid *that* weird edge case, we'll directly store whether
the LoxFunction represents an initializer method. That means we need to go back
and fix the few places where we create LoxFunctions.

^code construct-function (1 before, 1 after)

For actual function declarations, `isInitializer` is always false. For methods,
we check the name.

^code interpreter-method-initializer (1 before, 1 after)

And then in `bind()` where we create the closure that binds `this` to a method,
we pass along the original method's value.

^code lox-function-bind-with-initializer (1 before, 1 after)

### Returning from init()

We aren't out of the woods yet. We've been assuming that a user-written
initializer doesn't explicitly return a value because most constructors don't.
What should happen if a user tries:

```lox
class Foo {
  init() {
    return "something else";
  }
}
```

It's definitely not going to do what they want, so we may as well make it a
static error. Back in the resolver, we add another case to FunctionType.

^code function-type-initializer (1 before, 1 after)

We use the visited method's name to determine if we're resolving an initializer
or not.

^code resolver-initializer-type (1 before, 1 after)

When we later traverse into a `return` statement, we check that field and make
it an error to return a value from inside an `init()` method.

^code return-in-initializer (1 before, 1 after)

We're *still* not done. We statically disallow returning a *value* from an
initializer, but you can still use an empty early `return`.

```lox
class Foo {
  init() {
    return;
  }
}
```

That is actually kind of useful sometimes, so we don't want to disallow it
entirely. Rather, it should return `this` instead of `nil`. That's an easy fix
over in LoxFunction.

^code early-return-this (1 before, 1 after)

If we're in an initializer and execute a `return` statement, instead of
returning the value (which will always be `nil`), we again return `this`.

Phew! That was a whole list of tasks but our reward is that our little
interpreter has grown an entire programming paradigm. Classes, methods, fields,
`this`, and constructors. Our baby language is looking awfully grown-up.

<div class="challenges">

## Challenges

1.  We have methods on instances, but there is no way to define "static" methods
    that can be called directly on the class object itself. Add support for
    them. Use a `class` keyword preceding the method to indicate a static method
    that hangs off the class object.

    ```lox
    class Math {
      class square(n) {
        return n * n;
      }
    }

    print Math.square(3); // Prints "9".
    ```

    You can solve this however you like, but the "[metaclasses][]" used by
    Smalltalk and Ruby are a particularly elegant approach. *Hint: Make LoxClass
    extend LoxInstance and go from there.*

2.  Most modern languages support "getters" and "setters" -- members on a class
    that look like field reads and writes but that actually execute user-defined
    code. Extend Lox to support getter methods. These are declared without a
    parameter list. The body of the getter is executed when a property with that
    name is accessed.

    ```lox
    class Circle {
      init(radius) {
        this.radius = radius;
      }

      area {
        return 3.141592653 * this.radius * this.radius;
      }
    }

    var circle = Circle(4);
    print circle.area; // Prints roughly "50.2655".
    ```

3.  Python and JavaScript allow you to freely access an object's fields from
    outside of its own methods. Ruby and Smalltalk encapsulate instance state.
    Only methods on the class can access the raw fields, and it is up to the
    class to decide which state is exposed. Most statically typed languages
    offer modifiers like `private` and `public` to control which parts of a
    class are externally accessible on a per-member basis.

    What are the trade-offs between these approaches and why might a language
    prefer one or the other?

[metaclasses]: https://en.wikipedia.org/wiki/Metaclass

</div>

<div class="design-note">

## Design Note: Prototypes and Power

In this chapter, we introduced two new runtime entities, LoxClass and
LoxInstance. The former is where behavior for objects lives, and the latter is
for state. What if you could define methods right on a single object, inside
LoxInstance? In that case, we wouldn't need LoxClass at all. LoxInstance would
be a complete package for defining the behavior and state of an object.

We'd still want some way, without classes, to reuse behavior across multiple
instances. We could let a LoxInstance [*delegate*][delegate] directly to another
LoxInstance to reuse its fields and methods, sort of like inheritance.

Users would model their program as a constellation of objects, some of which
delegate to each other to reflect commonality. Objects used as delegates
represent "canonical" or "prototypical" objects that others refine. The result
is a simpler runtime with only a single internal construct, LoxInstance.

That's where the name **[prototypes][proto]** comes from for this paradigm. It
was invented by David Ungar and Randall Smith in a language called [Self][].
They came up with it by starting with Smalltalk and following the above mental
exercise to see how much they could pare it down.

Prototypes were an academic curiosity for a long time, a fascinating one that
generated interesting research but didn't make a dent in the larger world of
programming. That is, until Brendan Eich crammed prototypes into JavaScript,
which then promptly took over the world. Many (many) <span
name="words">words</span> have been written about prototypes in JavaScript.
Whether that shows that prototypes are brilliant or confusing -- or both! -- is
an open question.

<aside name="words">

Including [more than a handful][prototypes] by yours truly.

</aside>

I won't get into whether or not I think prototypes are a good idea for a
language. I've made languages that are [prototypal][finch] and
[class-based][wren], and my opinions of both are complex. What I want to discuss
is the role of *simplicity* in a language.

Prototypes are simpler than classes -- less code for the language implementer to
write, and fewer concepts for the user to learn and understand. Does that make
them better? We language nerds have a tendency to fetishize minimalism.
Personally, I think simplicity is only part of the equation. What we really want
to give the user is *power*, which I define as:

```text
power = breadth × ease ÷ complexity
```

None of these are precise numeric measures. I'm using math as an analogy here,
not actual quantification.

*   **Breadth** is the range of different things the language lets you express.
    C has a lot of breadth -- it's been used for everything from operating
    systems to user applications to games. Domain-specific languages like
    AppleScript and Matlab have less breadth.

*   **Ease** is how little effort it takes to make the language do what you
    want. "Usability" might be another term, though it carries more baggage than
    I want to bring in. "Higher-level" languages tend to have more ease than
    "lower-level" ones. Most languages have a "grain" to them where some things
    feel easier to express than others.

*   **Complexity** is how big the language (including its runtime, core libraries,
    tools, ecosystem, etc.) is. People talk about how many pages are in a
    language's spec, or how many keywords it has. It's how much the user has to
    load into their wetware before they can be productive in the system. It is
    the antonym of simplicity.

[proto]: https://en.wikipedia.org/wiki/Prototype-based_programming

Reducing complexity *does* increase power. The smaller the denominator, the
larger the resulting value, so our intuition that simplicity is good is valid.
However, when reducing complexity, we must take care not to sacrifice breadth or
ease in the process, or the total power may go down. Java would be a strictly
*simpler* language if it removed strings, but it probably wouldn't handle text
manipulation tasks well, nor would it be as easy to get things done.

The art, then, is finding *accidental* complexity that can be omitted --
language features and interactions that don't carry their weight by increasing
the breadth or ease of using the language.

If users want to express their program in terms of categories of objects, then
baking classes into the language increases the ease of doing that, hopefully by
a large enough margin to pay for the added complexity. But if that isn't how
users are using your language, then by all means leave classes out.

</div>

[delegate]: https://en.wikipedia.org/wiki/Prototype-based_programming#Delegation
[prototypes]: http://gameprogrammingpatterns.com/prototype.html
[self]: http://www.selflanguage.org/
[finch]: http://finch.stuffwithstuff.com/
[wren]: http://wren.io/


================================================
FILE: book/closures.md
================================================
> As the man said, for every complex problem there's a simple solution, and it's
> wrong.
>
> <cite>Umberto Eco, <em>Foucault's Pendulum</em></cite>

Thanks to our diligent labor in [the last chapter][last], we have a virtual
machine with working functions. What it lacks is closures. Aside from global
variables, which are their own breed of animal, a function has no way to
reference a variable declared outside of its own body.

[last]: calls-and-functions.html

```lox
var x = "global";
fun outer() {
  var x = "outer";
  fun inner() {
    print x;
  }
  inner();
}
outer();
```

Run this example now and it prints "global". It's supposed to print "outer". To
fix this, we need to include the entire lexical scope of all surrounding
functions when resolving a variable.

This problem is harder in clox than it was in jlox because our bytecode VM
stores locals on a stack. We used a stack because I claimed locals have stack
semantics -- variables are discarded in the reverse order that they are created.
But with closures, that's only *mostly* true.

```lox
fun makeClosure() {
  var local = "local";
  fun closure() {
    print local;
  }
  return closure;
}

var closure = makeClosure();
closure();
```

The outer function `makeClosure()` declares a variable, `local`. It also creates
an inner function, `closure()`, that captures that variable. Then `makeClosure()`
returns a reference to that function. Since the closure <span
name="flying">escapes</span> while holding on to the local variable, `local` must
outlive the function call where it was created.

<aside name="flying">

<img src="image/closures/flying.png" class="above" alt="A local variable flying away from the stack."/>

Oh no, it's escaping!

</aside>

We could solve this problem by dynamically allocating memory for all local
variables. That's what jlox does by putting everything in those Environment
objects that float around in Java's heap. But we don't want to. Using a <span
name="stack">stack</span> is *really* fast. Most local variables are *not*
captured by closures and do have stack semantics. It would suck to make all of
those slower for the benefit of the rare local that is captured.

<aside name="stack">

There is a reason that C and Java use the stack for their local variables, after
all.

</aside>

This means a more complex approach than we used in our Java interpreter. Because
some locals have very different lifetimes, we will have two implementation
strategies. For locals that aren't used in closures, we'll keep them just as
they are on the stack. When a local is captured by a closure, we'll adopt
another solution that lifts it onto the heap where it can live as long as
needed.

Closures have been around since the early Lisp days when bytes of memory and CPU
cycles were more precious than emeralds. Over the intervening decades, hackers
devised all <span name="lambda">manner</span> of ways to compile closures to
optimized runtime representations. Some are more efficient but require a more
complex compilation process than we could easily retrofit into clox.

<aside name="lambda">

Search for "closure conversion" or "lambda lifting" to start exploring.

</aside>

The technique I explain here comes from the design of the Lua VM. It is fast,
parsimonious with memory, and implemented with relatively little code. Even more
impressive, it fits naturally into the single-pass compilers clox and Lua both
use. It is somewhat intricate, though. It might take a while before all the
pieces click together in your mind. We'll build them one step at a time, and
I'll try to introduce the concepts in stages.

## Closure Objects

Our VM represents functions at runtime using ObjFunction. These objects are
created by the front end during compilation. At runtime, all the VM does is load
the function object from a constant table and bind it to a name. There is no
operation to "create" a function at runtime. Much like string and number <span
name="literal">literals</span>, they are constants instantiated purely at
compile time.

<aside name="literal">

In other words, a function declaration in Lox *is* a kind of literal -- a piece
of syntax that defines a constant value of a built-in type.

</aside>

That made sense because all of the data that composes a function is known at
compile time: the chunk of bytecode compiled from the function's body, and the
constants used in the body. Once we introduce closures, though, that
representation is no longer sufficient. Take a gander at:

```lox
fun makeClosure(value) {
  fun closure() {
    print value;
  }
  return closure;
}

var doughnut = makeClosure("doughnut");
var bagel = makeClosure("bagel");
doughnut();
bagel();
```

The `makeClosure()` function defines and returns a function. We call it twice
and get two closures back. They are created by the same nested function
declaration, `closure`, but close over different values. When we call the two
closures, each prints a different string. That implies we need some runtime
representation for a closure that captures the local variables surrounding the
function as they exist when the function declaration is *executed*, not just
when it is compiled.

We'll work our way up to capturing variables, but a good first step is defining
that object representation. Our existing ObjFunction type represents the <span
name="raw">"raw"</span> compile-time state of a function declaration, since all
closures created from a single declaration share the same code and constants. At
runtime, when we execute a function declaration, we wrap the ObjFunction in a
new ObjClosure structure. The latter has a reference to the underlying bare
function along with runtime state for the variables the function closes over.

<aside name="raw">

The Lua implementation refers to the raw function object containing the bytecode
as a "prototype", which is a great word to describe this, except that word also
gets overloaded to refer to [prototypal inheritance][].

[prototypal inheritance]: https://en.wikipedia.org/wiki/Prototype-based_programming

</aside>

<img src="image/closures/obj-closure.png" alt="An ObjClosure with a reference to an ObjFunction."/>

We'll wrap every function in an ObjClosure, even if the function doesn't
actually close over and capture any surrounding local variables. This is a
little wasteful, but it simplifies the VM because we can always assume that the
function we're calling is an ObjClosure. That new struct starts out like this:

^code obj-closure

Right now, it simply points to an ObjFunction and adds the necessary object
header stuff. Grinding through the usual ceremony for adding a new object type
to clox, we declare a C function to create a new closure.

^code new-closure-h (2 before, 1 after)

Then we implement it here:

^code new-closure

It takes a pointer to the ObjFunction it wraps. It also initializes the type
field to a new type.

^code obj-type-closure (1 before, 1 after)

And when we're done with a closure, we release its memory.

^code free-closure (1 before, 1 after)

We free only the ObjClosure itself, not the ObjFunction. That's because the
closure doesn't *own* the function. There may be multiple closures that all
reference the same function, and none of them claims any special privilege over
it. We can't free the ObjFunction until *all* objects referencing it are gone --
including even the surrounding function whose constant table contains it.
Tracking that sounds tricky, and it is! That's why we'll write a garbage
collector soon to manage it for us.

We also have the usual <span name="macro">macros</span> for checking a value's
type.

<aside name="macro">

Perhaps I should have defined a macro to make it easier to generate these
macros. Maybe that would be a little too meta.

</aside>

^code is-closure (2 before, 1 after)

And to cast a value:

^code as-closure (2 before, 1 after)

Closures are first-class objects, so you can print them.

^code print-closure (1 before, 1 after)

They display exactly as ObjFunction does. From the user's perspective, the
difference between ObjFunction and ObjClosure is purely a hidden implementation
detail. With that out of the way, we have a working but empty representation for
closures.

### Compiling to closure objects

We have closure objects, but our VM never creates them. The next step is getting
the compiler to emit instructions to tell the runtime when to create a new
ObjClosure to wrap a given ObjFunction. This happens right at the end of a
function declaration.

^code emit-closure (1 before, 1 after)

Before, the final bytecode for a function declaration was a single `OP_CONSTANT`
instruction to load the compiled function from the surrounding function's
constant table and push it onto the stack. Now we have a new instruction.

^code closure-op (1 before, 1 after)

Like `OP_CONSTANT`, it takes a single operand that represents a constant table
index for the function. But when we get over to the runtime implementation, we
do something more interesting.

First, let's be diligent VM hackers and slot in disassembler support for the
instruction.

^code disassemble-closure (2 before, 1 after)

There's more going on here than we usually have in the disassembler. By the end
of the chapter, you'll discover that `OP_CLOSURE` is quite an unusual
instruction. It's straightforward right now -- just a single byte operand -- but
we'll be adding to it. This code here anticipates that future.

### Interpreting function declarations

Most of the work we need to do is in the runtime. We have to handle the new
instruction, naturally. But we also need to touch every piece of code in the VM
that works with ObjFunction and change it to use ObjClosure instead -- function
calls, call frames, etc. We'll start with the instruction, though.

^code interpret-closure (1 before, 1 after)

Like the `OP_CONSTANT` instruction we used before, first we load the compiled
function from the constant table. The difference now is that we wrap that
function in a new ObjClosure and push the result onto the stack.

Once you have a closure, you'll eventually want to call it.

^code call-value-closure (1 before, 1 after)

We remove the code for calling objects whose type is `OBJ_FUNCTION`. Since we
wrap all functions in ObjClosures, the runtime will never try to invoke a bare
ObjFunction anymore. Those objects live only in constant tables and get
immediately <span name="naked">wrapped</span> in closures before anything else
sees them.

<aside name="naked">

We don't want any naked functions wandering around the VM! What would the
neighbors say?

</aside>

We replace the old code with very similar code for calling a closure instead.
The only difference is the type of object we pass to `call()`. The real changes
are over in that function. First, we update its signature.

^code call-signature (1 after)

Then, in the body, we need to fix everything that referenced the function to
handle the fact that we've introduced a layer of indirection. We start with the
arity checking:

^code check-arity (1 before, 1 after)

The only change is that we unwrap the closure to get to the underlying function.
The next thing `call()` does is create a new CallFrame. We change that code to
store the closure in the CallFrame and get the bytecode pointer from the
closure's function.

^code call-init-closure (1 before, 1 after)

This necessitates changing the declaration of CallFrame too.

^code call-frame-closure (1 before, 1 after)

That change triggers a few other cascading changes. Every place in the VM that
accessed CallFrame's function needs to use a closure instead. First, the macro
for reading a constant from the current function's constant table:

^code read-constant (2 before, 2 after)

When `DEBUG_TRACE_EXECUTION` is enabled, it needs to get to the chunk from the
closure.

^code disassemble-instruction (1 before, 1 after)

Likewise when reporting a runtime error:

^code runtime-error-function (1 before, 1 after)

Almost there. The last piece is the blob of code that sets up the very first
CallFrame to begin executing the top-level code for a Lox script.

^code interpret (1 before, 2 after)

<span name="pop">The</span> compiler still returns a raw ObjFunction when
compiling a script. That's fine, but it means we need to wrap it in an
ObjClosure here, before the VM can execute it.

<aside name="pop">

The code looks a little silly because we still push the original ObjFunction
onto the stack. Then we pop it after creating the closure, only to then push the
closure. Why put the ObjFunction on there at all? As usual, when you see weird
stack stuff going on, it's to keep the [forthcoming garbage collector][gc] aware
of some heap-allocated objects.

[gc]: garbage-collection.html

</aside>

We are back to a working interpreter. The *user* can't tell any difference, but
the compiler now generates code telling the VM to create a closure for each
function declaration. Every time the VM executes a function declaration, it
wraps the ObjFunction in a new ObjClosure. The rest of the VM now handles those
ObjClosures floating around. That's the boring stuff out of the way. Now we're
ready to make these closures actually *do* something.

## Upvalues

Our existing instructions for reading and writing local variables are limited to
a single function's stack window. Locals from a surrounding function are outside
of the inner function's window. We're going to need some new instructions.

The easiest approach might be an instruction that takes a relative stack slot
offset that can reach *before* the current function's window. That would work if
closed-over variables were always on the stack. But as we saw earlier, these
variables sometimes outlive the function where they are declared. That means
they won't always be on the stack.

The next easiest approach, then, would be to take any local variable that gets
closed over and have it always live on the heap. When the local variable
declaration in the surrounding function is executed, the VM would allocate
memory for it dynamically. That way it could live as long as needed.

This would be a fine approach if clox didn't have a single-pass compiler. But
that restriction we chose in our implementation makes things harder. Take a look
at this example:

```lox
fun outer() {
  var x = 1;    // (1)
  x = 2;        // (2)
  fun inner() { // (3)
    print x;
  }
  inner();
}
```

Here, the compiler compiles the declaration of `x` at `(1)` and emits code for
the assignment at `(2)`. It does that before reaching the declaration of
`inner()` at `(3)` and discovering that `x` is in fact closed over. We don't
have an easy way to go back and fix that already-emitted code to treat `x`
specially. Instead, we want a solution that allows a closed-over variable to
live on the stack exactly like a normal local variable *until the point that it
is closed over*.

Fortunately, thanks to the Lua dev team, we have a solution. We use a level of
indirection that they call an **upvalue**. An upvalue refers to a local variable
in an enclosing function. Every closure maintains an array of upvalues, one for
each surrounding local variable that the closure uses.

The upvalue points back into the stack to where the variable it captured lives.
When the closure needs to access a closed-over variable, it goes through the
corresponding upvalue to reach it. When a function declaration is first executed
and we create a closure for it, the VM creates the array of upvalues and wires
them up to "capture" the surrounding local variables that the closure needs.

For example, if we throw this program at clox,

```lox
{
  var a = 3;
  fun f() {
    print a;
  }
}
```

the compiler and runtime will conspire together to build up a set of objects in
memory like this:

<img src="image/closures/open-upvalue.png" alt="The object graph of the stack, ObjClosure, ObjFunction, and upvalue array."/>


That might look overwhelming, but fear not. We'll work our way through it. The
important part is that upvalues serve as the layer of indirection needed to
continue to find a captured local variable even after it moves off the stack.
But before we get to all that, let's focus on compiling captured variables.

### Compiling upvalues

As usual, we want to do as much work as possible during compilation to keep
execution simple and fast. Since local variables are lexically scoped in Lox, we
have enough knowledge at compile time to resolve which surrounding local
variables a function accesses and where those locals are declared. That, in
turn, means we know *how many* upvalues a closure needs, *which* variables they
capture, and *which stack slots* contain those variables in the declaring
function's stack window.

Currently, when the compiler resolves an identifier, it walks the block scopes
for the current function from innermost to outermost. If we don't find the
variable in that function, we assume the variable must be a global. We don't
consider the local scopes of enclosing functions -- they get skipped right over.
The first change, then, is inserting a resolution step for those outer local
scopes.

^code named-variable-upvalue (3 before, 1 after)

This new `resolveUpvalue()` function looks for a local variable declared in any
of the surrounding functions. If it finds one, it returns an "upvalue index" for
that variable. (We'll get into what that means later.) Otherwise, it returns -1
to indicate the variable wasn't found. If it was found, we use these two new
instructions for reading or writing to the variable through its upvalue:

^code upvalue-ops (1 before, 1 after)

We're implementing this sort of top-down, so I'll show you how these work at
runtime soon. The part to focus on now is how the compiler actually resolves the
identifier.

^code resolve-upvalue

We call this after failing to resolve a local variable in the current function's
scope, so we know the variable isn't in the current compiler. Recall that
Compiler stores a pointer to the Compiler for the enclosing function, and these
pointers form a linked chain that goes all the way to the root Compiler for the
top-level code. Thus, if the enclosing Compiler is `NULL`, we know we've reached
the outermost function without finding a local variable. The variable must be
<span name="undefined">global</span>, so we return -1.

<aside name="undefined">

It might end up being an entirely undefined variable and not even global. But in
Lox, we don't detect that error until runtime, so from the compiler's
perspective, it's "hopefully global".

</aside>

Otherwise, we try to resolve the identifier as a *local* variable in the
*enclosing* compiler. In other words, we look for it right outside the current
function. For example:

```lox
fun outer() {
  var x = 1;
  fun inner() {
    print x; // (1)
  }
  inner();
}
```

When compiling the identifier expression at `(1)`, `resolveUpvalue()` looks for
a local variable `x` declared in `outer()`. If found -- like it is in this
example -- then we've successfully resolved the variable. We create an upvalue
so that the inner function can access the variable through that. The upvalue is
created here:

^code add-upvalue

The compiler keeps an array of upvalue structures to track the closed-over
identifiers that it has resolved in the body of each function. Remember how the
compiler's Local array mirrors the stack slot indexes where locals live at
runtime? This new upvalue array works the same way. The indexes in the
compiler's array match the indexes where upvalues will live in the ObjClosure at
runtime.

This function adds a new upvalue to that array. It also keeps track of the
number of upvalues the function uses. It stores that count directly in the
ObjFunction itself because we'll also <span name="bridge">need</span> that
number for use at runtime.

<aside name="bridge">

Like constants and function arity, the upvalue count is another one of those
little pieces of data that form the bridge between the compiler and runtime.

</aside>

The `index` field tracks the closed-over local variable's slot index. That way
the compiler knows *which* variable in the enclosing function needs to be
captured. We'll circle back to what that `isLocal` field is for before too long.
Finally, `addUpvalue()` returns the index of the created upvalue in the
function's upvalue list. That index becomes the operand to the `OP_GET_UPVALUE`
and `OP_SET_UPVALUE` instructions.

That's the basic idea for resolving upvalues, but the function isn't fully
baked. A closure may reference the same variable in a surrounding function
multiple times. In that case, we don't want to waste time and memory creating a
separate upvalue for each identifier expression. To fix that, before we add a
new upvalue, we first check to see if the function already has an upvalue that
closes over that variable.

^code existing-upvalue (1 before, 1 after)

If we find an upvalue in the array whose slot index matches the one we're
adding, we just return that *upvalue* index and reuse it. Otherwise, we fall
through and add the new upvalue.

These two functions access and modify a bunch of new state, so let's define
that. First, we add the upvalue count to ObjFunction.

^code upvalue-count (1 before, 1 after)

We're conscientious C programmers, so we zero-initialize that when an
ObjFunction is first allocated.

^code init-upvalue-count (1 before, 1 after)

In the compiler, we add a field for the upvalue array.

^code upvalues-array (1 before, 1 after)

For simplicity, I gave it a fixed size. The `OP_GET_UPVALUE` and
`OP_SET_UPVALUE` instructions encode an upvalue index using a single byte
operand, so there's a restriction on how many upvalues a function can have --
how many unique variables it can close over. Given that, we can afford a static
array that large. We also need to make sure the compiler doesn't overflow that
limit.

^code too-many-upvalues (5 before, 1 after)

Finally, the Upvalue struct type itself.

^code upvalue-struct

The `index` field stores which local slot the upvalue is capturing. The
`isLocal` field deserves its own section, which we'll get to next.

### Flattening upvalues

In the example I showed before, the closure is accessing a variable declared in
the immediately enclosing function. Lox also supports accessing local variables
declared in *any* enclosing scope, as in:

```lox
fun outer() {
  var x = 1;
  fun middle() {
    fun inner() {
      print x;
    }
  }
}
```

Here, we're accessing `x` in `inner()`. That variable is defined not in
`middle()`, but all the way out in `outer()`. We need to handle cases like this
too. You *might* think that this isn't much harder since the variable will
simply be somewhere farther down on the stack. But consider this <span
name="devious">devious</span> example:

<aside name="devious">

If you work on programming languages long enough, you will develop a
finely honed skill at creating bizarre programs like this that are technically
valid but likely to trip up an implementation written by someone with a less
perverse imagination than you.

</aside>

```lox
fun outer() {
  var x = "value";
  fun middle() {
    fun inner() {
      print x;
    }

    print "create inner closure";
    return inner;
  }

  print "return from outer";
  return middle;
}

var mid = outer();
var in = mid();
in();
```

When you run this, it should print:

```text
return from outer
create inner closure
value
```

I know, it's convoluted. The important part is that `outer()` -- where `x` is
declared -- returns and pops all of its variables off the stack before the
*declaration* of `inner()` executes. So, at the point in time that we create the
closure for `inner()`, `x` is already off the stack.

Here, I traced out the execution flow for you:

<img src="image/closures/execution-flow.png" alt="Tracing through the previous example program."/>

See how `x` is popped &#9312; before it is captured &#9313; and then later
accessed &#9314;? We really have two problems:

1.  We need to resolve local variables that are declared in surrounding
    functions beyond the immediately enclosing one.

2.  We need to be able to capture variables that have already left the stack.

Fortunately, we're in the middle of adding upvalues to the VM, and upvalues are
explicitly designed for tracking variables that have escaped the stack. So, in a
clever bit of self-reference, we can use upvalues to allow upvalues to capture
variables declared outside of the immediately surrounding function.

The solution is to allow a closure to capture either a local variable or *an
existing upvalue* in the immediately enclosing function. If a deeply nested
function references a local variable declared several hops away, we'll thread it
through all of the intermediate functions by having each function capture an
upvalue for the next function to grab.

<img src="image/closures/linked-upvalues.png" alt="An upvalue in inner() points to an upvalue in middle(), which points to a local variable in outer()."/>

In the above example, `middle()` captures the local variable `x` in the
immediately enclosing function `outer()` and stores it in its own upvalue. It
does this even though `middle()` itself doesn't reference `x`. Then, when the
declaration of `inner()` executes, its closure grabs the *upvalue* from the
ObjClosure for `middle()` that captured `x`. A function captures -- either a
local or upvalue -- *only* from the immediately surrounding function, which is
guaranteed to still be around at the point that the inner function declaration
executes.

In order to implement this, `resolveUpvalue()` becomes recursive.

^code resolve-upvalue-recurse (4 before, 1 after)

It's only another three lines of code, but I found this function really
challenging to get right the first time. This in spite of the fact that I wasn't
inventing anything new, just porting the concept over from Lua. Most recursive
functions either do all their work before the recursive call (a **pre-order
traversal**, or "on the way down"), or they do all the work after the recursive
call (a **post-order traversal**, or "on the way back up"). This function does
both. The recursive call is right in the middle.

We'll walk through it slowly. First, we look for a matching local variable in
the enclosing function. If we find one, we capture that local and return. That's
the <span name="base">base</span> case.

<aside name="base">

The other base case, of course, is if there is no enclosing function. In that
case, the variable can't be resolved lexically and is treated as global.

</aside>

Otherwise, we look for a local variable beyond the immediately enclosing
function. We do that by recursively calling `resolveUpvalue()` on the
*enclosing* compiler, not the current one. This series of `resolveUpvalue()`
calls works its way along the chain of nested compilers until it hits one of
the base cases -- either it finds an actual local variable to capture or it
runs out of compilers.

When a local variable is found, the most deeply <span name="outer">nested</span>
call to `resolveUpvalue()` captures it and returns the upvalue index. That
returns to the next call for the inner function declaration. That call captures
the *upvalue* from the surrounding function, and so on. As each nested call to
`resolveUpvalue()` returns, we drill back down into the innermost function
declaration where the identifier we are resolving appears. At each step along
the way, we add an upvalue to the intervening function and pass the resulting
upvalue index down to the next call.

<aside name="outer">

Each recursive call to `resolveUpvalue()` walks *out* one level of function
nesting. So an inner *recursive call* refers to an *outer* nested declaration.
The innermost recursive call to `resolveUpvalue()` that finds the local variable
will be for the *outermost* function, just inside the enclosing function where
that variable is actually declared.

</aside>

It might help to walk through the original example when resolving `x`:

<img src="image/closures/recursion.png" alt="Tracing through a recursive call to resolveUpvalue()."/>

Note that the new call to `addUpvalue()` passes `false` for the `isLocal`
parameter. Now you see that that flag controls whether the closure captures a
local variable or an upvalue from the surrounding function.

By the time the compiler reaches the end of a function declaration, every
variable reference has been resolved as either a local, an upvalue, or a global.
Each upvalue may in turn capture a local variable from the surrounding function,
or an upvalue in the case of transitive closures. We finally have enough data to
emit bytecode which creates a closure at runtime that captures all of the
correct variables.

^code capture-upvalues (1 before, 1 after)

The `OP_CLOSURE` instruction is unique in that it has a variably sized encoding.
For each upvalue the closure captures, there are two single-byte operands. Each
pair of operands specifies what that upvalue captures. If the first byte is one,
it captures a local variable in the enclosing function. If zero, it captures one
of the enclosing function's upvalues. The next byte is the local slot or upvalue
index to capture.

This odd encoding means we need some bespoke support in the disassembly code
for `OP_CLOSURE`.

^code disassemble-upvalues (1 before, 1 after)

For example, take this script:

```lox
fun outer() {
  var a = 1;
  var b = 2;
  fun middle() {
    var c = 3;
    var d = 4;
    fun inner() {
      print a + c + b + d;
    }
  }
}
```

If we disassemble the instruction that creates the closure for `inner()`, it
prints this:

```text
0004    9 OP_CLOSURE          2 <fn inner>
0006      |                     upvalue 0
0008      |                     local 1
0010      |                     upvalue 1
0012      |                     local 2
```

We have two other, simpler instructions to add disassembler support for.

^code disassemble-upvalue-ops (2 before, 1 after)

These both have a single-byte operand, so there's nothing exciting going on. We
do need to add an include so the debug module can get to `AS_FUNCTION()`.

^code debug-include-object (1 before, 1 after)

With that, our compiler is where we want it. For each function declaration, it
outputs an `OP_CLOSURE` instruction followed by a series of operand byte pairs
for each upvalue it needs to capture at runtime. It's time to hop over to that
side of the VM and get things running.

## Upvalue Objects

Each `OP_CLOSURE` instruction is now followed by the series of bytes that
specify the upvalues the ObjClosure should own. Before we process those
operands, we need a runtime representation for upvalues.

^code obj-upvalue

We know upvalues must manage closed-over variables that no longer live on the
stack, which implies some amount of dynamic allocation. The easiest way to do
that in our VM is by building on the object system we already have. That way,
when we implement a garbage collector in [the next chapter][gc], the GC can
manage memory for upvalues too.

[gc]: garbage-collection.html

Thus, our runtime upvalue structure is an ObjUpvalue with the typical Obj header
field. Following that is a `location` field that points to the closed-over
variable. Note that this is a *pointer* to a Value, not a Value itself. It's a
reference to a *variable*, not a *value*. This is important because it means
that when we assign to the variable the upvalue captures, we're assigning to the
actual variable, not a copy. For example:

```lox
fun outer() {
  var x = "before";
  fun inner() {
    x = "assigned";
  }
  inner();
  print x;
}
outer();
```

This program should print "assigned" even though the closure assigns to `x` and
the surrounding function accesses it.

Because upvalues are objects, we've got all the usual object machinery, starting
with a constructor-like function:

^code new-upvalue-h (1 before, 1 after)

It takes the address of the slot where the closed-over variable lives. Here is
the implementation:

^code new-upvalue

We simply initialize the object and store the pointer. That requires a new
object type.

^code obj-type-upvalue (1 before, 1 after)

And on the back side, a destructor-like function:

^code free-upvalue (3 before, 1 after)

Multiple closures can close over the same variable, so ObjUpvalue does not own
the variable it references. Thus, the only thing to free is the ObjUpvalue
itself.

And, finally, to print:

^code print-upvalue (3 before, 1 after)

Printing isn't useful to end users. Upvalues are objects only so that we can
take advantage of the VM's memory management. They aren't first-class values
that a Lox user can directly access in a program. So this code will never
actually execute... but it keeps the compiler from yelling at us about an
unhandled switch case, so here we are.

### Upvalues in closures

When I first introduced upvalues, I said each closure has an array of them.
We've finally worked our way back to implementing that.

^code upvalue-fields (1 before, 1 after)

<span name="count">Different</span> closures may have different numbers of
upvalues, so we need a dynamic array. The upvalues themselves are dynamically
allocated too, so we end up with a double pointer -- a pointer to a dynamically
allocated array of pointers to upvalues. We also store the number of elements in
the array.

<aside name="count">

Storing the upvalue count in the closure is redundant because the ObjFunction
that the ObjClosure references also keeps that count. As usual, this weird code
is to appease the GC. The collector may need to know an ObjClosure's upvalue
array size after the closure's corresponding ObjFunction has already been freed.

</aside>

When we create an ObjClosure, we allocate an upvalue array of the proper size,
which we determined at compile time and stored in the ObjFunction.

^code allocate-upvalue-array (1 before, 1 after)

Before creating the closure object itself, we allocate the array of upvalues and
initialize them all to `NULL`. This weird ceremony around memory is a careful
dance to please the (forthcoming) garbage collection deities. It ensures the
memory manager never sees uninitialized memory.

Then we store the array in the new closure, as well as copy the count over from
the ObjFunction.

^code init-upvalue-fields (1 before, 1 after)

When we free an ObjClosure, we also free the upvalue array.

^code free-upvalues (1 before, 1 after)

ObjClosure does not own the ObjUpvalue objects themselves, but it does own *the
array* containing pointers to those upvalues.

We fill the upvalue array over in the interpreter when it creates a closure.
This is where we walk through all of the operands after `OP_CLOSURE` to see what
kind of upvalue each slot captures.

^code interpret-capture-upvalues (1 before, 1 after)

This code is the magic moment when a closure comes to life. We iterate over each
upvalue the closure expects. For each one, we read a pair of operand bytes. If
the upvalue closes over a local variable in the enclosing function, we let
`captureUpvalue()` do the work.

Otherwise, we capture an upvalue from the surrounding function. An `OP_CLOSURE`
instruction is emitted at the end of a function declaration. At the moment that
we are executing that declaration, the *current* function is the surrounding
one. That means the current function's closure is stored in the CallFrame at the
top of the call stack. So, to grab an upvalue from the enclosing function, we can
read it right from the `frame` local variable, which caches a reference to that
CallFrame.

Closing over a local variable is more interesting. Most of the work happens in a
separate function, but first we calculate the argument to pass to it. We need to
grab a pointer to the captured local's slot in the surrounding function's stack
window. That window begins at `frame->slots`, which points to slot zero. Adding
`index` offsets that to the local slot we want to capture. We pass that pointer
here:

^code capture-upvalue

This seems a little silly. All it does is create a new ObjUpvalue that captures
the given stack slot and returns it. Did we need a separate function for this?
Well, no, not *yet*. But you know we are going to end up sticking more code in
here.

First, let's wrap up what we're working on. Back in the interpreter code for
handling `OP_CLOSURE`, we eventually finish iterating through the upvalue
array and initializing each one. When that completes, we have a new closure with
an array full of upvalues pointing to variables.

With that in hand, we can implement the instructions that work with those
upvalues.

^code interpret-get-upvalue (1 before, 1 after)

The operand is the index into the current function's upvalue array. So we simply
look up the corresponding upvalue and dereference its location pointer to read
the value in that slot. Setting a variable is similar.

^code interpret-set-upvalue (1 before, 1 after)

We <span name="assign">take</span> the value on top of the stack and store it
into the slot pointed to by the chosen upvalue. Just as with the instructions
for local variables, it's important that these instructions are fast. User
programs are constantly reading and writing variables, so if that's slow,
everything is slow. And, as usual, the way we make them fast is by keeping them
simple. These two new instructions are pretty good: no control flow, no complex
arithmetic, just a couple of pointer indirections and a `push()`.

<aside name="assign">

The set instruction doesn't *pop* the value from the stack because, remember,
assignment is an expression in Lox. So the result of the assignment -- the
assigned value -- needs to remain on the stack for the surrounding expression.

</aside>

This is a milestone. As long as all of the variables remain on the stack, we
have working closures. Try this:

```lox
fun outer() {
  var x = "outside";
  fun inner() {
    print x;
  }
  inner();
}
outer();
```

Run this, and it correctly prints "outside".

## Closed Upvalues

Of course, a key feature of closures is that they hold on to the variable as
long as needed, even after the function that declares the variable has returned.
Here's another example that *should* work:

```lox
fun outer() {
  var x = "outside";
  fun inner() {
    print x;
  }

  return inner;
}

var closure = outer();
closure();
```

But if you run it right now... who knows what it does? At runtime, it will end
up reading from a stack slot that no longer contains the closed-over variable.
Like I've mentioned a few times, the crux of the issue is that variables in
closures don't have stack semantics. That means we've got to hoist them off the
stack when the function where they were declared returns. This final section of
the chapter does that.

### Values and variables

Before we get to writing code, I want to dig into an important semantic point.
Does a closure close over a *value* or a *variable?* This isn't purely an <span
name="academic">academic</span> question. I'm not just splitting hairs.
Consider:

<aside name="academic">

If Lox didn't allow assignment, it *would* be an academic question.

</aside>

```lox
var globalSet;
var globalGet;

fun main() {
  var a = "initial";

  fun set() { a = "updated"; }
  fun get() { print a; }

  globalSet = set;
  globalGet = get;
}

main();
globalSet();
globalGet();
```

The outer `main()` function creates two closures and stores them in <span
name="global">global</span> variables so that they outlive the execution of
`main()` itself. Both of those closures capture the same variable. The first
closure assigns a new value to it and the second closure reads the variable.

<aside name="global">

The fact that I'm using a couple of global variables isn't significant. I needed
some way to return two values from a function, and without any kind of
collection type in Lox, my options were limited.

</aside>

What does the call to `globalGet()` print? If closures capture *values* then
each closure gets its own copy of `a` with the value that `a` had at the point
in time that the closure's function declaration executed. The call to
`globalSet()` will modify `set()`'s copy of `a`, but `get()`'s copy will be
unaffected. Thus, the call to `globalGet()` will print "initial".

If closures close over variables, then `get()` and `set()` will both capture --
reference -- the *same mutable variable*. When `set()` changes `a`, it changes
the same `a` that `get()` reads from. There is only one `a`. That, in turn,
implies the call to `globalGet()` will print "updated".

Which is it? The answer for Lox and most other languages I know with closures is
the latter. Closures capture variables. You can think of them as capturing *the
place the value lives*. This is important to keep in mind as we deal with
closed-over variables that are no longer on the stack. When a variable moves to
the heap, we need to ensure that all closures capturing that variable retain a
reference to its *one* new location. That way, when the variable is mutated, all
closures see the change.

### Closing upvalues

We know that local variables always start out on the stack. This is faster, and
lets our single-pass compiler emit code before it discovers the variable has
been captured. We also know that closed-over variables need to move to the heap
if the closure outlives the function where the captured variable is declared.

Following Lua, we'll use **open upvalue** to refer to an upvalue that points to
a local variable still on the stack. When a variable moves to the heap, we are
*closing* the upvalue and the result is, naturally, a **closed upvalue**. The
two questions we need to answer are:

1.  Where on the heap does the closed-over variable go?

2.  When do we close the upvalue?

The answer to the first question is easy. We already have a convenient object on
the heap that represents a reference to a variable -- ObjUpvalue itself. The
closed-over variable will move into a new field right inside the ObjUpvalue
struct. That way we don't need to do any additional heap allocation to close an
upvalue.

The second question is straightforward too. As long as the variable is on the
stack, there may be code that refers to it there, and that code must work
correctly. So the logical time to hoist the variable to the heap is as late as
possible. If we move the local variable right when it goes out of scope, we are
certain that no code after that point will try to access it from the stack.
<span name="after">After</span> the variable is out of scope, the compiler will
have reported an error if any code tried to use it.

<aside name="after">

By "after" here, I mean in the lexical or textual sense -- code past the `}`
for the block containing the declaration of the closed-over variable.

</aside>

The compiler already emits an `OP_POP` instruction when a local variable goes
out of scope. If a variable is captured by a closure, we will instead emit a
different instruction to hoist that variable out of the stack and into its
corresponding upvalue. To do that, the compiler needs to know which <span
name="param">locals</span> are closed over.

<aside name="param">

The compiler doesn't pop parameters and locals declared immediately inside the
body of a function. We'll handle those too, in the runtime.

</aside>

The compiler already maintains an array of Upvalue structs for each function to
track the surrounding variables that function closes over. That array is good for
answering "Which variables does this closure use?" But it's poorly suited for
answering, "Does *any* function capture this local variable?" In particular,
once the Compiler for some closure has finished, the Compiler for the enclosing
function whose variable has been captured no longer has access to any of the
upvalue state.

In other words, the compiler maintains pointers from upvalues to the locals they
capture, but not in the other direction. So we first need to add some extra
tracking inside the existing Local struct so that we can tell if a given local
is captured by a closure.

^code is-captured-field (1 before, 1 after)

This field is `true` if the local is captured by any later nested function
declaration. Initially, all locals are not captured.

^code init-is-captured (1 before, 1 after)

<span name="zero">Likewise</span>, the special "slot zero local" that the
compiler implicitly declares is not captured.

<aside name="zero">

Later in the book, it *will* become possible for a user to capture this
variable. Just building some anticipation here.

</aside>

^code init-zero-local-is-captured (1 before, 1 after)

When resolving an identifier, if we end up creating an upvalue for a local
variable, we mark it as captured.

^code mark-local-captured (1 before, 1 after)

Now, at the end of a block scope when the compiler emits code to free the stack
slots for the locals, we can tell which ones need to get hoisted onto the heap.
We'll use a new instruction for that.

^code end-scope (3 before, 2 after)

The instruction requires no operand. We know that the variable will always be
right on top of the stack at the point that this instruction executes. We
declare the instruction.

^code close-upvalue-op (1 before, 1 after)

And add trivial disassembler support for it:

^code disassemble-close-upvalue (1 before, 1 after)

Excellent. Now the generated bytecode tells the runtime exactly when each
captured local variable must move to the heap. Better, it does so only for the
locals that *are* used by a closure and need this special treatment. This aligns
with our general performance goal that we want users to pay only for
functionality that they use. Variables that aren't used by closures live and die
entirely on the stack just as they did before.

### Tracking open upvalues

Let's move over to the runtime side. Before we can interpret `OP_CLOSE_UPVALUE`
instructions, we have an issue to resolve. Earlier, when I talked about whether
closures capture variables or values, I said it was important that if multiple
closures access the same variable, they end up with a reference to the
exact same storage location in memory. That way, if one closure writes to the
variable, the other closure sees the change.

Right now, if two closures capture the same <span name="indirect">local</span>
variable, the VM creates a separate ObjUpvalue for each one. The necessary
sharing is missing. When we move the variable off the stack, if we move it into
only one of the upvalues, the other upvalue will have an orphaned value.

<aside name="indirect">

The VM *does* share upvalues if one closure captures an *upvalue* from a
surrounding function. The nested case works correctly. But if two *sibling*
closures capture the same local variable, they each create a separate
ObjUpvalue.

</aside>

To fix that, whenever the VM needs an upvalue that captures a particular local
variable slot, we will first search for an existing upvalue pointing to that
slot. If found, we reuse that. The challenge is that all of the previously
created upvalues are squirreled away inside the upvalue arrays of the various
closures. Those closures could be anywhere in the VM's memory.

The first step is to give the VM its own list of all open upvalues that point to
variables still on the stack. Searching a list each time the VM needs an upvalue
sounds like it might be slow, but in practice, it's not bad. The number of
variables on the stack that actually get closed over tends to be small. And
function declarations that <span name="create">create</span> closures are rarely
on performance critical execution paths in the user's program.

<aside name="create">

Closures are frequently *invoked* inside hot loops. Think about the closures
passed to typical higher-order functions on collections like [`map()`][map] and
[`filter()`][filter]. That should be fast. But the function declaration that
*creates* the closure happens only once and is usually outside of the loop.

[map]: https://en.wikipedia.org/wiki/Map_(higher-order_function)
[filter]: https://en.wikipedia.org/wiki/Filter_(higher-order_function)

</aside>

Even better, we can order the list of open upvalues by the stack slot index they
point to. The common case is that a slot has *not* already been captured --
sharing variables between closures is uncommon -- and closures tend to capture
locals near the top of the stack. If we store the open upvalues in stack
slot order, as soon as we step past the slot where the local we're capturing
lives, we know it won't be found. When that local is near the top of the stack,
we can exit the loop pretty early.

Maintaining a sorted list requires inserting elements in the middle efficiently.
That suggests using a linked list instead of a dynamic array. Since we defined
the ObjUpvalue struct ourselves, the easiest implementation is an intrusive list
that puts the next pointer right inside the ObjUpvalue struct itself.

^code next-field (1 before, 1 after)

When we allocate an upvalue, it is not attached to any list yet so the link is
`NULL`.

^code init-next (1 before, 1 after)

The VM owns the list, so the head pointer goes right inside the main VM struct.

^code open-upvalues-field (1 before, 1 after)

The list starts out empty.

^code init-open-upvalues (1 before, 1 after)

Starting with the first upvalue pointed to by the VM, each open upvalue points
to the next open upvalue that references a local variable farther down the
stack. This script, for example,

```lox
{
  var a = 1;
  fun f() {
    print a;
  }
  var b = 2;
  fun g() {
    print b;
  }
  var c = 3;
  fun h() {
    print c;
  }
}
```

should produce a series of linked upvalues like so:

<img src="image/closures/linked-list.png" alt="Three upvalues in a linked list."/>

Whenever we close over a local variable, before creating a new upvalue, we look
for an existing one in the list.

^code look-for-existing-upvalue (1 before, 1 after)

We start at the <span name="head">head</span> of the list, which is the upvalue
closest to the top of the stack. We walk through the list, using a little
pointer comparison to iterate past every upvalue pointing to slots above the one
we're looking for. While we do that, we keep track of the preceding upvalue on
the list. We'll need to update that node's `next` pointer if we end up inserting
a node after it.

<aside name="head">

It's a singly linked list. It's not like we have any other choice than to start
at the head and go forward from there.

</aside>

There are three reasons we can exit the loop:

1.  **The local slot we stopped at *is* the slot we're looking for.** We found
    an existing upvalue capturing the variable, so we reuse that upvalue.

2.  **We ran out of upvalues to search.** When `upvalue` is `NULL`, it means
    every open upvalue in the list points to locals above the slot we're looking
    for, or (more likely) the upvalue list is empty. Either way, we didn't find
    an upvalue for our slot.

3.  **We found an upvalue whose local slot is *below* the one we're looking
    for.** Since the list is sorted, that means we've gone past the slot we are
    closing over, and thus there must not be an existing upvalue for it.

In the first case, we're done and we've returned. Otherwise, we create a new
upvalue for our local slot and insert it into the list at the right location.

^code insert-upvalue-in-list (1 before, 1 after)

The current incarnation of this function already creates the upvalue, so we only
need to add code to insert the upvalue into the list. We exited the list
traversal by either going past the end of the list, or by stopping on the first
upvalue whose stack slot is below the one we're looking for. In either case,
that means we need to insert the new upvalue *before* the object pointed at by
`upvalue` (which may be `NULL` if we hit the end of the list).

As you may have learned in Data Structures 101, to insert a node into a linked
list, you set the `next` pointer of the previous node to point to your new one.
We have been conveniently keeping track of that preceding node as we walked the
list. We also need to handle the <span name="double">special</span> case where
we are inserting a new upvalue at the head of the list, in which case the "next"
pointer is the VM's head pointer.

<aside name="double">

There is a shorter implementation that handles updating either the head pointer
or the previous upvalue's `next` pointer uniformly by using a pointer to a
pointer, but that kind of code confuses almost everyone who hasn't reached some
Zen master level of pointer expertise. I went with the basic `if` statement
approach.

</aside>

With this updated function, the VM now ensures that there is only ever a single
ObjUpvalue for any given local slot. If two closures capture the same variable,
they will get the same upvalue. We're ready to move those closed-over variables
off the stack now.

### Closing upvalues at runtime

The compiler helpfully emits an `OP_CLOSE_UPVALUE` instruction to tell the VM
exactly when a local variable should be hoisted onto the heap. Executing that
instruction is the interpreter's responsibility.

^code interpret-close-upvalue (1 before, 1 after)

When we reach the instruction, the variable we are hoisting is right on top of
the stack. We call a helper function, passing the address of that stack slot.
That function is responsible for closing the upvalue and moving the local from
the stack to the heap. After that, the VM is free to discard the stack slot,
which it does by calling `pop()`.

The fun stuff happens here:

^code close-upvalues

This function takes a pointer to a stack slot. It closes every open upvalue it
can find that points to that slot or any slot above it on the stack. Right now,
we pass a pointer only to the top slot on the stack, so the "or above it" part
doesn't come into play, but it will soon.

To do this, we walk the VM's list of open upvalues, again from top to bottom. If
an upvalue's location points into the range of slots we're closing, we close the
upvalue. Otherwise, once we reach an upvalue outside of the range, we know the
rest will be too, so we stop iterating.

The way an upvalue gets closed is pretty <span name="cool">cool</span>. First,
we copy the variable's value into the `closed` field in the ObjUpvalue. That's
where closed-over variables live on the heap. The `OP_GET_UPVALUE` and
`OP_SET_UPVALUE` instructions need to look for the variable there after it's
been moved. We could add some conditional logic in the interpreter code for
those instructions to check some flag for whether the upvalue is open or closed.

But there is already a level of indirection in play -- those instructions
dereference the `location` pointer to get to the variable's value. When the
variable moves from the stack to the `closed` field, we simply update that
`location` to the address of the ObjUpvalue's *own* `closed` field.

<aside name="cool">

I'm not praising myself here. This is all the Lua dev team's innovation.

</aside>

<img src="image/closures/closing.png" alt="Moving a value from the stack to the upvalue's 'closed' field and then pointing the 'location' field to it."/>

We don't need to change how `OP_GET_UPVALUE` and `OP_SET_UPVALUE` are
interpreted at all. That keeps them simple, which in turn keeps them fast. We do
need to add the new field to ObjUpvalue, though.

^code closed-field (1 before, 1 after)

And we should zero it out when we create an ObjUpvalue so there's no
uninitialized memory floating around.

^code init-closed (1 before, 1 after)

Whenever the compiler reaches the end of a block, it discards all local
variables in that block and emits an `OP_CLOSE_UPVALUE` for each local variable
that was closed over. The compiler <span name="close">does</span> *not* emit any
instructions at the end of the outermost block scope that defines a function
body. That scope contains the function's parameters and any locals declared
immediately inside the function. Those need to get closed too.

<aside name="close">

There's nothing *preventing* us from closing the outermost function scope in the
compiler and emitting `OP_POP` and `OP_CLOSE_UPVALUE` instructions. Doing so is
just unnecessary because the runtime discards all of the stack slots used by the
function implicitly when it pops the call frame.

</aside>

This is the reason `closeUpvalues()` accepts a pointer to a stack slot. When a
function returns, we call that same helper and pass in the first stack slot
owned by the function.

^code return-close-upvalues (1 before, 1 after)

By passing the first slot in the function's stack window, we close every
remaining open upvalue owned by the returning function. And with that, we now
have a fully functioning closure implementation. Closed-over variables live as
long as they are needed by the functions that capture them.

This was a lot of work! In jlox, closures fell out naturally from our
environment representation. In clox, we had to add a lot of code -- new bytecode
instructions, more data structures in the compiler, and new runtime objects. The
VM very much treats variables in closures as different from other variables.

There is a rationale for that. In terms of implementation complexity, jlox gave
us closures "for free". But in terms of *performance*, jlox's closures are
anything but. By allocating *all* environments on the heap, jlox pays a
significant performance price for *all* local variables, even the majority which
are never captured by closures.

With clox, we have a more complex system, but that allows us to tailor the
implementation to fit the two use patterns we observe for local variables. For
most variables which do have stack semantics, we allocate them entirely on the
stack which is simple and fast. Then, for the few local variables where that
doesn't work, we have a second slower path we can opt in to as needed.

Fortunately, users don't perceive the complexity. From their perspective, local
variables in Lox are simple and uniform. The *language itself* is as simple as
jlox's implementation. But under the hood, clox is watching what the user does
and optimizing for their specific uses. As your language implementations grow in
sophistication, you'll find yourself doing this more. A large fraction of
"optimization" is about adding special case code that detects certain uses and
provides a custom-built, faster path for code that fits that pattern.

We have lexical scoping fully working in clox now, which is a major milestone.
And, now that we have functions and variables with complex lifetimes, we also
have a *lot* of objects floating around in clox's heap, with a web of pointers
stringing them together. The [next step][] is figuring out how to manage that
memory so that we can free some of those objects when they're no longer needed.

[next step]: garbage-collection.html

<div class="challenges">

## Challenges

1.  Wrapping every ObjFunction in an ObjClosure introduces a level of
    indirection that has a performance cost. That cost isn't necessary for
    functions that do not close over any variables, but it does let the runtime
    treat all calls uniformly.

    Change clox to only wrap functions in ObjClosures that need upvalues. How
    does the code complexity and performance compare to always wrapping
    functions? Take care to benchmark programs that do and do not use closures.
    How should you weight the importance of each benchmark? If one gets slower
    and one faster, how do you decide what trade-off to make to choose an
    implementation strategy?

2.  Read the design note below. I'll wait. Now, how do you think Lox *should*
    behave? Change the implementation to create a new variable for each loop
    iteration.

3.  A [famous koan][koan] teaches us that "objects are a poor man's closure"
    (and vice versa). Our VM doesn't support objects yet, but now that we have
    closures we can approximate them. Using closures, write a Lox program that
    models two-dimensional vector "objects". It should:

    *   Define a "constructor" function to create a new vector with the given
        *x* and *y* coordinates.

    *   Provide "methods" to access the *x* and *y* coordinates of values
        returned from that constructor.

    *   Define an addition "method" that adds two vectors and produces a third.


[koan]: http://wiki.c2.com/?ClosuresAndObjectsAreEquivalent

</div>

<div class="design-note">

## Design Note: Closing Over the Loop Variable

Closures capture variables. When two closures capture the same variable, they
share a reference to the same underlying storage location. This fact is visible
when new values are assigned to the variable. Obviously, if two closures capture
*different* variables, there is no sharing.

```lox
var globalOne;
var globalTwo;

fun main() {
  {
    var a = "one";
    fun one() {
      print a;
    }
    globalOne = one;
  }

  {
    var a = "two";
    fun two() {
      print a;
    }
    globalTwo = two;
  }
}

main();
globalOne();
globalTwo();
```

This prints "one" then "two". In this example, it's pretty clear that the two
`a` variables are different. But it's not always so obvious. Consider:

```lox
var globalOne;
var globalTwo;

fun main() {
  for (var a = 1; a <= 2; a = a + 1) {
    fun closure() {
      print a;
    }
    if (globalOne == nil) {
      globalOne = closure;
    } else {
      globalTwo = closure;
    }
  }
}

main();
globalOne();
globalTwo();
```

The code is convoluted because Lox has no collection types. The important part
is that the `main()` function does two iterations of a `for` loop. Each time
through the loop, it creates a closure that captures the loop variable. It
stores the first closure in `globalOne` and the second in `globalTwo`.

There are definitely two different closures. Do they close over two different
variables? Is there only one `a` for the entire duration of the loop, or does
each iteration get its own distinct `a` variable?

The script here is strange and contrived, but this does show up in real code
in languages that aren't as minimal as Lox. Here's a JavaScript example:

```js
var closures = [];
for (var i = 1; i <= 2; i++) {
  closures.push(function () { console.log(i); });
}

closures[0]();
closures[1]();
```

Does this print "1" then "2", or does it print <span name="three">"3"</span>
twice? You may be surprised to hear that it prints "3" twice. In this JavaScript
program, there is only a single `i` variable whose lifetime includes all
iterations of the loop, including the final exit.

<aside name="three">

You're wondering how *three* enters the picture? After the second iteration,
`i++` is executed, which increments `i` to three. That's what causes `i <= 2` to
evaluate to false and end the loop. If `i` never reached three, the loop would
run forever.

</aside>

If you're familiar with JavaScript, you probably know that variables declared
using `var` are implicitly *hoisted* to the surrounding function or top-level
scope. It's as if you really wrote this:

```js
var closures = [];
var i;
for (i = 1; i <= 2; i++) {
  closures.push(function () { console.log(i); });
}

closures[0]();
closures[1]();
```

At that point, it's clearer that there is only a single `i`. Now consider if
you change the program to use the newer `let` keyword:

```js
var closures = [];
for (let i = 1; i <= 2; i++) {
  closures.push(function () { console.log(i); });
}

closures[0]();
closures[1]();
```

Does this new program behave the same? Nope. In this case, it prints "1" then
"2". Each closure gets its own `i`. That's sort of strange when you think about
it. The increment clause is `i++`. That looks very much like it is assigning to
and mutating an existing variable, not creating a new one.

Let's try some other languages. Here's Python:

```python
closures = []
for i in range(1, 3):
  closures.append(lambda: print(i))

closures[0]()
closures[1]()
```

Python doesn't really have block scope. Variables are implicitly declared and
are automatically scoped to the surrounding function. Kind of like hoisting in
JS, now that I think about it. So both closures capture the same variable.
Unlike C, though, we don't exit the loop by incrementing `i` *past* the last
value, so this prints "2" twice.

What about Ruby? Ruby has two typical ways to iterate numerically. Here's the
classic imperative style:

```ruby
closures = []
for i in 1..2 do
  closures << lambda { puts i }
end

closures[0].call
closures[1].call
```

This, like Python, prints "2" twice. But the more idiomatic Ruby style is using
a higher-order `each()` method on range objects:

```ruby
closures = []
(1..2).each do |i|
  closures << lambda { puts i }
end

closures[0].call
closures[1].call
```

If you're not familiar with Ruby, the `do |i| ... end` part is basically a
closure that gets created and passed to the `each()` method. The `|i|` is the
parameter signature for the closure. The `each()` method invokes that closure
twice, passing in 1 for `i` the first time and 2 the second time.

In this case, the "loop variable" is really a function parameter. And, since
each iteration of the loop is a separate invocation of the function, those are
definitely separate variables for each call. So this prints "1" then "2".

If a language has a higher-level iterator-based looping structure like `foreach`
in C#, Java's "enhanced for", `for-of` in JavaScript, `for-in` in Dart, etc.,
then I think it's natural to the reader to have each iteration create a new
variable. The code *looks* like a new variable because the loop header looks
like a variable declaration. And there's no increment expression that looks like
it's mutating that variable to advance to the next step.

If you dig around StackOverflow and other places, you find evidence that this is
what users expect, because they are very surprised when they *don't* get it. In
particular, C# originally did *not* create a new loop variable for each
iteration of a `foreach` loop. This was such a frequent source of user confusion
that they took the very rare step of shipping a breaking change to the language.
In C# 5, each iteration creates a fresh variable.

Old C-style `for` loops are harder. The increment clause really does look like
mutation. That implies there is a single variable that's getting updated each
step. But it's almost never *useful* for each iteration to share a loop
variable. The only time you can even detect this is when closures capture it.
And it's rarely helpful to have a closure that references a variable whose value
is whatever value caused you to exit the loop.

The pragmatically useful answer is probably to do what JavaScript does with
`let` in `for` loops. Make it look like mutation but actually create a new
variable each time, because that's what users want. It is kind of weird when you
think about it, though.

</div>


================================================
FILE: book/compiling-expressions.md
================================================
> In the middle of the journey of our life I found myself within a dark woods
> where the straight way was lost.
>
> <cite>Dante Alighieri, <em>Inferno</em></cite>

This chapter is exciting for not one, not two, but *three* reasons. First, it
provides the final segment of our VM's execution pipeline. Once in place, we can
plumb the user's source code from scanning all the way through to executing it.

<img src="image/compiling-expressions/pipeline.png" alt="Lowering the 'compiler' section of pipe between 'scanner' and 'VM'." />

Second, we get to write an actual, honest-to-God *compiler*. It parses source
code and outputs a low-level series of binary instructions. Sure, it's <span
name="wirth">bytecode</span> and not some chip's native instruction set, but
it's way closer to the metal than jlox was. We're about to be real language
hackers.

<aside name="wirth">

Bytecode was good enough for Niklaus Wirth, and no one questions his street
cred.

</aside>

<span name="pratt">Third</span> and finally, I get to show you one of my
absolute favorite algorithms: Vaughan Pratt's "top-down operator precedence
parsing". It's the most elegant way I know to parse expressions. It gracefully
handles prefix operators, postfix, infix, *mixfix*, any kind of *-fix* you got.
It deals with precedence and associativity without breaking a sweat. I love it.

<aside name="pratt">

Pratt parsers are a sort of oral tradition in industry. No compiler or language
book I've read teaches them. Academia is very focused on generated parsers, and
Pratt's technique is for handwritten ones, so it gets overlooked.

But in production compilers, where hand-rolled parsers are common, you'd be
surprised how many people know it. Ask where they learned it, and it's always,
"Oh, I worked on this compiler years ago and my coworker said they took it from
this old front end..."

</aside>

As usual, before we get to the fun stuff, we've got some preliminaries to work
through. You have to eat your vegetables before you get dessert. First, let's
ditch that temporary scaffolding we wrote for testing the scanner and replace it
with something more useful.

^code interpret-chunk (1 before, 1 after)

We create a new empty chunk and pass it over to the compiler. The compiler will
take the user's program and fill up the chunk with bytecode. At least, that's
what it will do if the program doesn't have any compile errors. If it does
encounter an error, `compile()` returns `false` and we discard the unusable
chunk.

Otherwise, we send the completed chunk over to the VM to be executed. When the
VM finishes, we free the chunk and we're done. As you can see, the signature to
`compile()` is different now.

^code compile-h (2 before, 2 after)

We pass in the chunk where the compiler will write the code, and then
`compile()` returns whether or not compilation succeeded. We make the same
change to the signature in the implementation.

^code compile-signature (2 before, 1 after)

That call to `initScanner()` is the only line that survives this chapter. Rip
out the temporary code we wrote to test the scanner and replace it with these
three lines:

^code compile-chunk (1 before, 1 after)

The call to `advance()` "primes the pump" on the scanner. We'll see what it does
soon. Then we parse a single expression. We aren't going to do statements yet,
so that's the only subset of the grammar we support. We'll revisit this when we
[add statements in a few chapters][globals]. After we compile the expression, we
should be at the end of the source code, so we check for the sentinel EOF token.

[globals]: global-variables.html

We're going to spend the rest of the chapter making this function work,
especially that little `expression()` call. Normally, we'd dive right into that
function definition and work our way through the implementation from top to
bottom.

This chapter is <span name="blog">different</span>. Pratt's parsing technique is
remarkably simple once you have it all loaded in your head, but it's a little
tricky to break into bite-sized pieces. It's recursive, of course, which is part
of the problem. But it also relies on a big table of data. As we build up the
algorithm, that table grows additional columns.

<aside name="blog">

If this chapter isn't clicking with you and you'd like another take on the
concepts, I wrote an article that teaches the same algorithm but using Java and
an object-oriented style: ["Pratt Parsing: Expression Parsing Made Easy"][blog].

[blog]: http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/

</aside>

I don't want to revisit 40-something lines of code each time we extend the
table. So we're going to work our way into the core of the parser from the
outside and cover all of the surrounding bits before we get to the juicy center.
This will require a little more patience and mental scratch space than most
chapters, but it's the best I could do.

## Single-Pass Compilation

A compiler has roughly two jobs. It parses the user's source code to understand
what it means. Then it takes that knowledge and outputs low-level instructions
that produce the same semantics. Many languages split those two roles into two
separate <span name="passes">passes</span> in the implementation. A parser
produces an AST -- just like jlox does -- and then a code generator traverses
the AST and outputs target code.

<aside name="passes">

In fact, most sophisticated optimizing compilers have a heck of a lot more than
two passes. Determining not just *what* optimization passes to have, but how to
order them to squeeze the most performance out of the compiler -- since the
optimizations often interact in complex ways -- is somewhere between an "open
area of research" and a "dark art".

</aside>

In clox, we're taking an old-school approach and merging these two passes into
one. Back in the day, language hackers did this because computers literally
didn't have enough memory to store an entire source file's AST. We're doing it
because it keeps our compiler simpler, which is a real asset when programming in
C.

Single-pass compilers like the one we're going to build don't work well for all
languages. Since the compiler has only a peephole view into the user's program
while generating code, the language must be designed such that you don't need
much surrounding context to understand a piece of syntax. Fortunately, tiny,
dynamically typed Lox is <span name="lox">well-suited</span> to that.

<aside name="lox">

Not that this should come as much of a surprise. I did design the language
specifically for this book after all.

<img src="image/compiling-expressions/keyhole.png" alt="Peering through a keyhole at 'var x;'" />

</aside>

What this means in practical terms is that our "compiler" C module has
functionality you'll recognize from jlox for parsing -- consuming tokens,
matching expected token types, etc. And it also has functions for code gen --
emitting bytecode and adding constants to the destination chunk. (And it means
I'll use "parsing" and "compiling" interchangeably throughout this and later
chapters.)

We'll build the parsing and code generation halves first. Then we'll stitch them
together with the code in the middle that uses Pratt's technique to parse Lox's
particular grammar and output the right bytecode.

## Parsing Tokens

First up, the front half of the compiler. This function's name should sound
familiar.

^code advance (1 before)

Just like in jlox, it steps forward through the token stream. It asks the
scanner for the next token and stores it for later use. Before doing that, it
takes the old `current` token and stashes that in a `previous` field. That will
come in handy later so that we can get at the lexeme after we match a token.

The code to read the next token is wrapped in a loop. Remember, clox's scanner
doesn't report lexical errors. Instead, it creates special *error tokens* and
leaves it up to the parser to report them. We do that here.

We keep looping, reading tokens and reporting the errors, until we hit a
non-error one or reach the end. That way, the rest of the parser sees only valid
tokens. The current and previous tokens are stored in this struct:

^code parser (1 before, 2 after)

Like we did in other modules, we have a single global variable of this struct
type so we don't need to pass the state around from function to function in the
compiler.

### Handling syntax errors

If the scanner hands us an error token, we need to actually tell the user. That
happens using this:

^code error-at-current

We pull the location out of the current token in order to tell the user where
the error occurred and forward it to `errorAt()`. More often, we'll report an
error at the location of the token we just consumed, so we give the shorter name
to this other function:

^code error

The actual work happens here:

^code error-at

First, we print where the error occurred. We try to show the lexeme if it's
human-readable. Then we print the error message itself. After that, we set this
`hadError` flag. That records whether any errors occurred during compilation.
This field also lives in the parser struct.

^code had-error-field (1 before, 1 after)

Earlier I said that `compile()` should return `false` if an error occurred. Now
we can make it do that.

^code return-had-error (1 before, 1 after)

I've got another flag to introduce for error handling. We want to avoid error
cascades. If the user has a mistake in their code and the parser gets confused
about where it is in the grammar, we don't want it to spew out a whole pile of
meaningless knock-on errors after the first one.

We fixed that in jlox using panic mode error recovery. In the Java interpreter,
we threw an exception to unwind out of all of the parser code to a point where
we could skip tokens and resynchronize. We don't have <span
name="setjmp">exceptions</span> in C. Instead, we'll do a little smoke and
mirrors. We add a flag to track whether we're currently in panic mode.

<aside name="setjmp">

There is `setjmp()` and `longjmp()`, but I'd rather not go there. Those make it
too easy to leak memory, forget to maintain invariants, or otherwise have a Very
Bad Day.

</aside>

^code panic-mode-field (1 before, 1 after)

When an error occurs, we set it.

^code set-panic-mode (1 before, 1 after)

After that, we go ahead and keep compiling as normal as if the error never
occurred. The bytecode will never get executed, so it's harmless to keep on
trucking. The trick is that while the panic mode flag is set, we simply suppress
any other errors that get detected.

^code check-panic-mode (1 before, 1 after)

There's a good chance the parser will go off in the weeds, but the user won't
know because the errors all get swallowed. Panic mode ends when the parser
reaches a synchronization point. For Lox, we chose statement boundaries, so when
we later add those to our compiler, we'll clear the flag there.

These new fields need to be initialized.

^code init-parser-error (1 before, 1 after)

And to display the errors, we need a standard header.

^code compiler-include-stdlib (1 before, 2 after)

There's one last parsing function, another old friend from jlox.

^code consume

It's similar to `advance()` in that it reads the next token. But it also
validates that the token has an expected type. If not, it reports an error. This
function is the foundation of most syntax errors in the compiler.

OK, that's enough on the front end for now.

## Emitting Bytecode

After we parse and understand a piece of the user's program, the next step is to
translate that to a series of bytecode instructions. It starts with the easiest
possible step: appending a single byte to the chunk.

^code emit-byte

It's hard to believe great things will flow through such a simple function. It
writes the given byte, which may be an opcode or an operand to an instruction.
It sends in the previous token's line information so that runtime errors are
associated with that line.

The chunk that we're writing gets passed into `compile()`, but it needs to make
its way to `emitByte()`. To do that, we rely on this intermediary function:

^code compiling-chunk (1 before, 1 after)

Right now, the chunk pointer is stored in a module-level variable like we store
other global state. Later, when we start compiling user-defined functions, the
notion of "current chunk" gets more complicated. To avoid having to go back and
change a lot of code, I encapsulate that logic in the `currentChunk()` function.

We initialize this new module variable before we write any bytecode:

^code init-compile-chunk (2 before, 2 after)

Then, at the very end, when we're done compiling the chunk, we wrap things up.

^code finish-compile (1 before, 1 after)

That calls this:

^code end-compiler

In this chapter, our VM deals only with expressions. When you run clox, it will
parse, compile, and execute a single expression, then print the result. To print
that value, we are temporarily using the `OP_RETURN` instruction. So we have the
compiler add one of those to the end of the chunk.

^code emit-return

While we're here in the back end we may as well make our lives easier.

^code emit-bytes

Over time, we'll have enough cases where we need to write an opcode followed by
a one-byte operand that it's worth defining this convenience function.

## Parsing Prefix Expressions

We've assembled our parsing and code generation utility functions. The missing
piece is the code in the middle that connects those together.

<img src="image/compiling-expressions/mystery.png" alt="Parsing functions on the left, bytecode emitting functions on the right. What goes in the middle?" />

The only step in `compile()` that we have left to implement is this function:

^code expression

We aren't ready to implement every kind of expression in Lox yet. Heck, we don't
even have Booleans. For this chapter, we're only going to worry about four:

* Number literals: `123`
* Parentheses for grouping: `(123)`
* Unary negation: `-123`
* The Four Horsemen of the Arithmetic: `+`, `-`, `*`, `/`

As we work through the functions to compile each of those kinds of expressions,
we'll also assemble the requirements for the table-driven parser that calls
them.

### Parsers for tokens

For now, let's focus on the Lox expressions that are each only a single token.
In this chapter, that's just number literals, but there will be more later. Here's
how we can compile them:

We map each token type to a different kind of expression. We define a function
for each expression that outputs the appropriate bytecode. Then we build an
array of function pointers. The indexes in the array correspond to the
`TokenType` enum values, and the function at each index is the code to compile
an expression of that token type.

To compile number literals, we store a pointer to the following function at the
`TOKEN_NUMBER` index in the array.

^code number

We assume the token for the number literal has already been consumed and is
stored in `previous`. We take that lexeme and use the C standard library to
convert it to a double value. Then we generate the code to load that value using
this function:

^code emit-constant

First, we add the value to the constant table, then we emit an `OP_CONSTANT`
instruction that pushes it onto the stack at runtime. To insert an entry in the
constant table, we rely on:

^code make-constant

Most of the work happens in `addConstant()`, which we defined back in an
[earlier chapter][bytecode]. That adds the given value to the end of the chunk's
constant table and returns its index. The new function's job is mostly to make
sure we don't have too many constants. Since the `OP_CONSTANT` instruction uses
a single byte for the index operand, we can store and load only up to <span
name="256">256</span> constants in a chunk.

[bytecode]: chunks-of-bytecode.html

<aside name="256">

Yes, that limit is pretty low. If this were a full-sized language
implementation, we'd want to add another instruction like `OP_CONSTANT_16` that
stores the index as a two-byte operand so we could handle more constants when
needed.

The code to support that isn't particularly illuminating, so I omitted it from
clox, but you'll want your VMs to scale to larger programs.

</aside>

That's basically all it takes. Provided there is some suitable code that
consumes a `TOKEN_NUMBER` token, looks up `number()` in the function pointer
array, and then calls it, we can now compile number literals to bytecode.

### Parentheses for grouping

Our as-yet-imaginary array of parsing function pointers would be great if every
expression was only a single token long. Alas, most are longer. However, many
expressions *start* with a particular token. We call these *prefix* expressions.
For example, when we're parsing an expression and the current token is `(`, we
know we must be looking at a parenthesized grouping expression.

It turns out our function pointer array handles those too. The parsing function
for an expression type can consume any additional tokens that it wants to, just
like in a regular recursive descent parser. Here's how parentheses work:

^code grouping

Again, we assume the initial `(` has already been consumed. We <span
name="recursive">recursively</span> call back into `expression()` to compile the
expression between the parentheses, then parse the closing `)` at the end.

<aside name="recursive">

A Pratt parser isn't a recursive *descent* parser, but it's still recursive.
That's to be expected since the grammar itself is recursive.

</aside>

As far as the back end is concerned, there's literally nothing to a grouping
expression. Its sole function is syntactic -- it lets you insert a
lower-precedence expression where a higher precedence is expected. Thus, it has
no runtime semantics on its own and therefore doesn't emit any bytecode. The
inner call to `expression()` takes care of generating bytecode for the
expression inside the parentheses.

### Unary negation

Unary minus is also a prefix expression, so it works with our model too.

^code unary

The leading `-` token has been consumed and is sitting in `parser.previous`. We
grab the token type from that to note which unary operator we're dealing with.
It's unnecessary right now, but this will make more sense when we use this same
function to compile the `!` operator in [the next chapter][next].

[next]: types-of-values.html

As in `grouping()`, we recursively call `expression()` to compile the operand.
After that, we emit the bytecode to perform the negation. It might seem a little
weird to write the negate instruction *after* its operand's bytecode since the
`-` appears on the left, but think about it in terms of order of execution:

1. We evaluate the operand first which leaves its value on the stack.

2. Then we pop that value, negate it, and push the result.

So the `OP_NEGATE` instruction should be emitted <span name="line">last</span>.
This is part of the compiler's job -- parsing the program in the order it
appears in the source code and rearranging it into the order that execution
happens.

<aside name="line">

Emitting the `OP_NEGATE` instruction after the operand does mean that the
current token when the bytecode is written is *not* the `-` token. That mostly
doesn't matter, except that we use that token for the line number to associate
with that instruction.

This means if you have a multi-line negation expression, like:

```lox
print -
  true;
```

Then the runtime error will be reported on the wrong line. Here, it would show
the error on line 2, even though the `-` is on line 1. A more robust approach
would be to store the token's line before compiling the operand and then pass
that into `emitByte()`, but I wanted to keep things simple for the book.

</aside>

There is one problem with this code, though. The `expression()` function it
calls will parse any expression for the operand, regardless of precedence. Once
we add binary operators and other syntax, that will do the wrong thing.
Consider:

```lox
-a.b + c;
```

Here, the operand to `-` should be just the `a.b` expression, not the entire
`a.b + c`. But if `unary()` calls `expression()`, the latter will happily chew
through all of the remaining code including the `+`. It will erroneously treat
the `-` as lower precedence than the `+`.

When parsing the operand to unary `-`, we need to compile only expressions at a
certain precedence level or higher. In jlox's recursive descent parser we
accomplished that by calling into the parsing method for the lowest-precedence
expression we wanted to allow (in this case, `unary()`). Each method for parsing
a specific expression also parsed any expressions of higher precedence too, so
that included the rest of the precedence table.

The parsing functions like `number()` and `unary()` here in clox are different.
Each only parses exactly one type of expression. They don't cascade to include
higher-precedence expression types too. We need a different solution, and it
looks like this:

^code parse-precedence

This function -- once we implement it -- starts at the current token and parses
any expression at the given precedence level or higher. We have some other setup
to get through before we can write the body of this function, but you can
probably guess that it will use that table of parsing function pointers I've
been talking about. For now, don't worry too much about how it works. In order
to take the "precedence" as a parameter, we define it numerically.

^code precedence (1 before, 2 after)

These are all of Lox's precedence levels in order from lowest to highest. Since
C implicitly gives successively larger numbers for enums, this means that
`PREC_CALL` is numerically larger than `PREC_UNARY`. For example, say the
compiler is sitting on a chunk of code like:

```lox
-a.b + c
```

If we call `parsePrecedence(PREC_ASSIGNMENT)`, then it will parse the entire
expression because `+` has higher precedence than assignment. If instead we
call `parsePrecedence(PREC_UNARY)`, it will compile the `-a.b` and stop there.
It doesn't keep going through the `+` because the addition has lower precedence
than unary operators.

With this function in hand, it's a snap to fill in the missing body for
`expression()`.

^code expression-body (1 before, 1 after)

We simply parse the lowest precedence level, which subsumes all of the
higher-precedence expressions too. Now, to compile the operand for a unary
expression, we call this new function and limit it to the appropriate level:

^code unary-operand (1 before, 2 after)

We use the unary operator's own `PREC_UNARY` precedence to permit <span
name="useful">nested</span> unary expressions like `!!doubleNegative`. Since
unary operators have pretty high precedence, that correctly excludes things like
binary operators. Speaking of which...

<aside name="useful">

Not that nesting unary expressions is particularly useful in Lox. But other
languages let you do it, so we do too.

</aside>

## Parsing Infix Expressions

Binary operators are different from the previous expressions because they are
*infix*. With the other expressions, we know what we are parsing from the very
first token. With infix expressions, we don't know we're in the middle of a
binary operator until *after* we've parsed its left operand and then stumbled
onto the operator token in the middle.

Here's an example:

```lox
1 + 2
```

Let's walk through trying to compile it with what we know so far:

1.  We call `expression()`. That in turn calls
    `parsePrecedence(PREC_ASSIGNMENT)`.

2.  That function (once we implement it) sees the leading number token and
    recognizes it is parsing a number literal. It hands off control to
    `number()`.

3.  `number()` creates a constant, emits an `OP_CONSTANT`, and returns back to
    `parsePrecedence()`.

Now what? The call to `parsePrecedence()` should consume the entire addition
expression, so it needs to keep going somehow. Fortunately, the parser is right
where we need it to be. Now that we've compiled the leading number expression,
the next token is `+`. That's the exact token that `parsePrecedence()` needs to
detect that we're in the middle of an infix expression and to realize that the
expression we already compiled is actually an operand to that.

So this hypothetical array of function pointers doesn't just list functions to
parse expressions that start with a given token. Instead, it's a *table* of
function pointers. One column associates prefix parser functions with token
types. The second column associates infix parser functions with token types.

The function we will use as the infix parser for `TOKEN_PLUS`, `TOKEN_MINUS`,
`TOKEN_STAR`, and `TOKEN_SLASH` is this:

^code binary

When a prefix parser function is called, the leading token has already been
consumed. An infix parser function is even more *in medias res* -- the entire
left-hand operand expression has already been compiled and the subsequent infix
operator consumed.

The fact that the left operand gets compiled first works out fine. It means at
runtime, that code gets executed first. When it runs, the value it produces will
end up on the stack. That's right where the infix operator is going to need it.

Then we come here to `binary()` to handle the rest of the arithmetic operators.
This function compiles the right operand, much like how `unary()` compiles its
own trailing operand. Finally, it emits the bytecode instruction that performs
the binary operation.

When run, the VM will execute the left and right operand code, in that order,
leaving their values on the stack. Then it executes the instruction for the
operator. That pops the two values, computes the operation, and pushes the
result.

The code that probably caught your eye here is that `getRule()` line. When we
parse the right-hand operand, we again need to worry about precedence. Take an
expression like:

```lox
2 * 3 + 4
```

When we parse the right operand of the `*` expression, we need to just capture
`3`, and not `3 + 4`, because `+` is lower precedence than `*`. We could define
a separate function for each binary operator. Each would call
`parsePrecedence()` and pass in the correct precedence level for its operand.

But that's kind of tedious. Each binary operator's right-hand operand precedence
is one level <span name="higher">higher</span> than its own. We can look that up
dynamically with this `getRule()` thing we'll get to soon. Using that, we call
`parsePrecedence()` with one level higher than this operator's level.

<aside name="higher">

We use one *higher* level of precedence for the right operand because the binary
operators are left-associative. Given a series of the *same* operator, like:

```lox
1 + 2 + 3 + 4
```

We want to parse it like:

```lox
((1 + 2) + 3) + 4
```

Thus, when parsing the right-hand operand to the first `+`, we want to consume
the `2`, but not the rest, so we use one level above `+`'s precedence. But if
our operator was *right*-associative, this would be wrong. Given:

```lox
a = b = c = d
```

Since assignment is right-associative, we want to parse it as:

```lox
a = (b = (c = d))
```

To enable that, we would call `parsePrecedence()` with the *same* precedence as
the current operator.

</aside>

This way, we can use a single `binary()` function for all binary operators even
though they have different precedences.

## A Pratt Parser

We now have all of the pieces and parts of the compiler laid out. We have a
function for each grammar production: `number()`, `grouping()`, `unary()`, and
`binary()`. We still need to implement `parsePrecedence()` and `getRule()`. We
also know we need a table that, given a token type, lets us find

*   the function to compile a prefix expression starting with a token of that
    type,

*   the function to compile an infix expression whose left operand is followed
    by a token of that type, and

*   the precedence of an <span name="prefix">infix</span> expression that uses
    that token as an operator.

<aside name="prefix">

We don't need to track the precedence of the *prefix* expression starting with a
given token because all prefix operators in Lox have the same precedence.

</aside>

We wrap these three properties in a little struct which represents a single row
in the parser table.

^code parse-rule (1 before, 2 after)

That ParseFn type is a simple <span name="typedef">typedef</span> for a function
type that takes no arguments and returns nothing.

<aside name="typedef" class="bottom">

C's syntax for function pointer types is so bad that I always hide it behind a
typedef. I understand the intent behind the syntax -- the whole "declaration
reflects use" thing -- but I think it was a failed syntactic experiment.

</aside>

^code parse-fn-type (1 before, 2 after)

The table that drives our whole parser is an array of ParseRules. We've been
talking about it forever, and finally you get to see it.

^code rules

<aside name="big">

See what I mean about not wanting to revisit the table each time we needed a new
column? It's a beast.

If you haven't seen the `[TOKEN_DOT] = ` syntax in a C array literal, that is
C99's designated initializer syntax. It's clearer than having to count array
indexes by hand.

</aside>

You can see how `grouping` and `unary` are slotted into the prefix parser column
for their respective token types. In the next column, `binary` is wired up to
the four arithmetic infix operators. Those infix operators also have their
precedences set in the last column.

Aside from those, the rest of the table is full of `NULL` and `PREC_NONE`. Most
of those empty cells are because there is no expression associated with those
tokens. You can't start an expression with, say, `else`, and `}` would make for
a pretty confusing infix operator.

But, also, we haven't filled in the entire grammar yet. In later chapters, as we
add new expression types, some of these slots will get functions in them. One of
the things I like about this approach to parsing is that it makes it very easy
to see which tokens are in use by the grammar and which are available.

Now that we have the table, we are finally ready to write the code that uses it.
This is where our Pratt parser comes to life. The easiest function to define is
`getRule()`.

^code get-rule

It simply returns the rule at the given index. It's called by `binary()` to look
up the precedence of the current operator. This function exists solely to handle
a declaration cycle in the C code. `binary()` is defined *before* the rules
table so that the table can store a pointer to it. That means the body of
`binary()` cannot access the table directly.

Instead, we wrap the lookup in a function. That lets us forward declare
`getRule()` before the definition of `binary()`, and <span
name="forward">then</span> *define* `getRule()` after the table. We'll need a
couple of other forward declarations to handle the fact that our grammar is
recursive, so let's get them all out of the way.

<aside name="forward">

This is what happens when you write your VM in a language that was designed to
be compiled on a PDP-11.

</aside>

^code forward-declarations (2 before, 1 after)

If you're following along and implementing clox yourself, pay close attention to
the little annotations that tell you where to put these code snippets. Don't
worry, though: if you get it wrong, the C compiler will be happy to tell you.

### Parsing with precedence

Now we're getting to the fun stuff. The maestro that orchestrates all of the
parsing functions we've defined is `parsePrecedence()`. Let's start with parsing
prefix expressions.

^code precedence-body (1 before, 1 after)

We read the next token and look up the corresponding ParseRule. If there is no
prefix parser, then the token must be a syntax error. We report that and return
to the caller.

Otherwise, we call that prefix parse function and let it do its thing. That
prefix parser compiles the rest of the prefix expression, consuming any other
tokens it needs, and returns back here. Infix expressions are where it gets
interesting since precedence comes into play. The implementation is remarkably
simple.

^code infix (1 before, 1 after)

That's the whole thing. Really. Here's how the entire function works: At the
beginning of `parsePrecedence()`, we look up a prefix parser for the current
token. The first token is *always* going to belong to some kind of prefix
expression, by definition. It may turn out to be nested as an operand inside one
or more infix expressions, but as you read the code from left to right, the
first token you hit always belongs to a prefix expression.

After parsing that, which may consume more tokens, the prefix expression is
done. Now we look for an infix parser for the next token. If we find one, it
means the prefix expression we already compiled might be an operand for it. But
only if the call to `parsePrecedence()` has a `precedence` that is low enough to
permit that infix operator.

If the next token is too low precedence, or isn't an infix operator at all,
we're done. We've parsed as much expression as we can. Otherwise, we consume the
operator and hand off control to the infix parser we found. It consumes whatever
other tokens it needs (usually the right operand) and returns back to
`parsePrecedence()`. Then we loop back around and see if the *next* token is
also a valid infix operator that can take the entire preceding expression as its
operand. We keep looping like that, crunching through infix operators and their
operands until we hit a token that isn't an infix operator or is too low
precedence and stop.

That's a lot of prose, but if you really want to mind meld with Vaughan Pratt
and fully understand the algorithm, step through the parser in your debugger as
it works through some expressions. Maybe a picture will help. There's only a
handful of functions, but they are marvelously intertwined:

<span name="connections"></span>

<img src="image/compiling-expressions/connections.png" alt="The various parsing
functions and how they call each other." />

<aside name="connections">

The <img src="image/compiling-expressions/calls.png" alt="A solid arrow."
class="arrow" /> arrow connects a function to another function it directly
calls. The <img src="image/compiling-expressions/points-to.png" alt="An open
arrow." class="arrow" /> arrow shows the table's pointers to the parsing
functions.

</aside>

Later, we'll need to tweak the code in this chapter to handle assignment. But,
otherwise, what we wrote covers all of our expression compiling needs for the
rest of the book. We'll plug additional parsing functions into the table when we
add new kinds of expressions, but `parsePrecedence()` is complete.

## Dumping Chunks

While we're here in the core of our compiler, we should put in some
instrumentation. To help debug the generated bytecode, we'll add support for
dumping the chunk once the compiler finishes. We had some temporary logging
earlier when we hand-authored the chunk. Now we'll put in some real code so that
we can enable it whenever we want.

Since this isn't for end users, we hide it behind a flag.

^code define-debug-print-code (2 before, 1 after)

When that flag is defined, we use our existing "debug" module to print out the
chunk's bytecode.

^code dump-chunk (1 before, 1 after)

We do this only if the code was free of errors. After a syntax error, the
compiler keeps on going but it's in kind of a weird state and might produce
broken code. That's harmless because it won't get executed, but we'll just
confuse ourselves if we try to read it.

Finally, to access `disassembleChunk()`, we need to include its header.

^code include-debug (1 before, 2 after)

We made it! This was the last major section to install in our VM's compilation
and execution pipeline. Our interpreter doesn't *look* like much, but inside it
is scanning, parsing, compiling to bytecode, and executing.

Fire up the VM and type in an expression. If we did everything right, it should
calculate and print the result. We now have a very over-engineered arithmetic
calculator. We have a lot of language features to add in the coming chapters,
but the foundation is in place.

<div class="challenges">

## Challenges

1.  To really understand the parser, you need to see how execution threads
    through the interesting parsing functions -- `parsePrecedence()` and the
    parser functions stored in the table. Take this (strange) expression:

    ```lox
    (-1 + 2) * 3 - -4
    ```

    Write a trace of how those functions are called. Show the order they are
    called, which calls which, and the arguments passed to them.

2.  The ParseRule row for `TOKEN_MINUS` has both prefix and infix function
    pointers. That's because `-` is both a prefix operator (unary negation) and
    an infix one (subtraction).

    In the full Lox language, what other tokens can be used in both prefix and
    infix positions? What about in C or in another language of your choice?

3.  You might be wondering about complex "mixfix" expressions that have more
    than two operands separated by tokens. C's conditional or "ternary"
    operator, `?:`, is a widely known one.

    Add support for that operator to the compiler. You don't have to generate
    any bytecode, just show how you would hook it up to the parser and handle
    the operands.

</div>

<div class="design-note">

## Design Note: It's Just Parsing

I'm going to make a claim here that will be unpopular with some compiler and
language people. It's OK if you don't agree. Personally, I learn more from
strongly stated opinions that I disagree with than I do from several pages of
qualifiers and equivocation. My claim is that *parsing doesn't matter*.

Over the years, many programming language people, especially in academia, have
gotten *really* into parsers and taken them very seriously. Initially, it was
the compiler folks who got into <span name="yacc">compiler-compilers</span>,
LALR, and other stuff like that. The first half of the dragon book is a long
love letter to the wonders of parser generators.

<aside name="yacc">

All of us suffer from the vice of "when all you have is a hammer, everything
looks like a nail", but perhaps none so visibly as compiler people. You wouldn't
believe the breadth of software problems that miraculously seem to require a new
little language in their solution as soon as you ask a compiler hacker for help.

Yacc and other compiler-compilers are the most delightfully recursive example.
"Wow, writing compilers is a chore. I know, let's write a compiler to write our
compiler for us."

For the record, I don't claim immunity to this affliction.

</aside>

Later, the functional programming folks got into parser combinators, packrat
parsers, and other sorts of things. Because, obviously, if you give a functional
programmer a problem, the first thing they'll do is whip out a pocketful of
higher-order functions.

Over in math and algorithm analysis land, there is a long legacy of research
into proving time and memory usage for various parsing techniques, transforming
parsing problems into other problems and back, and assigning complexity classes
to different grammars.

At one level, this stuff is important. If you're implementing a language, you
want some assurance that your parser won't go exponential and take 7,000 years
to parse a weird edge case in the grammar. Parser theory gives you that bound.
As an intellectual exercise, learning about parsing techniques is also fun and
rewarding.

But if your goal is just to implement a language and get it in front of users,
almost all of that stuff doesn't matter. It's really easy to get worked up by
the enthusiasm of the people who *are* into it and think that your front end
*needs* some whiz-bang generated combinator-parser-factory thing. I've seen
people burn tons of time writing and rewriting their parser using whatever
today's hot library or technique is.

That's time that doesn't add any value to your user's life. If you're just
trying to get your parser done, pick one of the bog-standard techniques, use it,
and move on. Recursive descent, Pratt parsing, and the popular parser generators
like ANTLR or Bison are all fine.

Take the extra time you saved not rewriting your parsing code and spend it
improving the compile error messages your compiler shows users. Good error
handling and reporting is more valuable to users than almost anything else you
can put time into in the front end.

</div>


================================================
FILE: book/contents.md
================================================
This text is not used. All of the content is in the contents.html template.


================================================
FILE: book/control-flow.md
================================================
> Logic, like whiskey, loses its beneficial effect when taken in too large
> quantities.
>
> <cite>Edward John Moreton Drax Plunkett, Lord Dunsany</cite>

Compared to [last chapter's][statements] grueling marathon, today is a
lighthearted frolic through a daisy meadow. But while the work is easy, the
reward is surprisingly large.

[statements]: statements-and-state.html

Right now, our interpreter is little more than a calculator. A Lox program can
only do a fixed amount of work before completing. To make it run twice as long
you have to make the source code twice as lengthy. We're about to fix that. In
this chapter, our interpreter takes a big step towards the programming
language major leagues: *Turing-completeness*.

## Turing Machines (Briefly)

In the early part of last century, mathematicians stumbled into a series of
confusing <span name="paradox">paradoxes</span> that led them to doubt the
stability of the foundation they had built their work upon. To address that
[crisis][], they went back to square one. Starting from a handful of axioms,
logic, and set theory, they hoped to rebuild mathematics on top of an
impervious foundation.

[crisis]: https://en.wikipedia.org/wiki/Foundations_of_mathematics#Foundational_crisis

<aside name="paradox">

The most famous is [**Russell's paradox**][russell]. Initially, set theory
allowed you to define any sort of set. If you could describe it in English, it
was valid. Naturally, given mathematicians' predilection for self-reference,
sets can contain other sets. So Russell, rascal that he was, came up with:

*R is the set of all sets that do not contain themselves.*

Does R contain itself? If it doesn't, then according to the second half of the
definition it should. But if it does, then it no longer meets the definition.
Cue mind exploding.

[russell]: https://en.wikipedia.org/wiki/Russell%27s_paradox

</aside>

They wanted to rigorously answer questions like, "Can all true statements be
proven?", "Can we [compute][] all functions that we can define?", or even the
more general question, "What do we mean when we claim a function is
'computable'?"

[compute]: https://en.wikipedia.org/wiki/Computable_function

They presumed the answer to the first two questions would be "yes". All that
remained was to prove it. It turns out that the answer to both is "no", and
astonishingly, the two questions are deeply intertwined. This is a fascinating
corner of mathematics that touches fundamental questions about what brains are
able to do and how the universe works. I can't do it justice here.

What I do want to note is that in the process of proving that the answer to the
first two questions is "no", Alan Turing and Alonzo Church devised a precise
answer to the last question -- a definition of what kinds of functions are <span
name="uncomputable">computable</span>. They each crafted a tiny system with a
minimum set of machinery that is still powerful enough to compute any of a
(very) large class of functions.

<aside name="uncomputable">

They proved the answer to the first question is "no" by showing that the
function that returns the truth value of a given statement is *not* a computable
one.

</aside>

These are now considered the "computable functions". Turing's system is called a
<span name="turing">**Turing machine**</span>. Church's is the **lambda
calculus**. Both are still widely used as the basis for models of computation
and, in fact, many modern functional programming languages use the lambda
calculus at their core.

<aside name="turing">

Turing called his inventions "a-machines" for "automatic". He wasn't so
self-aggrandizing as to put his *own* name on them. Later mathematicians did
that for him. That's how you get famous while still retaining some modesty.

</aside>

<img src="image/control-flow/turing-machine.png" alt="A Turing machine." />

Turing machines have better name recognition -- there's no Hollywood film about
Alonzo Church yet -- but the two formalisms are [equivalent in power][thesis].
In fact, any programming language with some minimal level of expressiveness is
powerful enough to compute *any* computable function.

[thesis]: https://en.wikipedia.org/wiki/Church%E2%80%93Turing_thesis

You can prove that by writing a simulator for a Turing machine in your language.
Since Turing proved his machine can compute any computable function, by
extension, that means your language can too. All you need to do is translate the
function into a Turing machine, and then run that on your simulator.

If your language is expressive enough to do that, it's considered
**Turing-complete**. Turing machines are pretty dang simple, so it doesn't take
much power to do this. You basically need arithmetic, a little control flow,
and the ability to allocate and use (theoretically) arbitrary amounts of memory.
We've got the first. By the end of this chapter, we'll have the <span
name="memory">second</span>.

<aside name="memory">

We *almost* have the third too. You can create and concatenate strings of
arbitrary size, so you can *store* unbounded memory. But we don't have any way
to access parts of a string.

</aside>

## Conditional Execution

Enough history, let's jazz up our language. We can divide control flow roughly
into two kinds:

*   **Conditional** or **branching control flow** is used to *not* execute
    some piece of code. Imperatively, you can think of it as jumping *ahead*
    over a region of code.

*   **Looping control flow** executes a chunk of code more than once. It jumps
    *back* so that you can do something again. Since you don't usually want
    *infinite* loops, it typically has some conditional logic to know when to
    stop looping as well.

Branching is simpler, so we'll start there. C-derived languages have two main
conditional execution features, the `if` statement and the perspicaciously named
"conditional" <span name="ternary">operator</span> (`?:`). An `if` statement
lets you conditionally execute statements and the conditional operator lets you
conditionally execute expressions.

<aside name="ternary">

The conditional operator is also called the "ternary" operator because it's the
only operator in C that takes three operands.

</aside>

For simplicity's sake, Lox doesn't have a conditional operator, so let's get our
`if` statement on. Our statement grammar gets a new production.

<span name="semicolon"></span>

```ebnf
statement      → exprStmt
               | ifStmt
               | printStmt
               | block ;

ifStmt         → "if" "(" expression ")" statement
               ( "else" statement )? ;
```

<aside name="semicolon">

The semicolons in the rules aren't quoted, which means they are part of the
grammar metasyntax, not Lox's syntax. A block does not have a `;` at the end and
an `if` statement doesn't either, unless the then or else statement happens to
be one that ends in a semicolon.

</aside>

An `if` statement has an expression for the condition, then a statement to execute
if the condition is truthy. Optionally, it may also have an `else` keyword and a
statement to execute if the condition is falsey. The <span name="if-ast">syntax
tree node</span> has fields for each of those three pieces.

^code if-ast (1 before, 1 after)

<aside name="if-ast">

The generated code for the new node is in [Appendix II][appendix-if].

[appendix-if]: appendix-ii.html#if-statement

</aside>

Like other statements, the parser recognizes an `if` statement by the leading
`if` keyword.

^code match-if (1 before, 1 after)

When it finds one, it calls this new method to parse the rest:

^code if-statement

<aside name="parens">

The parentheses around the condition are only half useful. You need some kind of
delimiter *between* the condition and the then statement, otherwise the parser
can't tell when it has reached the end of the condition expression. But the
*opening* parenthesis after `if` doesn't do anything useful. Dennis Ritchie put
it there so he could use `)` as the ending delimiter without having unbalanced
parentheses.

Other languages like Lua and some BASICs use a keyword like `then` as the ending
delimiter and don't have anything before the condition. Go and Swift instead
require the statement to be a braced block. That lets them use the `{` at the
beginning of the statement to tell when the condition is done.

</aside>

<span name="parens">As usual</span>, the parsing code hews closely to the
grammar. It detects an else clause by looking for the preceding `else` keyword.
If there isn't one, the `elseBranch` field in the syntax tree is `null`.

That seemingly innocuous optional else has, in fact, opened up an ambiguity in
our grammar. Consider:

```lox
if (first) if (second) whenTrue(); else whenFalse();
```

Here's the riddle: Which `if` statement does that else clause belong to? This
isn't just a theoretical question about how we notate our grammar. It actually
affects how the code executes:

*   If we attach the else to the first `if` statement, then `whenFalse()` is
    called if `first` is falsey, regardless of what value `second` has.

*   If we attach it to the second `if` statement, then `whenFalse()` is only
    called if `first` is truthy and `second` is falsey.

Since else clauses are optional, and there is no explicit delimiter marking the
end of the `if` statement, the grammar is ambiguous when you nest `if`s in this
way. This classic pitfall of syntax is called the **[dangling else][]** problem.

[dangling else]: https://en.wikipedia.org/wiki/Dangling_else

<span name="else"></span>

<img class="above" src="image/control-flow/dangling-else.png" alt="Two ways the else can be interpreted." />

<aside name="else">

Here, formatting highlights the two ways the else could be parsed. But note that
since whitespace characters are ignored by the parser, this is only a guide to
the human reader.

</aside>

It *is* possible to define a context-free grammar that avoids the ambiguity
directly, but it requires splitting most of the statement rules into pairs, one
that allows an `if` with an `else` and one that doesn't. It's annoying.

Instead, most languages and parsers avoid the problem in an ad hoc way. No
matter what hack they use to get themselves out of the trouble, they always
choose the same interpretation -- the `else` is bound to the nearest `if` that
precedes it.

Our parser conveniently does that already. Since `ifStatement()` eagerly looks
for an `else` before returning, the innermost call in a nested series will claim
the else clause for itself before returning to the outer `if` statements.

Syntax in hand, we are ready to interpret.

^code visit-if

The interpreter implementation is a thin wrapper around the self-same Java code.
It evaluates the condition. If truthy, it executes the then branch. Otherwise,
if there is an else branch, it executes that.

If you compare this code to how the interpreter handles other syntax we've
implemented, the part that makes control flow special is that Java `if`
statement. Most other syntax trees always evaluate their subtrees. Here, we may
not evaluate the then or else statement. If either of those has a side effect,
the choice not to evaluate it becomes user visible.

## Logical Operators

Since we don't have the conditional operator, you might think we're done with
branching, but no. Even without the ternary operator, there are two other
operators that are technically control flow constructs -- the logical operators
`and` and `or`.

These aren't like other binary operators because they **short-circuit**. If,
after evaluating the left operand, we know what the result of the logical
expression must be, we don't evaluate the right operand. For example:

```lox
false and sideEffect();
```

For an `and` expression to evaluate to something truthy, both operands must be
truthy. We can see as soon as we evaluate the left `false` operand that that
isn't going to be the case, so there's no need to evaluate `sideEffect()` and it
gets skipped.

This is why we didn't implement the logical operators with the other binary
operators. Now we're ready. The two new operators are low in the precedence
table. Similar to `||` and `&&` in C, they each have their <span
name="logical">own</span> precedence with `or` lower than `and`. We slot them
right between `assignment` and `equality`.

<aside name="logical">

I've always wondered why they don't have the same precedence, like the various
comparison or equality operators do.

</aside>

```ebnf
expression     → assignment ;
assignment     → IDENTIFIER "=" assignment
               | logic_or ;
logic_or       → logic_and ( "or" logic_and )* ;
logic_and      → equality ( "and" equality )* ;
```

Instead of falling back to `equality`, `assignment` now cascades to `logic_or`.
The two new rules, `logic_or` and `logic_and`, are <span
name="same">similar</span> to other binary operators. Then `logic_and` calls
out to `equality` for its operands, and we chain back to the rest of the
expression rules.

<aside name="same">

The *syntax* doesn't care that they short-circuit. That's a semantic concern.

</aside>

We could reuse the existing Expr.Binary class for these two new expressions
since they have the same fields. But then `visitBinaryExpr()` would have to
check to see if the operator is one of the logical operators and use a different
code path to handle the short-circuiting. I think it's cleaner to define a <span
name="logical-ast">new class</span> for these operators so that they get their
own visit method.

^code logical-ast (1 before, 1 after)

<aside name="logical-ast">

The generated code for the new node is in [Appendix II][appendix-logical].

[appendix-logical]: appendix-ii.html#logical-expression

</aside>

To weave the new expressions into the parser, we first change the parsing code
for assignment to call `or()`.

^code or-in-assignment (1 before, 2 after)

The code to parse a series of `or` expressions mirrors other binary operators.

^code or

Its operands are the next higher level of precedence, the new `and` expression.

^code and

That calls `equality()` for its operands, and with that, the expression parser
is all tied back together again. We're ready to interpret.

^code visit-logical

If you compare this to the [earlier chapter's][evaluating] `visitBinaryExpr()`
method, you can see the difference. Here, we evaluate the left operand first. We
look at its value to see if we can short-circuit. If not, and only then, do we
evaluate the right operand.

[evaluating]: evaluating-expressions.html

The other interesting piece here is deciding what actual value to return. Since
Lox is dynamically typed, we allow operands of any type and use truthiness to
determine what each operand represents. We apply similar reasoning to the
result. Instead of promising to literally return `true` or `false`, a logic
operator merely guarantees it will return a value with appropriate truthiness.

Fortunately, we have values with proper truthiness right at hand -- the results
of the operands themselves. So we use those. For example:

```lox
print "hi" or 2; // "hi".
print nil or "yes"; // "yes".
```

On the first line, `"hi"` is truthy, so the `or` short-circuits and returns
that. On the second line, `nil` is falsey, so it evaluates and returns the
second operand, `"yes"`.

That covers all of the branching primitives in Lox. We're ready to jump ahead to
loops. You see what I did there? *Jump. Ahead.* Get it? See, it's like a
reference to... oh, forget it.

## While Loops

Lox features two looping control flow statements, `while` and `for`. The `while`
loop is the simpler one, so we'll start there. Its grammar is the same as in C.

```ebnf
statement      → exprStmt
               | ifStmt
               | printStmt
               | whileStmt
               | block ;

whileStmt      → "while" "(" expression ")" statement ;
```

We add another clause to the statement rule that points to the new rule for
while. It takes a `while` keyword, followed by a parenthesized condition
expression, then a statement for the body. That new grammar rule gets a <span
name="while-ast">syntax tree node</span>.

^code while-ast (1 before, 1 after)

<aside name="while-ast">

The generated code for the new node is in [Appendix II][appendix-while].

[appendix-while]: appendix-ii.html#while-statement

</aside>

The node stores the condition and body. Here you can see why it's nice to have
separate base classes for expressions and statements. The field declarations
make it clear that the condition is an expression and the body is a statement.

Over in the parser, we follow the same process we used for `if` statements.
First, we add another case in `statement()` to detect and match the leading
keyword.

^code match-while (1 before, 1 after)

That delegates the real work to this method:

^code while-statement

The grammar is dead simple and this is a straight translation of it to Java.
Speaking of translating straight to Java, here's how we execute the new syntax:

^code visit-while

Like the visit method for `if`, this visitor uses the corresponding Java
feature. This method isn't complex, but it makes Lox much more powerful. We can
finally write a program whose running time isn't strictly bound by the length of
the source code.

## For Loops

We're down to the last control flow construct, <span name="for">Ye Olde</span>
C-style `for` loop. I probably don't need to remind you, but it looks like this:

```lox
for (var i = 0; i < 10; i = i + 1) print i;
```

In grammarese, that's:

```ebnf
statement      → exprStmt
               | forStmt
               | ifStmt
               | printStmt
               | whileStmt
               | block ;

forStmt        → "for" "(" ( varDecl | exprStmt | ";" )
                 expression? ";"
                 expression? ")" statement ;
```

<aside name="for">

Most modern languages have a higher-level looping statement for iterating over
arbitrary user-defined sequences. C# has `foreach`, Java has "enhanced for",
even C++ has range-based `for` statements now. Those offer cleaner syntax than
C's `for` statement by implicitly calling into an iteration protocol that the
object being looped over supports.

I love those. For Lox, though, we're limited by building up the interpreter a
chapter at a time. We don't have objects and methods yet, so we have no way of
defining an iteration protocol that the `for` loop could use. So we'll stick
with the old school C `for` loop. Think of it as "vintage". The fixie of control
flow statements.

</aside>

Inside the parentheses, you have three clauses separated by semicolons:

1.  The first clause is the *initializer*. It is executed exactly once, before
    anything else. It's usually an expression, but for convenience, we also
    allow a variable declaration. In that case, the variable is scoped to the
    rest of the `for` loop -- the other two clauses and the body.

2.  Next is the *condition*. As in a `while` loop, this expression controls when
    to exit the loop. It's evaluated once at the beginning of each iteration,
    including the first. If the result is truthy, it executes the loop body.
    Otherwise, it bails.

3.  The last clause is the *increment*. It's an arbitrary expression that does
    some work at the end of each loop iteration. The result of the expression is
    discarded, so it must have a side effect to be useful. In practice, it
    usually increments a variable.

Any of these clauses can be omitted. Following the closing parenthesis is a
statement for the body, which is typically a block.

### Desugaring

That's a lot of machinery, but note that none of it does anything you couldn't
do with the statements we already have. If `for` loops didn't support
initializer clauses, you could just put the initializer expression before the
`for` statement. Without an increment clause, you could simply put the increment
expression at the end of the body yourself.

In other words, Lox doesn't *need* `for` loops, they just make some common code
patterns more pleasant to write. These kinds of features are called <span
name="sugar">**syntactic sugar**</span>. For example, the previous `for` loop
could be rewritten like so:

<aside name="sugar">

This delightful turn of phrase was coined by Peter J. Landin in 1964 to describe
how some of the nice expression forms supported by languages like ALGOL were a
sweetener sprinkled over the more fundamental -- but presumably less palatable
-- lambda calculus underneath.

<img class="above" src="image/control-flow/sugar.png" alt="Slightly more than a spoonful of sugar." />

</aside>

```lox
{
  var i = 0;
  while (i < 10) {
    print i;
    i = i + 1;
  }
}
```

This script has the exact same semantics as the previous one, though it's not as
easy on the eyes. Syntactic sugar features like Lox's `for` loop make a language
more pleasant and productive to work in. But, especially in sophisticated
language implementations, every language feature that requires back-end support
and optimization is expensive.

We can have our cake and eat it too by <span
name="caramel">**desugaring**</span>. That funny word describes a process where
the front end takes code using syntactic sugar and translates it to a more
primitive form that the back end already knows how to execute.

<aside name="caramel">

Oh, how I wish the accepted term for this was "caramelization". Why introduce a
metaphor if you aren't going to stick with it?

</aside>

We're going to desugar `for` loops to the `while` loops and other statements the
interpreter already handles. In our simple interpreter, desugaring really
doesn't save us much work, but it does give me an excuse to introduce you to the
technique. So, unlike the previous statements, we *won't* add a new syntax tree
node. Instead, we go straight to parsing. First, add an import we'll need soon.

^code import-arrays (1 before, 1 after)

Like every statement, we start parsing a `for` loop by matching its keyword.

^code match-for (1 before, 1 after)

Here is where it gets interesting. The desugaring is going to happen here, so
we'll build this method a piece at a time, starting with the opening parenthesis
before the clauses.

^code for-statement

The first clause following that is the initializer.

^code for-initializer (2 before, 1 after)

If the token following the `(` is a semicolon then the initializer has been
omitted. Otherwise, we check for a `var` keyword to see if it's a <span
name="variable">variable</span> declaration. If neither of those matched, it
must be an expression. We parse that and wrap it in an expression statement so
that the initializer is always of type Stmt.

<aside name="variable">

In a previous chapter, I said we can split expression and statement syntax trees
into two separate class hierarchies because there's no single place in the
grammar that allows both an expression and a statement. That wasn't *entirely*
true, I guess.

</aside>

Next up is the condition.

^code for-condition (2 before, 1 after)

Again, we look for a semicolon to see if the clause has been omitted. The last
clause is the increment.

^code for-increment (1 before, 1 after)

It's similar to the condition clause except this one is terminated by the
closing parenthesis. All that remains is the <span name="body">body</span>.

<aside name="body">

Is it just me or does that sound morbid? "All that remained... was the *body*".

</aside>

^code for-body (1 before, 1 after)

We've parsed all of the various pieces of the `for` loop and the resulting AST
nodes are sitting in a handful of Java local variables. This is where the
desugaring comes in. We take those and use them to synthesize syntax tree nodes
that express the semantics of the `for` loop, like the hand-desugared example I
showed you earlier.

The code is a little simpler if we work backward, so we start with the increment
clause.

^code for-desugar-increment (2 before, 1 after)

The increment, if there is one, executes after the body in each iteration of the
loop. We do that by replacing the body with a little block that contains the
original body followed by an expression statement that evaluates the increment.

^code for-desugar-condition (2 before, 1 after)

Next, we take the condition and the body and build the loop using a primitive
`while` loop. If the condition is omitted, we jam in `true` to make an infinite
loop.

^code for-desugar-initializer (2 before, 1 after)

Finally, if there is an initializer, it runs once before the entire loop. We do
that by, again, replacing the whole statement with a block that runs the
initializer and then executes the loop.

That's it. Our interpreter now supports C-style `for` loops and we didn't have
to touch the Interpreter class at all. Since we desugared to nodes the
interpreter already knows how to visit, there is no more work to do.

Finally, Lox is powerful enough to entertain us, at least for a few minutes.
Here's a tiny program to print the first 21 elements in the Fibonacci
sequence:

```lox
var a = 0;
var temp;

for (var b = 1; a < 10000; b = temp + b) {
  print a;
  temp = a;
  a = b;
}
```

<div class="challenges">

## Challenges

1.  A few chapters from now, when Lox supports first-class functions and dynamic
    dispatch, we technically won't *need* branching statements built into the
    language. Show how conditional execution can be implemented in terms of
    those. Name a language that uses this technique for its control flow.

2.  Likewise, looping can be implemented using those same tools, provided our
    interpreter supports an important optimization. What is it, and why is it
    necessary? Name a language that uses this technique for iteration.

3.  Unlike Lox, most other C-style languages also support `break` and `continue`
    statements inside loops. Add support for `break` statements.

    The syntax is a `break` keyword followed by a semicolon. It should be a
    syntax error to have a `break` statement appear outside of any enclosing
    loop. At runtime, a `break` statement causes execution to jump to the end of
    the nearest enclosing loop and proceed from there. Note that the `break`
    may be nested inside other blocks and `if` statements that also need to be
    exited.

</div>

<div class="design-note">

## Design Note: Spoonfuls of Syntactic Sugar

When you design your own language, you choose how much syntactic sugar to pour
into the grammar. Do you make an unsweetened health food where each semantic
operation maps to a single syntactic unit, or some decadent dessert where every
bit of behavior can be expressed ten different ways? Successful languages
inhabit all points along this continuum.

On the extreme acrid end are those with ruthlessly minimal syntax like Lisp,
Forth, and Smalltalk. Lispers famously claim their language "has no syntax",
while Smalltalkers proudly show that you can fit the entire grammar on an index
card. This tribe has the philosophy that the *language* doesn't need syntactic
sugar. Instead, the minimal syntax and semantics it provides are powerful enough
to let library code be as expressive as if it were part of the language itself.

Near these are languages like C, Lua, and Go. They aim for simplicity and
clarity over minimalism. Some, like Go, deliberately eschew both syntactic sugar
and the kind of syntactic extensibility of the previous category. They want the
syntax to get out of the way of the semantics, so they focus on keeping both the
grammar and libraries simple. Code should be obvious more than beautiful.

Somewhere in the middle you have languages like Java, C#, and Python. Eventually
you reach Ruby, C++, Perl, and D -- languages which have stuffed so much syntax
into their grammar, they are running out of punctuation characters on the
keyboard.

To some degree, location on the spectrum correlates with age. It's relatively
easy to add bits of syntactic sugar in later releases. New syntax is a crowd
pleaser, and it's less likely to break existing programs than mucking with the
semantics. Once added, you can never take it away, so languages tend to sweeten
with time. One of the main benefits of creating a new language from scratch is
it gives you an opportunity to scrape off those accumulated layers of frosting
and start over.

Syntactic sugar has a bad rap among the PL intelligentsia. There's a real fetish
for minimalism in that crowd. There is some justification for that. Poorly
designed, unneeded syntax raises the cognitive load without adding enough
expressiveness to carry its weight. Since there is always pressure to cram new
features into the language, it takes discipline and a focus on simplicity to
avoid bloat. Once you add some syntax, you're stuck with it, so it's smart to be
parsimonious.

At the same time, most successful languages do have fairly complex grammars, at
least by the time they are widely used. Programmers spend a ton of time in their
language of choice, and a few niceties here and there really can improve the
comfort and efficiency of their work.

Striking the right balance -- choosing the right level of sweetness for your
language -- relies on your own sense of taste.

</div>


================================================
FILE: book/dedication.md
================================================
<div class="dedication">

<img src="image/ginny.png" alt="My beloved dog and her stupid face." />

To Ginny, I miss your stupid face.

</div>

================================================
FILE: book/evaluating-expressions.md
================================================
> You are my creator, but I am your master; Obey!
>
> <cite>Mary Shelley, <em>Frankenstein</em></cite>

If you want to properly set the mood for this chapter, try to conjure up a
thunderstorm, one of those swirling tempests that likes to yank open shutters at
the climax of the story. Maybe toss in a few bolts of lightning. In this
chapter, our interpreter will take breath, open its eyes, and execute some code.

<span name="spooky"></span>

<img src="image/evaluating-expressions/lightning.png" alt="A bolt of lightning strikes a Victorian mansion. Spooky!" />

<aside name="spooky">

A decrepit Victorian mansion is optional, but adds to the ambiance.

</aside>

There are all manner of ways that language implementations make a computer do
what the user's source code commands. They can compile it to machine code,
translate it to another high-level language, or reduce it to some bytecode
format for a virtual machine to run. For our first interpreter, though, we are
going to take the simplest, shortest path and execute the syntax tree itself.

Right now, our parser only supports expressions. So, to "execute" code, we will
evaluate an expression and produce a value. For each kind of expression syntax
we can parse -- literal, operator, etc. -- we need a corresponding chunk of code
that knows how to evaluate that tree and produce a result. That raises two
questions:

1. What kinds of values do we produce?

2. How do we organize those chunks of code?

Taking them on one at a time...

## Representing Values

In Lox, <span name="value">values</span> are created by literals, computed by
expressions, and stored in variables. The user sees these as *Lox* objects, but
they are implemented in the underlying language our interpreter is written in.
That means bridging the lands of Lox's dynamic typing and Java's static types. A
variable in Lox can store a value of any (Lox) type, and can even store values
of different types at different points in time. What Java type might we use to
represent that?

<aside name="value">

Here, I'm using "value" and "object" pretty much interchangeably.

Later in the C interpreter we'll make a slight distinction between them, but
that's mostly to have unique terms for two different corners of the
implementation -- in-place versus heap-allocated data. From the user's
perspective, the terms are synonymous.

</aside>

Given a Java variable with that static type, we must also be able to determine
which kind of value it holds at runtime. When the interpreter executes a `+`
operator, it needs to tell if it is adding two numbers or concatenating two
strings. Is there a Java type that can hold numbers, strings, Booleans, and
more? Is there one that can tell us what its runtime type is? There is! Good old
java.lang.Object.

In places in the interpreter where we need to store a Lox value, we can use
Object as the type. Java has boxed versions of its primitive types that all
subclass Object, so we can use those for Lox's built-in types:

<table>
<thead>
<tr>
  <td>Lox type</td>
  <td>Java representation</td>
</tr>
</thead>
<tbody>
<tr>
  <td>Any Lox value</td>
  <td>Object</td>
</tr>
<tr>
  <td><code>nil</code></td>
  <td><code>null</code></td>
</tr>
<tr>
  <td>Boolean</td>
  <td>Boolean</td>
</tr>
<tr>
  <td>number</td>
  <td>Double</td>
</tr>
<tr>
  <td>string</td>
  <td>String</td>
</tr>
</tbody>
</table>

Given a value of static type Object, we can determine if the runtime value is a
number or a string or whatever using Java's built-in `instanceof` operator. In
other words, the <span name="jvm">JVM</span>'s own object representation
conveniently gives us everything we need to implement Lox's built-in types.
We'll have to do a little more work later when we add Lox's notions of
functions, classes, and instances, but Object and the boxed primitive classes
are sufficient for the types we need right now.

<aside name="jvm">

Another thing we need to do with values is manage their memory, and Java does
that too. A handy object representation and a really nice garbage collector are
the main reasons we're writing our first interpreter in Java.

</aside>

## Evaluating Expressions

Next, we need blobs of code to implement the evaluation logic for each kind of
expression we can parse. We could stuff that code into the syntax tree classes
in something like an `interpret()` method. In effect, we could tell each syntax
tree node, "Interpret thyself". This is the Gang of Four's
[Interpreter design pattern][]. It's a neat pattern, but like I mentioned
earlier, it gets messy if we jam all sorts of logic into the tree classes.

[interpreter design pattern]: https://en.wikipedia.org/wiki/Interpreter_pattern

Instead, we're going to reuse our groovy [Visitor pattern][]. In the previous
chapter, we created an AstPrinter class. It took in a syntax tree and
recursively traversed it, building up a string which it ultimately returned.
That's almost exactly what a real interpreter does, except instead of
concatenating strings, it computes values.

[visitor pattern]: representing-code.html#the-visitor-pattern

We start with a new class.

^code interpreter-class

The class declares that it's a visitor. The return type of the visit methods
will be Object, the root class that we use to refer to a Lox value in our Java
code. To satisfy the Visitor interface, we need to define visit methods for each
of the four expression tree classes our parser produces. We'll start with the
simplest...

### Evaluating literals

The leaves of an expression tree -- the atomic bits of syntax that all other
expressions are composed of -- are <span name="leaf">literals</span>. Literals
are almost values already, but the distinction is important. A literal is a *bit
of syntax* that produces a value. A literal always appears somewhere in the
user's source code. Lots of values are produced by computation and don't exist
anywhere in the code itself. Those aren't literals. A literal comes from the
parser's domain. Values are an interpreter concept, part of the runtime's world.

<aside name="leaf">

In the [next chapter][vars], when we implement variables, we'll add identifier
expressions, which are also leaf nodes.

[vars]: statements-and-state.html

</aside>

So, much like we converted a literal *token* into a literal *syntax tree node*
in the parser, now we convert the literal tree node into a runtime value. That
turns out to be trivial.

^code visit-literal

We eagerly produced the runtime value way back during scanning and stuffed it in
the token. The parser took that value and stuck it in the literal tree node,
so to evaluate a literal, we simply pull it back out.

### Evaluating parentheses

The next simplest node to evaluate is grouping -- the node you get as a result
of using explicit parentheses in an expression.

^code visit-grouping

A <span name="grouping">grouping</span> node has a reference to an inner node
for the expression contained inside the parentheses. To evaluate the grouping
expression itself, we recursively evaluate that subexpression and return it.

We rely on this helper method which simply sends the expression back into the
interpreter's visitor implementation:

<aside name="grouping">

Some parsers don't define tree nodes for parentheses. Instead, when parsing a
parenthesized expression, they simply return the node for the inner expression.
We do create a node for parentheses in Lox because we'll need it later to
correctly handle the left-hand sides of assignment expressions.

</aside>

^code evaluate

### Evaluating unary expressions

Like grouping, unary expressions have a single subexpression that we must
evaluate first. The difference is that the unary expression itself does a little
work afterwards.

^code visit-unary

First, we evaluate the operand expression. Then we apply the unary operator
itself to the result of that. There are two different unary expressions,
identified by the type of the operator token.

Shown here is `-`, which negates the result of the subexpression. The
subexpression must be a number. Since we don't *statically* know that in Java,
we <span name="cast">cast</span> it before performing the operation. This type
cast happens at runtime when the `-` is evaluated. That's the core of what makes
a language dynamically typed right there.

<aside name="cast">

You're probably wondering what happens if the cast fails. Fear not, we'll get
into that soon.

</aside>

You can start to see how evaluation recursively traverses the tree. We can't
evaluate the unary operator itself until after we evaluate its operand
subexpression. That means our interpreter is doing a **post-order traversal** --
each node evaluates its children before doing its own work.

The other unary operator is logical not.

^code unary-bang (1 before, 1 after)

The implementation is simple, but what is this "truthy" thing about? We need to
make a little side trip to one of the great questions of Western philosophy:
*What is truth?*

### Truthiness and falsiness

OK, maybe we're not going to really get into the universal question, but at
least inside the world of Lox, we need to decide what happens when you use
something other than `true` or `false` in a logic operation like `!` or any
other place where a Boolean is expected.

We *could* just say it's an error because we don't roll with implicit
conversions, but most dynamically typed languages aren't that ascetic. Instead,
they take the universe of values of all types and partition them into two sets,
one of which they define to be "true", or "truthful", or (my favorite) "truthy",
and the rest which are "false" or "falsey". This partitioning is somewhat
arbitrary and gets <span name="weird">weird</span> in a few languages.

<aside name="weird" class="bottom">

In JavaScript, strings are truthy, but empty strings are not. Arrays are truthy
but empty arrays are... also truthy. The number `0` is falsey, but the *string*
`"0"` is truthy.

In Python, empty strings are falsey like in JS, but other empty sequences are
falsey too.

In PHP, both the number `0` and the string `"0"` are falsey. Most other
non-empty strings are truthy.

Get all that?

</aside>

Lox follows Ruby's simple rule: `false` and `nil` are falsey, and everything else
is truthy. We implement that like so:

^code is-truthy

### Evaluating binary operators

On to the last expression tree class, binary operators. There's a handful of
them, and we'll start with the arithmetic ones.

^code visit-binary

<aside name="left">

Did you notice we pinned down a subtle corner of the language semantics here?
In a binary expression, we evaluate the operands in left-to-right order. If
those operands have side effects, that choice is user visible, so this isn't
simply an implementation detail.

If we want our two interpreters to be consistent (hint: we do), we'll need to
make sure clox does the same thing.

</aside>

I think you can figure out what's going on here. The main difference from the
unary negation operator is that we have two operands to evaluate.

I left out one arithmetic operator because it's a little special.

^code binary-plus (3 before, 1 after)

The `+` operator can also be used to concatenate two strings. To handle that, we
don't just assume the operands are a certain type and *cast* them, we
dynamically *check* the type and choose the appropriate operation. This is why
we need our object representation to support `instanceof`.

<aside name="plus">

We could have defined an operator specifically for string concatenation. That's
what Perl (`.`), Lua (`..`), Smalltalk (`,`), Haskell (`++`), and others do.

I thought it would make Lox a little more approachable to use the same syntax as
Java, JavaScript, Python, and others. This means that the `+` operator is
**overloaded** to support both adding numbers and concatenating strings. Even in
languages that don't use `+` for strings, they still often overload it for
adding both integers and floating-point numbers.

</aside>

Next up are the comparison operators.

^code binary-comparison (1 before, 1 after)

They are basically the same as arithmetic. The only difference is that where the
arithmetic operators produce a value whose type is the same as the operands
(numbers or strings), the comparison operators always produce a Boolean.

The last pair of operators are equality.

^code binary-equality

Unlike the comparison operators which require numbers, the equality operators
support operands of any type, even mixed ones. You can't ask Lox if 3 is *less*
than `"three"`, but you can ask if it's <span name="equal">*equal*</span> to
it.

<aside name="equal">

Spoiler alert: it's not.

</aside>

Like truthiness, the equality logic is hoisted out into a separate method.

^code is-equal

This is one of those corners where the details of how we represent Lox objects
in terms of Java matter. We need to correctly implement *Lox's* notion of
equality, which may be different from Java's.

Fortunately, the two are pretty similar. Lox doesn't do implicit conversions in
equality and Java does not either. We do have to handle `nil`/`null` specially
so that we don't throw a NullPointerException if we try to call `equals()` on
`null`. Otherwise, we're fine. Java's <span name="nan">`equals()`</span> method
on Boolean, Double, and String has the behavior we want for Lox.

<aside name="nan">

What do you expect this to evaluate to:

```lox
(0 / 0) == (0 / 0)
```

According to [IEEE 754][], which specifies the behavior of double-precision
numbers, dividing a zero by zero gives you the special **NaN** ("not a number")
value. Strangely enough, NaN is *not* equal to itself.

In Java, the `==` operator on primitive doubles preserves that behavior, but the
`equals()` method on the Double class does not. Lox uses the latter, so doesn't
follow IEEE. These kinds of subtle incompatibilities occupy a dismaying fraction
of language implementers' lives.

[ieee 754]: https://en.wikipedia.org/wiki/IEEE_754

</aside>

And that's it! That's all the code we need to correctly interpret a valid Lox
expression. But what about an *invalid* one? In particular, what happens when a
subexpression evaluates to an object of the wrong type for the operation being
performed?

## Runtime Errors

I was cavalier about jamming casts in whenever a subexpression produces an
Object and the operator requires it to be a number or a string. Those casts can
fail. Even though the user's code is erroneous, if we want to make a <span
name="fail">usable</span> language, we are responsible for handling that error
gracefully.

<aside name="fail">

We could simply not detect or report a type error at all. This is what C does if
you cast a pointer to some type that doesn't match the data that is actually
being pointed to. C gains flexibility and speed by allowing that, but is
also famously dangerous. Once you misinterpret bits in memory, all bets are off.

Few modern languages accept unsafe operations like that. Instead, most are
**memory safe** and ensure -- through a combination of static and runtime checks
-- that a program can never incorrectly interpret the value stored in a piece of
memory.

</aside>

It's time for us to talk about **runtime errors**. I spilled a lot of ink in the
previous chapters talking about error handling, but those were all *syntax* or
*static* errors. Those are detected and reported before *any* code is executed.
Runtime errors are failures that the language semantics demand we detect and
report while the program is running (hence the name).

Right now, if an operand is the wrong type for the operation being performed,
the Java cast will fail and the JVM will throw a ClassCastException. That
unwinds the whole stack and exits the application, vomiting a Java stack trace
onto the user. That's probably not what we want. The fact that Lox is
implemented in Java should be a detail hidden from the user. Instead, we want
them to understand that a *Lox* runtime error occurred, and give them an error
message relevant to our language and their program.

The Java behavior does have one thing going for it, though. It correctly stops
executing any code when the error occurs. Let's say the user enters some
expression like:

```lox
2 * (3 / -"muffin")
```

You can't negate a <span name="muffin">muffin</span>, so we need to report a
runtime error at that inner `-` expression. That in turn means we can't evaluate
the `/` expression since it has no meaningful right operand. Likewise for the
`*`. So when a runtime error occurs deep in some expression, we need to escape
all the way out.

<aside name="muffin">

I don't know, man, *can* you negate a muffin?

<img src="image/evaluating-expressions/muffin.png" alt="A muffin, negated." />

</aside>

We could print a runtime error and then abort the process and exit the
application entirely. That has a certain melodramatic flair. Sort of the
programming language interpreter equivalent of a mic drop.

Tempting as that is, we should probably do something a little less cataclysmic.
While a runtime error needs to stop evaluating the *expression*, it shouldn't
kill the *interpreter*. If a user is running the REPL and has a typo in a line
of code, they should still be able to keep the session going and enter more code
after that.

### Detecting runtime errors

Our tree-walk interpreter evaluates nested expressions using recursive method
calls, and we need to unwind out of all of those. Throwing an exception in Java
is a fine way to accomplish that. However, instead of using Java's own cast
failure, we'll define a Lox-specific one so that we can handle it how we want.

Before we do the cast, we check the object's type ourselves. So, for unary `-`,
we add:

^code check-unary-operand (1 before, 1 after)

The code to check the operand is:

^code check-operand

When the check fails, it throws one of these:

^code runtime-error-class

Unlike the Java cast exception, our <span name="class">class</span> tracks the
token that identifies where in the user's code the runtime error came from. As
with static errors, this helps the user know where to fix their code.

<aside name="class">

I admit the name "RuntimeError" is confusing since Java defines a
RuntimeException class. An annoying thing about building interpreters is your
names often collide with ones already taken by the implementation language. Just
wait until we support Lox classes.

</aside>

We need similar checking for the binary operators. Since I promised you every
single line of code needed to implement the interpreters, I'll run through them
all.

Greater than:

^code check-greater-operand (1 before, 1 after)

Greater than or equal to:

^code check-greater-equal-operand (1 before, 1 after)

Less than:

^code check-less-operand (1 before, 1 after)

Less than or equal to:

^code check-less-equal-operand (1 before, 1 after)

Subtraction:

^code check-minus-operand (1 before, 1 after)

Division:

^code check-slash-operand (1 before, 1 after)

Multiplication:

^code check-star-operand (1 before, 1 after)

All of those rely on this validator, which is virtually the same as the unary
one:

^code check-operands

<aside name="operand">

Another subtle semantic choice: We evaluate *both* operands before checking the
type of *either*. Imagine we have a function `say()` that prints its argument
then returns it. Using that, we write:

```lox
say("left") - say("right");
```

Our interpreter prints "left" and "right" before reporting the runtime error. We
could have instead specified that the left operand is checked before even
evaluating the right.

</aside>

The last remaining operator, again the odd one out, is addition. Since `+` is
overloaded for numbers and strings, it already has code to check the types. All
we need to do is fail if neither of the two success cases matches.

^code string-wrong-type (3 before, 1 after)

That gets us detecting runtime errors deep in the innards of the evaluator. The
errors are getting thrown. The next step is to write the code that catches them.
For that, we need to wire up the Interpreter class into the main Lox class that
drives it.

## Hooking Up the Interpreter

The visit methods are sort of the guts of the Interpreter class, where the real
work happens. We need to wrap a skin around them to interface with the rest of
the program. The Interpreter's public API is simply one method.

^code interpret

This takes in a syntax tree for an expression and evaluates it. If that
succeeds, `evaluate()` returns an object for the result value. `interpret()`
converts that to a string and shows it to the user. To convert a Lox value to a
string, we rely on:

^code stringify

This is another of those pieces of code like `isTruthy()` that crosses the
membrane between the user's view of Lox objects and their internal
representation in Java.

It's pretty straightforward. Since Lox was designed to be familiar to someone
coming from Java, things like Booleans look the same in both languages. The two
edge cases are `nil`, which we represent using Java's `null`, and numbers.

Lox uses double-precision numbers even for integer values. In that case, they
should print without a decimal point. Since Java has both floating point and
integer types, it wants you to know which one you're using. It tells you by
adding an explicit `.0` to integer-valued doubles. We don't care about that, so
we <span name="number">hack</span> it off the end.

<aside name="number">

Yet again, we take care of this edge case with numbers to ensure that jlox and
clox work the same. Handling weird corners of the language like this will drive
you crazy but is an important part of the job.

Users rely on these details -- either deliberately or inadvertently -- and if
the implementations aren't consistent, their program will break when they run it
on different interpreters.

</aside>

### Reporting runtime errors

If a runtime error is thrown while evaluating the expression, `interpret()`
catches it. This lets us report the error to the user and then gracefully
continue. All of our existing error reporting code lives in the Lox class, so we
put this method there too:

^code runtime-error-method

We use the token associated with the RuntimeError to tell the user what line of
code was executing when the error occurred. Even better would be to give the
user an entire call stack to show how they *got* to be executing that code. But
we don't have function calls yet, so I guess we don't have to worry about it.

After showing the error, `runtimeError()` sets this field:

^code had-runtime-error-field (1 before, 1 after)

That field plays a small but important role.

^code check-runtime-error (4 before, 1 after)

If the user is running a Lox <span name="repl">script from a file</span> and a
runtime error occurs, we set an exit code when the process quits to let the
calling process know. Not everyone cares about shell etiquette, but we do.

<aside name="repl">

If the user is running the REPL, we don't care about tracking runtime errors.
After they are reported, we simply loop around and let them input new code and
keep going.

</aside>

### Running the interpreter

Now that we have an interpreter, the Lox class can start using it.

^code interpreter-instance (1 before, 1 after)

We make the field static so that successive calls to `run()` inside a REPL
session reuse the same interpreter. That doesn't make a difference now, but it
will later when the interpreter stores global variables. Those variables should
persist throughout the REPL session.

Finally, we remove the line of temporary code from the [last chapter][] for
printing the syntax tree and replace it with this:

[last chapter]: parsing-expressions.html

^code interpreter-interpret (3 before, 1 after)

We have an entire language pipeline now: scanning, parsing, and
execution. Congratulations, you now have your very own arithmetic calculator.

As you can see, the interpreter is pretty bare bones. But the Interpreter class
and the Visitor pattern we've set up today form the skeleton that later chapters
will stuff full of interesting guts -- variables, functions, etc. Right now, the
interpreter doesn't do very much, but it's alive!

<img src="image/evaluating-expressions/skeleton.png" alt="A skeleton waving hello." />

<div class="challenges">

## Challenges

1.  Allowing comparisons on types other than numbers could be useful. The
    operators might have a reasonable interpretation for strings. Even
    comparisons among mixed types, like `3 < "pancake"` could be handy to enable
    things like ordered collections of heterogeneous types. Or it could simply
    lead to bugs and confusion.

    Would you extend Lox to support comparing other types? If so, which pairs of
    types do you allow and how do you define their ordering? Justify your
    choices and compare them to other languages.

2.  Many languages define `+` such that if *either* operand is a string, the
    other is converted to a string and the results are then concatenated. For
    example, `"scone" + 4` would yield `scone4`. Extend the code in
    `visitBinaryExpr()` to support that.

3.  What happens right now if you divide a number by zero? What do you think
    should happen? Justify your choice. How do other languages you know handle
    division by zero, and why do they make the choices they do?

    Change the implementation in `visitBinaryExpr()` to detect and report a
    runtime error for this case.

</div>

<div class="design-note">

## Design Note: Static and Dynamic Typing

Some languages, like Java, are statically typed, which means type errors are
detected and reported at compile time before any code is run. Others, like Lox,
are dynamically typed and defer checking for type errors until runtime right
before an operation is attempted. We tend to consider this a black-and-white
choice, but there is actually a continuum between them.

It turns out even most statically typed languages do *some* type checks at
runtime. The type system checks most type rules statically, but inserts runtime
checks in the generated code for other operations.

For example, in Java, the *static* type system assumes a cast expression will
always safely succeed. After you cast some value, you can statically treat it as
the destination type and not get any compile errors. But downcasts can fail,
obviously. The only reason the static checker can presume that casts always
succeed without violating the language's soundness guarantees is that the
cast is checked *at runtime* and throws an exception on failure.

A more subtle example is [covariant arrays][] in Java and C#. The static
subtyping rules for arrays allow operations that are not sound. Consider:

[covariant arrays]: https://en.wikipedia.org/wiki/Covariance_and_contravariance_(computer_science)#Covariant_arrays_in_Java_and_C.23

```java
Object[] stuff = new Integer[1];
stuff[0] = "not an int!";
```

This code compiles without any errors. The first line upcasts the Integer array
and stores it in a variable of type Object array. The second line stores a
string in one of its cells. The Object array type statically allows that
-- strings *are* Objects -- but the actual Integer array that `stuff` refers to
at runtime should never have a string in it! To avoid that catastrophe, when you
store a value in an array, the JVM does a *runtime* check to make sure it's an
allowed type. If not, it throws an ArrayStoreException.

Java could have avoided the need to check this at runtime by disallowing the
cast on the first line. It could make arrays *invariant* such that an array of
Integers is *not* an array of Objects. That's statically sound, but it prohibits
common and safe patterns of code that only read from arrays. Covariance is safe
if you never *write* to the array. Those patterns were particularly important
for usability in Java 1.0 before it supported generics. James Gosling and the
other Java designers traded off a little static safety and performance -- those
array store checks take time -- in return for some flexibility.

There are few modern statically typed languages that don't make that trade-off
*somewhere*. Even Haskell will let you run code with non-exhaustive matches. If
you find yourself designing a statically typed language, keep in mind that you
can sometimes give users more flexibility without sacrificing *too* many of the
benefits of static safety by deferring some type checks until runtime.

On the other hand, a key reason users choose statically typed languages is
the confidence the language gives them that certain kinds of errors
can *never* occur when their program is run. Defer too many type checks until
runtime, and you erode that confidence.

</div>


================================================
FILE: book/functions.md
================================================
> And that is also the way the human mind works -- by the compounding of old
> ideas into new structures that become new ideas that can themselves be used in
> compounds, and round and round endlessly, growing ever more remote from the
> basic earthbound imagery that is each language's soil.
>
> <cite>Douglas R. Hofstadter, <em>I Am a Strange Loop</em></cite>

This chapter marks the culmination of a lot of hard work. The previous chapters
add useful functionality in their own right, but each also supplies a piece of a
<span name="lambda">puzzle</span>. We'll take those pieces -- expressions,
statements, variables, control flow, and lexical scope -- add a couple more, and
assemble them all into support for real user-defined functions and function
calls.

<aside name="lambda">

<img src="image/functions/lambda.png" alt="A lambda puzzle." />

</aside>

## Function Calls

You're certainly familiar with C-style function call syntax, but the grammar is
more subtle than you may realize. Calls are typically to named functions like:

```lox
average(1, 2);
```

But the <span name="pascal">name</span> of the function being called isn't
actually part of the call syntax. The thing being called -- the **callee** --
can be any expression that evaluates to a function. (Well, it does have to be a
pretty *high precedence* expression, but parentheses take care of that.) For
example:

<aside name="pascal">

The name *is* part of the call syntax in Pascal. You can call only named
functions or functions stored directly in variables.

</aside>

```lox
getCallback()();
```

There are two call expressions here. The first pair of parentheses has
`getCallback` as its callee. But the second call has the entire `getCallback()`
expression as its callee. It is the parentheses following an expression that
indicate a function call. You can think of a call as sort of like a postfix
operator that starts with `(`.

This "operator" has higher precedence than any other operator, even the unary
ones. So we slot it into the grammar by having the `unary` rule bubble up to a
new `call` rule.

<span name="curry"></span>

```ebnf
unary          → ( "!" | "-" ) unary | call ;
call           → primary ( "(" arguments? ")" )* ;
```

This rule matches a primary expression followed by zero or more function calls.
If there are no parentheses, this parses a bare primary expression. Otherwise,
each call is recognized by a pair of parentheses with an optional list of
arguments inside. The argument list grammar is:

<aside name="curry">

The rule uses `*` to allow matching a series of calls like `fn(1)(2)(3)`. Code
like that isn't common in C-style languages, but it is in the family of
languages derived from ML. There, the normal way of defining a function that
takes multiple arguments is as a series of nested functions. Each function takes
one argument and returns a new function. That function consumes the next
argument, returns yet another function, and so on. Eventually, once all of the
arguments are consumed, the last function completes the operation.

This style, called **currying**, after Haskell Curry (the same guy whose first
name graces that *other* well-known functional language), is baked directly into
the language syntax so it's not as weird looking as it would be here.

</aside>

```ebnf
arguments      → expression ( "," expression )* ;
```

This rule requires at least one argument expression, followed by zero or more
other expressions, each preceded by a comma. To handle zero-argument calls, the
`call` rule itself considers the entire `arguments` production to be optional.

I admit, this seems more grammatically awkward than you'd expect for the
incredibly common "zero or more comma-separated things" pattern. There are some
sophisticated metasyntaxes that handle this better, but in our BNF and in many
language specs I've seen, it is this cumbersome.

Over in our syntax tree generator, we add a <span name="call-ast">new
node</span>.

^code call-expr (1 before, 1 after)

<aside name="call-ast">

The generated code for the new node is in [Appendix II][appendix-call].

[appendix-call]: appendix-ii.html#call-expression

</aside>

It stores the callee expression and a list of expressions for the arguments. It
also stores the token for the closing parenthesis. We'll use that token's
location when we report a runtime error caused by a function call.

Crack open the parser. Where `unary()` used to jump straight to `primary()`,
change it to call, well, `call()`.

^code unary-call (3 before, 1 after)

Its definition is:

^code call

The code here doesn't quite line up with the grammar rules. I moved a few things
around to make the code cleaner -- one of the luxuries we have with a
handwritten parser. But it's roughly similar to how we parse infix operators.
First, we parse a primary expression, the "left operand" to the call. Then, each
time we see a `(`, we call `finishCall()` to parse the call expression using the
previously parsed expression as the callee. The returned expression becomes the
new `expr` and we loop to see if the result is itself called.

<aside name="while-true">

This code would be simpler as `while (match(LEFT_PAREN))` instead of the silly
`while (true)` and `break`. Don't worry, it will make sense when we expand the
parser later to handle properties on objects.

</aside>

The code to parse the argument list is in this helper:

^code finish-call

This is more or less the `arguments` grammar rule translated to code, except
that we also handle the zero-argument case. We check for that case first by
seeing if the next token is `)`. If it is, we don't try to parse any arguments.

Otherwise, we parse an expression, then look for a comma indicating that there
is another argument after that. We keep doing that as long as we find commas
after each expression. When we don't find a comma, then the argument list must
be done and we consume the expected closing parenthesis. Finally, we wrap the
callee and those arguments up into a call AST node.

### Maximum argument counts

Right now, the loop where we parse arguments has no bound. If you want to call a
function and pass a million arguments to it, the parser would have no problem
with it. Do we want to limit that?

Other languages have various approaches. The C standard says a conforming
implementation has to support *at least* 127 arguments to a function, but
doesn't say there's any upper limit. The Java specification says a method can
accept *no more than* <span name="254">255</span> arguments.

<aside name="254">

The limit is 25*4* arguments if the method is an instance method. That's because
`this` -- the receiver of the method -- works like an argument that is
implicitly passed to the method, so it claims one of the slots.

</aside>

Our Java interpreter for Lox doesn't really need a limit, but having a maximum
number of arguments will simplify our bytecode interpreter in [Part III][]. We
want our two interpreters to be compatible with each other, even in weird corner
cases like this, so we'll add the same limit to jlox.

[part iii]: a-bytecode-virtual-machine.html

^code check-max-arity (1 before, 1 after)

Note that the code here *reports* an error if it encounters too many arguments,
but it doesn't *throw* the error. Throwing is how we kick into panic mode, which
is what we want if the parser is in a confused state and doesn't know where it
is in the grammar anymore. But here, the parser is still in a perfectly valid
state -- it just found too many arguments. So it reports the error and keeps on
keepin' on.

### Interpreting function calls

We don't have any functions we can call, so it seems weird to start implementing
calls first, but we'll worry about that when we get there. First, our
interpreter needs a new import.

^code import-array-list (1 after)

As always, interpretation starts with a new visit method for our new call
expression node.

^code visit-call

First, we evaluate the expression for the callee. Typically, this expression is
just an identifier that looks up the function by its name, but it could be
anything. Then we evaluate each of the argument expressions in order and store
the resulting values in a list.

<aside name="in-order">

This is another one of those subtle semantic choices. Since argument expressions
may have side effects, the order they are evaluated could be user visible. Even
so, some languages like Scheme and C don't specify an order. This gives
compilers freedom to reorder them for efficiency, but means users may be
unpleasantly surprised if arguments aren't evaluated in the order they expect.

</aside>

Once we've got the callee and the arguments ready, all that remains is to
perform the call. We do that by casting the callee to a <span
name="callable">LoxCallable</span> and then invoking a `call()` method on it.
The Java representation of any Lox object that can be called like a function
will implement this interface. That includes user-defined functions, naturally,
but also class objects since classes are "called" to construct new instances.
We'll also use it for one more purpose shortly.

<aside name="callable">

I stuck "Lox" before the name to distinguish it from the Java standard library's
own Callable interface. Alas, all the good simple names are already taken.

</aside>

There isn't too much to this new interface.

^code callable

We pass in the interpreter in case the class implementing `call()` needs it. We
also give it the list of evaluated argument values. The implementer's job is
then to return the value that the call expression produces.

### Call type errors

Before we get to implementing LoxCallable, we need to make the visit method a
little more robust. It currently ignores a couple of failure modes that we can't
pretend won't occur. First, what happens if the callee isn't actually something
you can call? What if you try to do this:

```lox
"totally not a function"();
```

Strings aren't callable in Lox. The runtime representation of a Lox string is a
Java string, so when we cast that to LoxCallable, the JVM will throw a
ClassCastException. We don't want our interpreter to vomit out some nasty Java
stack trace and die. Instead, we need to check the type ourselves first.

^code check-is-callable (2 before, 1 after)

We still throw an exception, but now we're throwing our own exception type, one
that the interpreter knows to catch and report gracefully.

### Checking arity

The other problem relates to the function's **arity**. Arity is the fancy term
for the number of arguments a function or operation expects. Unary operators
have arity one, binary operators two, etc. With functions, the arity is
determined by the number of parameters it declares.

```lox
fun add(a, b, c) {
  print a + b + c;
}
```

This function defines three parameters, `a`, `b`, and `c`, so its arity is
three and it expects three arguments. So what if you try to call it like this:

```lox
add(1, 2, 3, 4); // Too many.
add(1, 2);       // Too few.
```

Different languages take different approaches to this problem. Of course, most
statically typed languages check this at compile time and refuse to compile the
code if the argument count doesn't match the function's arity. JavaScript
discards any extra arguments you pass. If you don't pass enough, it fills in the
missing parameters with the magic sort-of-like-null-but-not-really value
`undefined`. Python is stricter. It raises a runtime error if the argument list
is too short or too long.

I think the latter is a better approach. Passing the wrong number of arguments
is almost always a bug, and it's a mistake I do make in practice. Given that,
the sooner the implementation draws my attention to it, the better. So for Lox,
we'll take Python's approach. Before invoking the callable, we check to see if
the argument list's length matches the callable's arity.

^code check-arity (1 before, 1 after)

That requires a new method on the LoxCallable interface to ask it its arity.

^code callable-arity (1 before, 1 after)

We *could* push the arity checking into the concrete implementation of `call()`.
But, since we'll have multiple classes implementing LoxCallable, that would end
up with redundant validation spread across a few classes. Hoisting it up into
the visit method lets us do it in one place.

## Native Functions

We can theoretically call functions, but we have no functions to call yet.
Before we get to user-defined functions, now is a good time to introduce a vital
but often overlooked facet of language implementations -- <span
name="native">**native functions**</span>. These are functions that the
interpreter exposes to user code but that are implemented in the host language
(in our case Java), not the language being implemented (Lox).

Sometimes these are called **primitives**, **external functions**, or **foreign
functions**. Since these functions can be called while the user's program is
running, they form part of the implementation's runtime. A lot of programming
language books gloss over these because they aren't conceptually interesting.
They're mostly grunt work.

<aside name="native">

Curiously, two names for these functions -- "native" and "foreign" -- are
antonyms. Maybe it depends on the perspective of the person choosing the term.
If you think of yourself as "living" within the runtime's implementation (in our
case, Java), then functions written in that language are "native". But if you
have the mindset of a *user* of your language, then the runtime is implemented
in some other "foreign" language.

Or it may be that "native" refers to the machine code language of the underlying
hardware. In Java, "native" methods are ones implemented in C or C++ and
compiled to native machine code.

<img src="image/functions/foreign.png" class="above" alt="All a matter of perspective." />

</aside>

But when it comes to making your language actually good at doing useful stuff,
the native functions your implementation provides are key. They provide access
to the fundamental services that all programs are defined in terms of. If you
don't provide native functions to access the file system, a user's going to have
a hell of a time writing a program that reads and <span
name="print">displays</span> a file.

<aside name="print">

A classic native function almost every language provides is one to print text to
stdout. In Lox, I made `print` a built-in statement so that we could get stuff
on screen in the chapters before this one.

Once we have functions, we could simplify the language by tearing out the old
print syntax and replacing it with a native function. But that would mean that
examples early in the book wouldn't run on the interpreter from later chapters
and vice versa. So, for the book, I'll leave it alone.

If you're building an interpreter for your *own* language, though, you may want
to consider it.

</aside>

Many languages also allow users to provide their own native functions. The
mechanism for doing so is called a **foreign function interface** (**FFI**),
**native extension**, **native interface**, or something along those lines.
These are nice because they free the language implementer from providing access
to every single capability the underlying platform supports. We won't define an
FFI for jlox, but we will add one native function to give you an idea of what it
looks like.

### Telling time

When we get to [Part III][] and start working on a much more efficient
implementation of Lox, we're going to care deeply about performance. Performance
work requires measurement, and that in turn means **benchmarks**. These are
programs that measure the time it takes to exercise some corner of the
interpreter.

We could measure the time it takes to start up the interpreter, run the
benchmark, and exit, but that adds a lot of overhead -- JVM startup time, OS
shenanigans, etc. That stuff does matter, of course, but if you're just trying
to validate an optimization to some piece of the interpreter, you don't want
that overhead obscuring your results.

A nicer solution is to have the benchmark script itself measure the time elapsed
between two points in the code. To do that, a Lox program needs to be able to
tell time. There's no way to do that now -- you can't implement a useful clock
"from scratch" without access to the underlying clock on the computer.

So we'll add `clock()`, a native function that returns the number of seconds
that have passed since some fixed point in time. The difference between two
successive invocations tells you how much time elapsed between the two calls.
This function is defined in the global scope, so let's ensure the interpreter
has access to that.

^code global-environment (2 before, 2 after)

The `environment` field in the interpreter changes as we enter and exit local
scopes. It tracks the *current* environment. This new `globals` field holds a
fixed reference to the outermost global environment.

When we instantiate an Interpreter, we stuff the native function in that global
scope.

^code interpreter-constructor (2 before, 1 after)

This defines a <span name="lisp-1">variable</span> named "clock". Its value is a
Java anonymous class that implements LoxCallable. The `clock()` function takes
no arguments, so its arity is zero. The implementation of `call()` calls the
corresponding Java function and converts the result to a double value in
seconds.

<aside name="lisp-1">

In Lox, functions and variables occupy the same namespace. In Common Lisp, the
two live in their own worlds. A function and variable with the same name don't
collide. If you call the name, it looks up the function. If you refer to it, it
looks up the variable. This does require jumping through some hoops when you do
want to refer to a function as a first-class value.

Richard P. Gabriel and Kent Pitman coined the terms "Lisp-1" to refer to
languages like Scheme that put functions and variables in the same namespace,
and "Lisp-2" for languages like Common Lisp that partition them. Despite being
totally opaque, those names have since stuck. Lox is a Lisp-1.

</aside>

If we wanted to add other native functions -- reading input from the user,
working with files, etc. -- we could add them each as their own anonymous class
that implements LoxCallable. But for the book, this one is really all we need.

Let's get ourselves out of the function-defining business and let our users
take over...

## Function Declarations

We finally get to add a new production to the `declaration` rule we introduced
back when we added variables. Function declarations, like variables, bind a new
<span name="name">name</span>. That means they are allowed only in places where
a declaration is permitted.

<aside name="name">

A named function declaration isn't really a single primitive operation. It's
syntactic sugar for two distinct steps: (1) creating a new function object, and
(2) binding a new variable to it. If Lox had syntax for anonymous functions, we
wouldn't need function declaration statements. You could just do:

```lox
var add = fun (a, b) {
  print a + b;
};
```

However, since named functions are the common case, I went ahead and gave Lox
nice syntax for them.

</aside>

```ebnf
declaration    → funDecl
               | varDecl
               | statement ;
```

The updated `declaration` rule references this new rule:

```ebnf
funDecl        → "fun" function ;
function       → IDENTIFIER "(" parameters? ")" block ;
```

The main `funDecl` rule uses a separate helper rule `function`. A function
*declaration statement* is the `fun` keyword followed by the actual function-y
stuff. When we get to classes, we'll reuse that `function` rule for declaring
methods. Those look similar to function declarations, but aren't preceded by
<span name="fun">`fun`</span>.

<aside name="fun">

Methods are too classy to have fun.

</aside>

The function itself is a name followed by the parenthesized parameter list and
the body. The body is always a braced block, using the same grammar rule that
block statements use. The parameter list uses this rule:

```ebnf
parameters     → IDENTIFIER ( "," IDENTIFIER )* ;
```

It's like the earlier `arguments` rule, except that each parameter is an
identifier, not an expression. That's a lot of new syntax for the parser to chew
through, but the resulting AST <span name="fun-ast">node</span> isn't too bad.

^code function-ast (1 before, 1 after)

<aside name="fun-ast">

The generated code for the new node is in [Appendix II][appendix-fun].

[appendix-fun]: appendix-ii.html#function-statement

</aside>

A function node has a name, a list of parameters (their names), and then the
body. We store the body as the list of statements contained inside the curly
braces.

Over in the parser, we weave in the new declaration.

^code match-fun (1 before, 1 after)

Like other statements, a function is recognized by the leading keyword. When we
encounter `fun`, we call `function`. That corresponds to the `function` grammar
rule since we already matched and consumed the `fun` keyword. We'll build the
method up a piece at a time, starting with this:

^code parse-function

Right now, it only consumes the identifier token for the function's name. You
might be wondering about that funny little `kind` parameter. Just like we reuse
the grammar rule, we'll reuse the `function()` method later to parse methods
inside classes. When we do that, we'll pass in "method" for `kind` so that the
error messages are specific to the kind of declaration being parsed.

Next, we parse the parameter list and the pair of parentheses wrapped around it.

^code parse-parameters (1 before, 1 after)

This is like the code for handling arguments in a call, except not split out
into a helper method. The outer `if` statement handles the zero parameter case,
and the inner `while` loop parses parameters as long as we find commas to
separate them. The result is the list of tokens for each parameter's name.

Just like we do with arguments to function calls, we validate at parse time
that you don't exceed the maximum number of parameters a function is allowed to
have.

Finally, we parse the body and wrap it all up in a function node.

^code parse-body (1 before, 1 after)

Note that we consume the `{` at the beginning of the body here before calling
`block()`. That's because `block()` assumes the brace token has already been
matched. Consuming it here lets us report a more precise error message if the
`{` isn't found since we know it's in the context of a function declaration.

## Function Objects

We've got some syntax parsed so usually we're ready to interpret, but first we
need to think about how to represent a Lox function in Java. We need to keep
track of the parameters so that we can bind them to argument values when the
function is called. And, of course, we need to keep the code for the body of the
function so that we can execute it.

That's basically what the Stmt.Function class is. Could we just use that?
Almost, but not quite. We also need a class that implements LoxCallable so that
we can call it. We don't want the runtime phase of the interpreter to bleed into
the front end's syntax classes so we don't want Stmt.Function itself to
implement that. Instead, we wrap it in a new class.

^code lox-function

We implement the `call()` of LoxCallable like so:

^code function-call

This handful of lines of code is one of the most fundamental, powerful pieces of
our interpreter. As we saw in [the chapter on statements and <span
name="env">state</span>][statements], managing name environments is a core part
of a language implementation. Functions are deeply tied to that.

[statements]: statements-and-state.html

<aside name="env">

We'll dig even deeper into environments in the [next chapter][].

[next chapter]: resolving-and-binding.html

</aside>

Parameters are core to functions, especially the fact that a function
*encapsulates* its parameters -- no other code outside of the function can see
them. This means each function gets its own environment where it stores those
variables.

Further, this environment must be created dynamically. Each function *call* gets
its own environment. Otherwise, recursion would break. If there are multiple
calls to the same function in play at the same time, each needs its *own*
environment, even though they are all calls to the same function.

For example, here's a convoluted way to count to three:

```lox
fun count(n) {
  if (n > 1) count(n - 1);
  print n;
}

count(3);
```

Imagine we pause the interpreter right at the point where it's about to print 1
in the innermost nested call. The outer calls to print 2 and 3 haven't printed
their values yet, so there must be environments somewhere in memory that still
store the fact that `n` is bound to 3 in one context, 2 in another, and 1 in the
innermost, like:

<img src="image/functions/recursion.png" alt="A separate environment for each recursive call." />

That's why we create a new environment at each *call*, not at the function
*declaration*. The `call()` method we saw earlier does that. At the beginning of
the call, it creates a new environment. Then it walks the parameter and argument
lists in lockstep. For each pair, it creates a new variable with the parameter's
name and binds it to the argument's value.

So, for a program like this:

```lox
fun add(a, b, c) {
  print a + b + c;
}

add(1, 2, 3);
```

At the point of the call to `add()`, the interpreter creates something like
this:

<img src="image/functions/binding.png" alt="Binding arguments to their parameters." />

Then `call()` tells the interpreter to execute the body of the function in this
new function-local environment. Up until now, the current environment was the
environment where the function was being called. Now, we teleport from there
inside the new parameter space we've created for the function.

This is all that's required to pass data into the function. By using different
environments when we execute the body, calls to the same function with the
same code can produce different results.

Once the body of the function has finished executing, `executeBlock()` discards
that function-local environment and restores the previous one that was active
back at the callsite. Finally, `call()` returns `null`, which returns `nil` to
the caller. (We'll add return values later.)

Mechanically, the code is pretty simple. Walk a couple of lists. Bind some new
variables. Call a method. But this is where the crystalline *code* of the
function declaration becomes a living, breathing *invocation*. This is one of my
favorite snippets in this entire book. Feel free to take a moment to meditate on
it if you're so inclined.

Done? OK. Note that when we bind the parameters, we assume the parameter and
argument lists have the same length. This is safe because `visitCallExpr()`
checks the arity before calling `call()`. It relies on the function reporting
its arity to do that.

^code function-arity

That's most of our object representation. While we're in here, we may as well
implement `toString()`.

^code function-to-string

This gives nicer output if a user decides to print a function value.

```lox
fun add(a, b) {
  print a + b;
}

print add; // "<fn add>".
```

### Interpreting function declarations

We'll come back and refine LoxFunction soon, but that's enough to get started.
Now we can visit a function declaration.

^code visit-function

This is similar to how we interpret other literal expressions. We take a
function *syntax node* -- a compile-time representation of the function -- and
convert it to its runtime representation. Here, that's a LoxFunction that wraps
the syntax node.

Function declarations are different from other literal nodes in that the
declaration *also* binds the resulting object to a new variable. So, after
creating the LoxFunction, we create a new binding in the current environment and
store a reference to it there.

With that, we can define and call our own functions all within Lox. Give it a
try:

```lox
fun sayHi(first, last) {
  print "Hi, " + first + " " + last + "!";
}

sayHi("Dear", "Reader");
```

I don't know about you, but that looks like an honest-to-God programming
language to me.

## Return Statements

We can get data into functions by passing parameters, but we've got no way to
get results back <span name="hotel">*out*</span>. If Lox were an
expression-oriented language like Ruby or Scheme, the body would be an
expression whose value is implicitly the function's result. But in Lox, the body
of a function is a list of statements which don't produce values, so we need
dedicated syntax for emitting a result. In other words, `return` statements. I'm
sure you can guess the grammar already.

<aside name="hotel">

The Hotel California of data.

</aside>

```ebnf
statement      → exprStmt
               | forStmt
               | ifStmt
               | printStmt
               | returnStmt
               | whileStmt
               | block ;

returnStmt     → "return" expression? ";" ;
```

We've got one more -- the final, in fact -- production under the venerable
`statement` rule. A `return` statement is the `return` keyword followed by an
optional expression and terminated with a semicolon.

The return value is optional to support exiting early from a function that
doesn't return a useful value. In statically typed languages, "void" functions
don't return a value and non-void ones do. Since Lox is dynamically typed, there
are no true void functions. The compiler has no way of preventing you from
taking the result value of a call to a function that doesn't contain a `return`
statement.

```lox
fun procedure() {
  print "don't return anything";
}

var result = procedure();
print result; // ?
```

This means every Lox function must return *something*, even if it contains no
`return` statements at all. We use `nil` for this, which is why LoxFunction's
implementation of `call()` returns `null` at the end. In that same vein, if you
omit the value in a `return` statement, we simply treat it as equivalent to:

```lox
return nil;
```

Over in our AST generator, we add a <span name="return-ast">new node</span>.

^code return-ast (1 before, 1 after)

<aside name="return-ast">

The generated code for the new node is in [Appendix II][appendix-return].

[appendix-return]: appendix-ii.html#return-statement

</aside>

It keeps the `return` keyword token so we can use its location for error
reporting, and the value being returned, if any. We parse it like other
statements, first by recognizing the initial keyword.

^code match-return (1 before, 1 after)

That branches out to:

^code parse-return-statement

After snagging the previously consumed `return` keyword, we look for a value
expression. Since many different tokens can potentially start an expression,
it's hard to tell if a return value is *present*. Instead, we check if it's
*absent*. Since a semicolon can't begin an expression, if the next token is
that, we know there must not be a value.

### Returning from calls

Interpreting a `return` statement is tricky. You can return from anywhere within
the body of a function, even deeply nested inside other statements. When the
return is executed, the interpreter needs to jump all the way out of whatever
context it's currently in and cause the function call to complete, like some
kind of jacked up control flow construct.

For example, say we're running this program and we're about to execute the
`return` statement:

```lox
fun count(n) {
  while (n < 100) {
    if (n == 3) return n; // <--
    print n;
    n = n + 1;
  }
}

count(1);
```

The Java call stack currently looks roughly like this:

```text
Interpreter.visitReturnStmt()
Interpreter.visitIfStmt()
Interpreter.executeBlock()
Interpreter.visitBlockStmt()
Interpreter.visitWhileStmt()
Interpreter.executeBlock()
LoxFunction.call()
Interpreter.visitCallExpr()
```

We need to get from the top of the stack all the way back to `call()`. I don't
know about you, but to me that sounds like exceptions. When we execute a
`return` statement, we'll use an exception to unwind the interpreter past the
visit methods of all of the containing statements back to the code that began
executing the body.

The visit method for our new AST node looks like this:

^code visit-return

If we have a return value, we evaluate it; otherwise, we use `nil`. Then we take
that value and wrap it in a custom exception class and throw it.

^code return-exception

This class wraps the return value with the accoutrements Java requires for a
runtime exception class. The weird super constructor call with those `null` and
`false` arguments disables some JVM machinery that we don't need. Since we're
using our exception class for <span name="exception">control flow</span> and not
actual error handling, we don't need overhead like stack traces.

<aside name="exception">

For the record, I'm not generally a fan of using exceptions for control flow.
But inside a heavily recursive tree-walk interpreter, it's the way to go. Since
our own syntax tree evaluation is so heavily tied to the Java call stack, we're
pressed to do some heavyweight call stack manipulation occasionally, and
exceptions are a handy tool for that.

</aside>

We want this to unwind all the way to where the function call began, the
`call()` method in LoxFunction.

^code catch-return (3 before, 1 after)

We wrap the call to `executeBlock()` in a try-catch block. When it catches a
return exception, it pulls out the value and makes that the return value from
`call()`. If it never catches one of these exceptions, it means the function
reached the end of its body without hitting a `return` statement. In that case,
it implicitly returns `nil`.

Let's try it out. We finally have enough power to support this classic
example -- a recursive function to calculate Fibonacci numbers:

<span name="slow"></span>

```lox
fun fib(n) {
  if (n <= 1) return n;
  return fib(n - 2) + fib(n - 1);
}

for (var i = 0; i < 20; i = i + 1) {
  print fib(i);
}
```

This tiny program exercises almost every language feature we have spent the past
several chapters implementing -- expressions, arithmetic, branching, looping,
variables, functions, function calls, parameter binding, and returns.

<aside name="slow">

You might notice this is pretty slow. Obviously, recursion isn't the most
efficient way to calculate Fibonacci numbers, but as a microbenchmark, it does
a good job of stress testing how fast our interpreter implements function calls.

As you can see, the answer is "not very fast". That's OK. Our C interpreter will
be faster.

</aside>

## Local Functions and Closures

Our functions are pretty full featured, but there is one hole to patch. In fact,
it's a big enough gap that we'll spend most of the [next chapter][] sealing it
up, but we can get started here.

LoxFunction's implementation of `call()` creates a new environment where it
binds the function's parameters. When I showed you that code, I glossed over one
important point: What is the *parent* of that environment?

Right now, it is always `globals`, the top-level global environment. That way,
if an identifier isn't defined inside the function body itself, the interpreter
can look outside the function in the global scope to find it. In the Fibonacci
example, that's how the interpreter is able to look up the recursive call to
`fib` inside the function's own body -- `fib` is a global variable.

But recall that in Lox, function declarations are allowed *anywhere* a name can
be bound. That includes the top level of a Lox script, but also the inside of
blocks or other functions. Lox supports **local functions** that are defined
inside another function, or nested inside a block.

Consider this classic example:

```lox
fun makeCounter() {
  var i = 0;
  fun count() {
    i = i + 1;
    print i;
  }

  return count;
}

var counter = makeCounter();
counter(); // "1".
counter(); // "2".
```

Here, `count()` uses `i`, which is declared outside of itself in the containing
function `makeCounter()`. `makeCounter()` returns a reference to the `count()`
function and then its own body finishes executing completely.

Meanwhile, the top-level code invokes the returned `count()` function. That
executes the body of `count()`, which assigns to and reads `i`, even though the
function where `i` was defined has already exited.

If you've never encountered a language with nested functions before, this might
seem crazy, but users do expect it to work. Alas, if you run it now, you get an
undefined variable error in the call to `counter()` when the body of `count()`
tries to look up `i`. That's because the environment chain in effect looks like
this:

<img src="image/functions/global.png" alt="The environment chain from count()'s body to the global scope." />

When we call `count()` (through the reference to it stored in `counter`), we
create a new empty environment for the function body. The parent of that is the
global environment. We lost the environment for `makeCounter()` where `i` is
bound.

Let's go back in time a bit. Here's what the environment chain looked like right
when we declared `count()` inside the body of `makeCounter()`:

<img src="image/functions/body.png" alt="The environment chain inside the body of makeCounter()." />

So at the point where the function is declared, we can see `i`. But when we
return from `makeCounter()` and exit its body, the interpreter discards that
environment. Since the interpreter doesn't keep the environment surrounding
`count()` around, it's up to the function object itself to hang on to it.

This data structure is called a <span name="closure">**closure**</span> because
it "closes over" and holds on to the surrounding variables where the function is
declared. Closures have been around since the early Lisp days, and language
hackers have come up with all manner of ways to implement them. For jlox, we'll
do the simplest thing that works. In LoxFunction, we add a field to store an
environment.

<aside name="closure">

"Closure" is yet another term coined by Peter J. Landin. I assume before he came
along that computer scientists communicated with each other using only primitive
grunts and pawing hand gestures.

</aside>

^code closure-field (1 before, 1 after)

We initialize that in the constructor.

^code closure-constructor (1 after)

When we create a LoxFunction, we capture the current environment.

^code visit-closure (1 before, 1 after)

This is the environment that is active when the function is *declared*, not when
it's *called*, which is what we want. It represents the lexical scope
surrounding the function declaration. Finally, when we call the function, we use
that environment as the call's parent instead of going straight to `globals`.

^code call-closure (1 before, 1 after)

This creates an environment chain that goes from the function's body out through
the environments where the function is declared, all the way out to the global
scope. The runtime environment chain matches the textual nesting of the source
code like we want. The end result when we call that function looks like this:

<img src="image/functions/closure.png" alt="The environment chain with the closure." />

Now, as you can see, the interpreter can still find `i` when it needs to because
it's in the middle of the environment chain. Try running that `makeCounter()`
example now. It works!

Functions let us abstract over, reuse, and compose code. Lox is much more
powerful than the rudimentary arithmetic calculator it used to be. Alas, in our
rush to cram closures in, we have let a tiny bit of dynamic scoping leak into
the interpreter. In the [next chapter][], we will explore deeper into lexical
scope and close that hole.

[next chapter]: resolving-and-binding.html

<div class="challenges">

## Challenges

1.  Our interpreter carefully checks that the number of arguments passed to a
    function matches the number of parameters it expects. Since this check is
    done at runtime on every call, it has a performance cost. Smalltalk
    implementations don't have that problem. Why not?

1.  Lox's function declaration syntax performs two independent operations. It
    creates a function and also binds it to a name. This improves usability for
    the common case where you do want to associate a name with the function.
    But in functional-styled code, you often want to create a function to
    immediately pass it to some other function or return it. In that case, it
    doesn't need a name.

    Languages that encourage a functional style usually support **anonymous
    functions** or **lambdas** -- an expression syntax that creates a function
    without binding it to a name. Add anonymous function syntax to Lox so that
    this works:

    ```lox
    fun thrice(fn) {
      for (var i = 1; i <= 3; i = i + 1) {
        fn(i);
      }
    }

    thrice(fun (a) {
      print a;
    });
    // "1".
    // "2".
    // "3".
    ```

    How do you handle the tricky case of an anonymous function expression
    occurring in an expression statement:

    ```lox
    fun () {};
    ```

1.  Is this program valid?

    ```lox
    fun scope(a) {
      var a = "local";
    }
    ```

    In other words, are a function's parameters in the *same* scope as its local
    variables, or in an outer scope? What does Lox do? What about other
    languages you are familiar with? What do you think a language *should* do?

</div>


================================================
FILE: book/garbage-collection.md
================================================
> I wanna, I wanna,<br />
> I wanna, I wanna,<br />
> I wanna be trash.<br />
>
> <cite>The Whip, &ldquo;Trash&rdquo;</cite>

We say Lox is a "high-level" language because it frees programmers from worrying
about details irrelevant to the problem they're solving. The user becomes an
executive, giving the machine abstract goals and letting the lowly computer
figure out how to get there.

Dynamic memory allocation is a perfect candidate for automation. It's necessary
for a working program, tedious to do by hand, and yet still error-prone. The
inevitable mistakes can be catastrophic, leading to crashes, memory corruption,
or security violations. It's the kind of risky-yet-boring work that machines
excel at over humans.

This is why Lox is a **managed language**, which means that the language
implementation manages memory allocation and freeing on the user's behalf. When
a user performs an operation that requires some dynamic memory, the VM
automatically allocates it. The programmer never worries about deallocating
anything. The machine ensures any memory the program is using sticks around as
long as needed.

Lox provides the illusion that the computer has an infinite amount of memory.
Users can allocate and allocate and allocate and never once think about where
all these bytes are coming from. Of course, computers do not yet *have* infinite
memory. So the way managed languages maintain this illusion is by going behind
the programmer's back and reclaiming memory that the program no longer needs.
The component that does this is called a **garbage <span
name="recycle">collector</span>**.

<aside name="recycle">

Recycling would really be a better metaphor for this. The GC doesn't *throw
away* the memory, it reclaims it to be reused for new data. But managed
languages are older than Earth Day, so the inventors went with the analogy they
knew.

<img src="image/garbage-collection/recycle.png" class="above" alt="A recycle bin full of bits." />

</aside>

## Reachability

This raises a surprisingly difficult question: how does a VM tell what memory is
*not* needed? Memory is only needed if it is read in the future, but short of
having a time machine, how can an implementation tell what code the program
*will* execute and which data it *will* use? Spoiler alert: VMs cannot travel
into the future. Instead, the language makes a <span
name="conservative">conservative</span> approximation: it considers a piece of
memory to still be in use if it *could possibly* be read in the future.

<aside name="conservative">

I'm using "conservative" in the general sense. There is such a thing as a
"conservative garbage collector" which means something more specific. All
garbage collectors are "conservative" in that they keep memory alive if it
*could* be accessed, instead of having a Magic 8-Ball that lets them more
precisely know what data *will* be accessed.

A **conservative GC** is a special kind of collector that considers any piece of
memory to be a pointer if the value in there looks like it could be an address.
This is in contrast to a **precise GC** -- which is what we'll implement -- that
knows exactly which words in memory are pointers and which store other kinds of
values like numbers or strings.

</aside>

That sounds *too* conservative. Couldn't *any* bit of memory potentially be
read? Actually, no, at least not in a memory-safe language like Lox. Here's an
example:

```lox
var a = "first value";
a = "updated";
// GC here.
print a;
```

Say we run the GC after the assignment has completed on the second line. The
string "first value" is still sitting in memory, but there is no way for the
user's program to ever get to it. Once `a` got reassigned, the program lost any
reference to that string. We can safely free it. A value is **reachable** if
there is some way for a user program to reference it. Otherwise, like the string
"first value" here, it is **unreachable**.

Many values can be directly accessed by the VM. Take a look at:

```lox
var global = "string";
{
  var local = "another";
  print global + local;
}
```

Pause the program right after the two strings have been concatenated but before
the `print` statement has executed. The VM can reach `"string"` by looking
through the global variable table and finding the entry for `global`. It can
find `"another"` by walking the value stack and hitting the slot for the local
variable `local`. It can even find the concatenated string `"stringanother"`
since that temporary value is also sitting on the VM's stack at the point when
we paused our program.

All of these values are called **roots**. A root is any object that the VM can
reach directly without going through a reference in some other object. Most
roots are global variables or on the stack, but as we'll see, there are a couple
of other places the VM stores references to objects that it can find.

Other values can be found by going through a reference inside another value.
<span name="class">Fields</span> on instances of classes are the most obvious
case, but we don't have those yet. Even without those, our VM still has indirect
references. Consider:

<aside name="class">

We'll get there [soon][classes], though!

[classes]: classes-and-instances.html

</aside>

```lox
fun makeClosure() {
  var a = "data";

  fun f() { print a; }
  return f;
}

{
  var closure = makeClosure();
  // GC here.
  closure();
}
```

Say we pause the program on the marked line and run the garbage collector. When
the collector is done and the program resumes, it will call the closure, which
will in turn print `"data"`. So the collector needs to *not* free that string.
But here's what the stack looks like when we pause the program:

<img src="image/garbage-collection/stack.png" alt="The stack, containing only the script and closure." />

The `"data"` string is nowhere on it. It has already been hoisted off the stack
and moved into the closed upvalue that the closure uses. The closure itself is
on the stack. But to get to the string, we need to trace through the closure and
its upvalue array. Since it *is* possible for the user's program to do that, all
of these indirectly accessible objects are also considered reachable.

<img src="image/garbage-collection/reachable.png" class="wide" alt="All of the referenced objects from the closure, and the path to the 'data' string from the stack." />

This gives us an inductive definition of reachability:

*   All roots are reachable.

*   Any object referred to from a reachable object is itself reachable.

These are the values that are still "live" and need to stay in memory. Any value
that *doesn't* meet this definition is fair game for the collector to reap.
That recursive pair of rules hints at a recursive algorithm we can use to free
up unneeded memory:

1.  Starting with the roots, traverse through object references to find the
    full set of reachable objects.

2.  Free all objects *not* in that set.

Many <span name="handbook">different</span> garbage collection algorithms are in
use today, but they all roughly follow that same structure. Some may interleave
the steps or mix them, but the two fundamental operations are there. They mostly
differ in *how* they perform each step.

<aside name="handbook">

If you want to explore other GC algorithms,
[*The Garbage Collection Handbook*][gc book] (Jones, et al.) is the canonical
reference. For a large book on such a deep, narrow topic, it is quite enjoyable
to read. Or perhaps I have a strange idea of fun.

[gc book]: http://gchandbook.org/

</aside>

## Mark-Sweep Garbage Collection

The first managed language was Lisp, the second "high-level" language to be
invented, right after Fortran. John McCarthy considered using manual memory
management or reference counting, but <span
name="procrastination">eventually</span> settled on (and coined) garbage
collection -- once the program was out of memory, it would go back and find
unused storage it could reclaim.

<aside name="procrastination">

In John McCarthy's "History of Lisp", he notes: "Once we decided on garbage
collection, its actual implementation could be postponed, because only toy
examples were being done." Our choice to procrastinate adding the GC to clox
follows in the footsteps of giants.

</aside>

He designed the very first, simplest garbage collection algorithm, called
**mark-and-sweep** or just **mark-sweep**. Its description fits in three short
paragraphs in the initial paper on Lisp. Despite its age and simplicity, the
same fundamental algorithm underlies many modern memory managers. Some corners
of CS seem to be timeless.

As the name implies, mark-sweep works in two phases:

*   **Marking:** We start with the roots and traverse or <span
    name="trace">*trace*</span> through all of the objects those roots refer to.
    This is a classic graph traversal of all of the reachable objects. Each time
    we visit an object, we *mark* it in some way. (Implementations differ in how
    they record the mark.)

*   **Sweeping:** Once the mark phase completes, every reachable object
    in the heap has been marked. That means any unmarked object is unreachable and
    ripe for reclamation. We go through all the unmarked objects and free each
    one.

It looks something like this:

<img src="image/garbage-collection/mark-sweep.png" class="wide" alt="Starting from a graph of objects, first the reachable ones are marked, the remaining are swept, and then only the reachable remain." />

<aside name="trace">

A **tracing garbage collector** is any algorithm that traces through the graph
of object references. This is in contrast with reference counting, which has a
different strategy for tracking the reachable objects.

</aside>

That's what we're gonna implement. Whenever we decide it's time to reclaim some
bytes, we'll trace everything and mark all the reachable objects, free what
didn't get marked, and then resume the user's program.

### Collecting garbage

This entire chapter is about implementing this one <span
name="one">function</span>:

<aside name="one">

Of course, we'll end up adding a bunch of helper functions too.

</aside>

^code collect-garbage-h (1 before, 1 after)

We'll work our way up to a full implementation starting with this empty shell:

^code collect-garbage

The first question you might ask is, When does this function get called? It
turns out that's a subtle question that we'll spend some time on later in the
chapter. For now we'll sidestep the issue and build ourselves a handy diagnostic
tool in the process.

^code define-stress-gc (1 before, 2 after)

We'll add an optional "stress test" mode for the garbage collector. When this
flag is defined, the GC runs as often as it possibly can. This is, obviously,
horrendous for performance. But it's great for flushing out memory management
bugs that occur only when a GC is triggered at just the right moment. If *every*
moment triggers a GC, you're likely to find those bugs.

^code call-collect (1 before, 1 after)

Whenever we call `reallocate()` to acquire more memory, we force a collection to
run. The `if` check is because `reallocate()` is also called to free or shrink an
allocation. We don't want to trigger a GC for that -- in particular because the
GC itself will call `reallocate()` to free memory.

Collecting right before <span name="demand">allocation</span> is the classic way
to wire a GC into a VM. You're already calling into the memory manager, so it's
an easy place to hook in the code. Also, allocation is the only time when you
really *need* some freed up memory so that you can reuse it. If you *don't* use
allocation to trigger a GC, you have to make sure every possible place in code
where you can loop and allocate memory also has a way to trigger the collector.
Otherwise, the VM can get into a starved state where it needs more memory but
never collects any.

<aside name="demand">

More sophisticated collectors might run on a separate thread or be interleaved
periodically during program execution -- often at function call boundaries or
when a backward jump occurs.

</aside>

### Debug logging

While we're on the subject of diagnostics, let's put some more in. A real
challenge I've found with garbage collectors is that they are opaque. We've been
running lots of Lox programs just fine without any GC *at all* so far. Once we
add one, how do we tell if it's doing anything useful? Can we tell only if we
write programs that plow through acres of memory? How do we debug that?

An easy way to shine a light into the GC's inner workings is with some logging.

^code define-log-gc (1 before, 2 after)

When this is enabled, clox prints information to the console when it does
something with dynamic memory.

We need a couple of includes.

^code debug-log-includes (1 before, 2 after)

We don't have a collector yet, but we can start putting in some of the logging
now. We'll want to know when a collection run starts.

^code log-before-collect (1 before, 1 after)

Eventually we will log some other operations during the collection, so we'll
also want to know when the show's over.

^code log-after-collect (2 before, 1 after)

We don't have any code for the collector yet, but we do have functions for
allocating and freeing, so we can instrument those now.

^code debug-log-allocate (1 before, 1 after)

And at the end of an object's lifespan:

^code log-free-object (1 before, 1 after)

With these two flags, we should be able to see that we're making progress as we
work through the rest of the chapter.

## Marking the Roots

Objects are scattered across the heap like stars in the inky night sky. A
reference from one object to another forms a connection, and these
constellations are the graph that the mark phase traverses. Marking begins at
the roots.

^code call-mark-roots (3 before, 2 after)

Most roots are local variables or temporaries sitting right in the VM's stack,
so we start by walking that.

^code mark-roots

To mark a Lox value, we use this new function:

^code mark-value-h (1 before, 1 after)

Its implementation is here:

^code mark-value

Some Lox values -- numbers, Booleans, and `nil` -- are stored directly inline in
Value and require no heap allocation. The garbage collector doesn't need to
worry about them at all, so the first thing we do is ensure that the value is an
actual heap object. If so, the real work happens in this function:

^code mark-object-h (1 before, 1 after)

Which is defined here:

^code mark-object

The `NULL` check is unnecessary when called from `markValue()`. A Lox Value that
is some kind of Obj type will always have a valid pointer. But later we will
call this function directly from other code, and in some of those places, the
object being pointed to is optional.

Assuming we do have a valid object, we mark it by setting a flag. That new field
lives in the Obj header struct all objects share.

^code is-marked-field (1 before, 1 after)

Every new object begins life unmarked because we haven't yet determined if it is
reachable or not.

^code init-is-marked (1 before, 2 after)

Before we go any farther, let's add some logging to `markObject()`.

^code log-mark-object (2 before, 1 after)

This way we can see what the mark phase is doing. Marking the stack takes care
of local variables and temporaries. The other main source of roots is the
global variables.

^code mark-globals (2 before, 1 after)

Those live in a hash table owned by the VM, so we'll declare another helper
function for marking all of the objects in a table.

^code mark-table-h (2 before, 2 after)

We implement that in the "table" module here:

^code mark-table

Pretty straightforward. We walk the entry array. For each one, we mark its
value. We also mark the key strings for each entry since the GC manages those
strings too.

### Less obvious roots

Those cover the roots that we typically think of -- the values that are
obviously reachable because they're stored in variables the user's program can
see. But the VM has a few of its own hidey-holes where it squirrels away
references to values that it directly accesses.

Most function call state lives in the value stack, but the VM maintains a
separate stack of CallFrames. Each CallFrame contains a pointer to the closure
being called. The VM uses those pointers to access constants and upvalues, so
those closures need to be kept around too.

^code mark-closures (1 before, 2 after)

Speaking of upvalues, the open upvalue list is another set of values that the
VM can directly reach.

^code mark-open-upvalues (3 before, 2 after)

Remember also that a collection can begin during *any* allocation. Those
allocations don't just happen while the user's program is running. The compiler
itself periodically grabs memory from the heap for literals and the constant
table. If the GC runs while we're in the middle of compiling, then any values
the compiler directly accesses need to be treated as roots too.

To keep the compiler module cleanly separated from the rest of the VM, we'll do
that in a separate function.

^code call-mark-compiler-roots (1 before, 1 after)

It's declared here:

^code mark-compiler-roots-h (1 before, 2 after)

Which means the "memory" module needs an include.

^code memory-include-compiler (2 before, 1 after)

And the definition is over in the "compiler" module.

^code mark-compiler-roots

Fortunately, the compiler doesn't have too many values that it hangs on to. The
only object it uses is the ObjFunction it is compiling into. Since function
declarations can nest, the compiler has a linked list of those and we walk the
whole list.

Since the "compiler" module is calling `markObject()`, it also needs an include.

^code compiler-include-memory (1 before, 1 after)

Those are all the roots. After running this, every object that the VM -- runtime
and compiler -- can get to *without* going through some other object has its
mark bit set.

## Tracing Object References

The next step in the marking process is tracing through the graph of references
between objects to find the indirectly reachable values. We don't have instances
with fields yet, so there aren't many objects that contain references, but we do
have <span name="some">some</span>. In particular, ObjClosure has the list of
ObjUpvalues it closes over as well as a reference to the raw ObjFunction that it
wraps. ObjFunction, in turn, has a constant table containing references to all
of the literals created in the function's body. This is enough to build a fairly
complex web of objects for our collector to crawl through.

<aside name="some">

I slotted this chapter into the book right here specifically *because* we now
have closures which give us interesting objects for the garbage collector to
process.

</aside>

Now it's time to implement that traversal. We can go breadth-first, depth-first,
or in some other order. Since we just need to find the *set* of all reachable
objects, the order we visit them <span name="dfs">mostly</span> doesn't matter.

<aside name="dfs">

I say "mostly" because some garbage collectors move objects in the order that
they are visited, so traversal order determines which objects end up adjacent in
memory. That impacts performance because the CPU uses locality to determine
which memory to preload into the caches.

Even when traversal order does matter, it's not clear which order is *best*.
It's very difficult to determine the order in which objects will be used in the
future, so it's hard for the GC to know which order will help performance.

</aside>

### The tricolor abstraction

As the collector wanders through the graph of objects, we need to make sure it
doesn't lose track of where it is or get stuck going in circles. This is
particularly a concern for advanced implementations like incremental GCs that
interleave marking with running pieces of the user's program. The collector
needs to be able to pause and then pick up where it left off later.

To help us soft-brained humans reason about this complex process, VM hackers
came up with a metaphor called the <span name="color"></span>**tricolor
abstraction**. Each object has a conceptual "color" that tracks what state the
object is in, and what work is left to do.

<aside name="color">

Advanced garbage collection algorithms often add other colors to the
abstraction. I've seen multiple shades of gray, and even purple in some designs.
My puce-chartreuse-fuchsia-malachite collector paper was, alas, not accepted for
publication.

</aside>

*   **<img src="image/garbage-collection/white.png" alt="A white circle."
    class="dot" /> White:** At the beginning of a garbage collection, every
    object is white. This color means we have not reached or processed the
    object at all.

*   **<img src="image/garbage-collection/gray.png" alt="A gray circle."
    class="dot" /> Gray:** During marking, when we first reach an object, we
    darken it gray. This color means we know the object itself is reachable and
    should not be collected. But we have not yet traced *through* it to see what
    *other* objects it references. In graph algorithm terms, this is the
    *worklist* -- the set of objects we know about but haven't processed yet.

*   **<img src="image/garbage-collection/black.png" alt="A black circle."
    class="dot" /> Black:** When
    we take a gray object and mark all of the objects it references, we then
    turn the gray object black. This color means the mark phase is done
    processing that object.

In terms of that abstraction, the marking process now looks like this:

1.  Start off with all objects white.

2.  Find all the roots and mark them gray.

3.  Repeat as long as there are still gray objects:

    1.  Pick a gray object. Turn any white objects that the object mentions to
        gray.

    2.  Mark the original gray object black.

I find it helps to visualize this. You have a web of objects with references
between them. Initially, they are all little white dots. Off to the side are
some incoming edges from the VM that point to the roots. Those roots turn gray.
Then the objects that each gray object references turn gray while the object
itself turns black.
The full effect is a gray wavefront that passes through the graph, leaving a
field of reachable black objects behind it. Unreachable objects are not touched
by the wavefront and stay white.

<img src="image/garbage-collection/tricolor-trace.png" class="wide" alt="A gray wavefront working through a graph of nodes." />

At the <span name="invariant">end</span>, you're left with a sea of reached,
black objects sprinkled with islands of white objects that can be swept up and
freed. Once the unreachable objects are freed, the remaining objects -- all
black -- are reset to white for the next garbage collection cycle.

<aside name="invariant">

Note that at every step of this process no black node ever points to a white
node. This property is called the **tricolor invariant**. The traversal process
maintains this invariant to ensure that no reachable object is ever collected.

</aside>

### A worklist for gray objects

In our implementation we have already marked the roots. They're all gray. The
next step is to start picking them and traversing their references. But we don't
have any easy way to find them. We set a field on the object, but that's it. We
don't want to have to traverse the entire object list looking for objects with
that field set.

Instead, we'll create a separate worklist to keep track of all of the gray
objects. When an object turns gray, in addition to setting the mark field we'll
also add it to the worklist.

^code add-to-gray-stack (1 before, 1 after)

We could use any kind of data structure that lets us put items in and take them
out easily. I picked a stack because that's the simplest to implement with a
dynamic array in C. It works mostly like other dynamic arrays we've built in
clox, *except*, note that it calls the *system* `realloc()` function and not our
own `reallocate()` wrapper. The memory for the gray stack itself is *not*
managed by the garbage collector. We don't want growing the gray stack during a
GC to cause the GC to recursively start a new GC. That could tear a hole in the
space-time continuum.

We'll manage its memory ourselves, explicitly. The VM owns the gray stack.

^code vm-gray-stack (1 before, 1 after)

It starts out empty.

^code init-gray-stack (1 before, 2 after)

And we need to free it when the VM shuts down.

^code free-gray-stack (2 before, 1 after)

<span name="robust">We</span> take full responsibility for this array. That
includes allocation failure. If we can't create or grow the gray stack, then we
can't finish the garbage collection. This is bad news for the VM, but
fortunately rare since the gray stack tends to be pretty small. It would be nice
to do something more graceful, but to keep the code in this book simple, we just
abort.

<aside name="robust">

To be more robust, we can allocate a "rainy day fund" block of memory when we
start the VM. If the gray stack allocation fails, we free the rainy day block
and try again. That may give us enough wiggle room on the heap to create the
gray stack, finish the GC, and free up more memory.

</aside>

^code exit-gray-stack (2 before, 1 after)

### Processing gray objects

OK, now when we're done marking the roots, we have both set a bunch of fields
and filled our worklist with objects to chew through. It's time for the next
phase.

^code call-trace-references (1 before, 2 after)

Here's the implementation:

^code trace-references

It's as close to that textual algorithm as you can get. Until the stack empties,
we keep pulling out gray objects, traversing their references, and then marking
them black. Traversing an object's references may turn up new white objects that
get marked gray and added to the stack. So this function swings back and forth
between turning white objects gray and gray objects black, gradually advancing
the entire wavefront forward.

Here's where we traverse a single object's references:

^code blacken-object

Each object <span name="leaf">kind</span> has different fields that might
reference other objects, so we need a specific blob of code for each type. We
start with the easy ones -- strings and native function objects contain no
outgoing references so there is nothing to traverse.

<aside name="leaf">

An easy optimization we could do in `markObject()` is to skip adding strings and
native functions to the gray stack at all since we know they don't need to be
processed. Instead, they could darken from white straight to black.

</aside>

Note that we don't set any state in the traversed object itself. There is no
direct encoding of "black" in the object's state. A black object is any object
whose `isMarked` field is <span name="field">set</span> and that is no longer in
the gray stack.

<aside name="field">

You may rightly wonder why we have the `isMarked` field at all. All in good
time, friend.

</aside>

Now let's start adding in the other object types. The simplest is upvalues.

^code blacken-upvalue (2 before, 1 after)

When an upvalue is closed, it contains a reference to the closed-over value.
Since the value is no longer on the stack, we need to make sure we trace the
reference to it from the upvalue.

Next are functions.

^code blacken-function (1 before, 1 after)

Each function has a reference to an ObjString containing the function's name.
More importantly, the function has a constant table packed full of references to
other objects. We trace all of those using this helper:

^code mark-array

The last object type we have now -- we'll add more in later chapters -- is
closures.

^code blacken-closure (1 before, 1 after)

Each closure has a reference to the bare function it wraps, as well as an array
of pointers to the upvalues it captures. We trace all of those.

That's the basic mechanism for processing a gray object, but there are two loose
ends to tie up. First, some logging.

^code log-blacken-object (1 before, 1 after)

This way, we can watch the tracing percolate through the object graph. Speaking
of which, note that I said *graph*. References between objects are directed, but
that doesn't mean they're *acyclic!* It's entirely possible to have cycles of
objects. When that happens, we need to ensure our collector doesn't get stuck in
an infinite loop as it continually re-adds the same series of objects to the
gray stack.

The fix is easy.

^code check-is-marked (1 before, 1 after)

If the object is already marked, we don't mark it again and thus don't add it to
the gray stack. This ensures that an already-gray object is not redundantly
added and that a black object is not inadvertently turned back to gray. In other
words, it keeps the wavefront moving forward through only the white objects.

## Sweeping Unused Objects

When the loop in `traceReferences()` exits, we have processed all the objects we
could get our hands on. The gray stack is empty, and every object in the heap is
either black or white. The black objects are reachable, and we want to hang on to
them. Anything still white never got touched by the trace and is thus garbage.
All that's left is to reclaim them.

^code call-sweep (1 before, 2 after)

All of the logic lives in one function.

^code sweep

I know that's kind of a lot of code and pointer shenanigans, but there isn't
much to it once you work through it. The outer `while` loop walks the linked
list of every object in the heap, checking their mark bits. If an object is
marked (black), we leave it alone and continue past it. If it is unmarked
(white), we unlink it from the list and free it using the `freeObject()`
function we already wrote.

<img src="image/garbage-collection/unlink.png" alt="A recycle bin full of bits." />

Most of the other code in here deals with the fact that removing a node from a
singly linked list is cumbersome. We have to continuously remember the previous
node so we can unlink its next pointer, and we have to handle the edge case
where we are freeing the first node. But, otherwise, it's pretty simple --
delete every node in a linked list that doesn't have a bit set in it.

There's one little addition:

^code unmark (1 before, 1 after)

After `sweep()` completes, the only remaining objects are the live black ones
with their mark bits set. That's correct, but when the *next* collection cycle
starts, we need every object to be white. So whenever we reach a black object,
we go ahead and clear the bit now in anticipation of the next run.

### Weak references and the string pool

We are almost done collecting. There is one remaining corner of the VM that has
some unusual requirements around memory. Recall that when we added strings to
clox we made the VM intern them all. That means the VM has a hash table
containing a pointer to every single string in the heap. The VM uses this to
de-duplicate strings.

During the mark phase, we deliberately did *not* treat the VM's string table as
a source of roots. If we had, no <span name="intern">string</span> would *ever*
be collected. The string table would grow and grow and never yield a single byte
of memory back to the operating system. That would be bad.

<aside name="intern">

This can be a real problem. Java does not intern *all* strings, but it does
intern string *literals*. It also provides an API to add strings to the string
table. For many years, the capacity of that table was fixed, and strings added
to it could never be removed. If users weren't careful about their use of
`String.intern()`, they could run out of memory and crash.

Ruby had a similar problem for years where symbols -- interned string-like
values -- were not garbage collected. Both eventually enabled the GC to collect
these strings.

</aside>

At the same time, if we *do* let the GC free strings, then the VM's string table
will be left with dangling pointers to freed memory. That would be even worse.

The string table is special and we need special support for it. In particular,
it needs a special kind of reference. The table should be able to refer to a
string, but that link should not be considered a root when determining
reachability. That implies that the referenced object can be freed. When that
happens, the dangling reference must be fixed too, sort of like a magic,
self-clearing pointer. This particular set of semantics comes up frequently
enough that it has a name: a [**weak reference**][weak].

[weak]: https://en.wikipedia.org/wiki/Weak_reference

We have already implicitly implemented half of the string table's unique
behavior by virtue of the fact that we *don't* traverse it during marking. That
means it doesn't force strings to be reachable. The remaining piece is clearing
out any dangling pointers for strings that are freed.

To remove references to unreachable strings, we need to know which strings *are*
unreachable. We don't know that until after the mark phase has completed. But we
can't wait until after the sweep phase is done because by then the objects --
and their mark bits -- are no longer around to check. So the right time is
exactly between the marking and sweeping phases.

^code sweep-strings (1 before, 1 after)

The logic for removing the about-to-be-deleted strings exists in a new function
in the "table" module.

^code table-remove-white-h (2 before, 2 after)

The implementation is here:

^code table-remove-white

We walk every entry in the table. The string intern table uses only the key of
each entry -- it's basically a hash *set* not a hash *map*. If the key string
object's mark bit is not set, then it is a white object that is moments from
being swept away. We delete it from the hash table first and thus ensure we
won't see any dangling pointers.

## When to Collect

We have a fully functioning mark-sweep garbage collector now. When the stress
testing flag is enabled, it gets called all the time, and with the logging
enabled too, we can watch it do its thing and see that it is indeed reclaiming
memory. But, when the stress testing flag is off, it never runs at all. It's
time to decide when the collector should be invoked during normal program
execution.

As far as I can tell, this question is poorly answered by the literature. When
garbage collectors were first invented, computers had a tiny, fixed amount of
memory. Many of the early GC papers assumed that you set aside a few thousand
words of memory -- in other words, most of it -- and invoked the collector
whenever you ran out. Simple.

Modern machines have gigs of physical RAM, hidden behind the operating system's
even larger virtual memory abstraction, which is shared among a slew of other
programs all fighting for their chunk of memory. The operating system will let
your program request as much as it wants and then page in and out from the disk
when physical memory gets full. You never really "run out" of memory, you just
get slower and slower.

### Latency and throughput

It no longer makes sense to wait until you "have to" run the GC, so we need
a more subtle timing strategy. To reason about this more precisely, it's time to
introduce two fundamental numbers used when measuring a memory manager's
performance: *throughput* and *latency*.

Every managed language pays a performance price compared to explicit,
user-authored deallocation. The time spent actually freeing memory is the same,
but the GC spends cycles figuring out *which* memory to free. That is time *not*
spent running the user's code and doing useful work. In our implementation,
that's the entirety of the mark phase. The goal of a sophisticated garbage
collector is to minimize that overhead.

There are two key metrics we can use to understand that cost better:

*   **Throughput** is the total fraction of time spent running user code versus
    doing garbage collection work. Say you run a clox program for ten seconds
    and it spends a second of that inside `collectGarbage()`. That means the
    throughput is 90% -- it spent 90% of the time running the program and 10%
    on GC overhead.

    Throughput is the most fundamental measure because it tracks the total cost
    of collection overhead. All else being equal, you want to maximize
    throughput. Up until this chapter, clox had no GC at all and thus <span
    name="hundred">100%</span> throughput. That's pretty hard to beat. Of
    course, it came at the slight expense of potentially running out of memory
    and crashing if the user's program ran long enough. You can look at the goal
    of a GC as fixing that "glitch" while sacrificing as little throughput as
    possible.

<aside name="hundred">

Well, not *exactly* 100%. It did still put the allocated objects into a linked
list, so there was some tiny overhead for setting those pointers.

</aside>

*   **Latency** is the longest *continuous* chunk of time where the user's
    program is completely paused while garbage collection happens. It's a
    measure of how "chunky" the collector is. Latency is an entirely different
    metric than throughput.

    Consider two runs of a clox program that both take ten seconds. In the first
    run, the GC kicks in once and spends a solid second in `collectGarbage()` in
    one massive collection. In the second run, the GC gets invoked five times,
    each for a fifth of a second. The *total* amount of time spent collecting is
    still a second, so the throughput is 90% in both cases. But in the second
    run, the latency is only 1/5th of a second, five times less than in the
    first.

<span name="latency"></span>

<img src="image/garbage-collection/latency-throughput.png" alt="A bar representing execution time with slices for running user code and running the GC. The largest GC slice is latency. The size of all of the user code slices is throughput." />

<aside name="latency">

The bar represents the execution of a program, divided into time spent running
user code and time spent in the GC. The size of the largest single slice of time
running the GC is the latency. The size of all of the user code slices added up
is the throughput.

</aside>

If you like analogies, imagine your program is a bakery selling fresh-baked
bread to customers. Throughput is the total number of warm, crusty baguettes you
can serve to customers in a single day. Latency is how long the unluckiest
customer has to wait in line before they get served.

<span name="dishwasher">Running</span> the garbage collector is like shutting
down the bakery temporarily to go through all of the dishes, sort out the dirty
from the clean, and then wash the used ones. In our analogy, we don't have
dedicated dishwashers, so while this is going on, no baking is happening. The
baker is washing up.

<aside name="dishwasher">

If each person represents a thread, then an obvious optimization is to have
separate threads running garbage collection, giving you a **concurrent garbage
collector**. In other words, hire some dishwashers to clean while others bake.
This is how very sophisticated GCs work because it does let the bakers
-- the worker threads -- keep running user code with little interruption.

However, coordination is required. You don't want a dishwasher grabbing a bowl
out of a baker's hands! This coordination adds overhead and a lot of complexity.
Concurrent collectors are fast, but challenging to implement correctly.

<img src="image/garbage-collection/baguette.png" class="above" alt="Un baguette." />

</aside>

Selling fewer loaves of bread a day is bad, and making any particular customer
sit and wait while you clean all the dishes is too. The goal is to maximize
throughput and minimize latency, but there is no free lunch, even inside a
bakery. Garbage collectors make different trade-offs between how much throughput
they sacrifice and how much latency they tolerate.

Being able to make these trade-offs is useful because different user programs
have different needs. An overnight batch job that is generating a report from a
terabyte of data just needs to get as much work done as fast as possible.
Throughput is queen. Meanwhile, an app running on a user's smartphone needs to
always respond immediately to user input so that dragging on the screen feels
<span name="butter">buttery</span> smooth. The app can't freeze for a few
seconds while the GC mucks around in the heap.

<aside name="butter">

Clearly the baking analogy is going to my head.

</aside>

As a garbage collector author, you control some of the trade-off between
throughput and latency by your choice of collection algorithm. But even within a
single algorithm, we have a lot of control over *how frequently* the collector
runs.

Our collector is a <span name="incremental">**stop-the-world GC**</span> which
means the user's program is paused until the entire garbage collection process
has completed. If we wait a long time before we run the collector, then a large
number of dead objects will accumulate. That leads to a very long pause while
the collector runs, and thus high latency. So, clearly, we want to run the
collector really frequently.

<aside name="incremental">

In contrast, an **incremental garbage collector** can do a little collection,
then run some user code, then collect a little more, and so on.

</aside>

But every time the collector runs, it spends some time visiting live objects.
That doesn't really *do* anything useful (aside from ensuring that they don't
incorrectly get deleted). Time visiting live objects is time not freeing memory
and also time not running user code. If you run the GC *really* frequently, then
the user's program doesn't have enough time to even generate new garbage for the
VM to collect. The VM will spend all of its time obsessively revisiting the same
set of live objects over and over, and throughput will suffer. So, clearly, we
want to run the collector really *in*frequently.

In fact, we want something in the middle, and the frequency of when the
collector runs is one of our main knobs for tuning the trade-off between latency
and throughput.

### Self-adjusting heap

We want our GC to run frequently enough to minimize latency but infrequently
enough to maintain decent throughput. But how do we find the balance between
these when we have no idea how much memory the user's program needs and how
often it allocates? We could pawn the problem off on the user and force them to
pick by exposing GC tuning parameters. Many VMs do this. But if we, the GC
authors, don't know how to tune it well, odds are good most users won't either.
They deserve a reasonable default behavior.

I'll be honest with you, this is not my area of expertise. I've talked to a
number of professional GC hackers -- this is something you can build an entire
career on -- and read a lot of the literature, and all of the answers I got
were... vague. The strategy I ended up picking is common, pretty simple, and (I
hope!) good enough for most uses.

The idea is that the collector frequency automatically adjusts based on the live
size of the heap. We track the total number of bytes of managed memory that the
VM has allocated. When it goes above some threshold, we trigger a GC. After
that, we note how many bytes of memory remain -- how many were *not* freed. Then
we adjust the threshold to some value larger than that.

The result is that as the amount of live memory increases, we collect less
frequently in order to avoid sacrificing throughput by re-traversing the growing
pile of live objects. As the amount of live memory goes down, we collect more
frequently so that we don't incur too much latency by waiting too long.

The implementation requires two new bookkeeping fields in the VM.

^code vm-fields (1 before, 1 after)

The first is a running total of the number of bytes of managed memory the VM has
allocated. The second is the threshold that triggers the next collection. We
initialize them when the VM starts up.

^code init-gc-fields (1 before, 2 after)

The starting threshold here is <span name="lab">arbitrary</span>. It's similar
to the initial capacity we picked for our various dynamic arrays. The goal is to
not trigger the first few GCs *too* quickly but also to not wait too long. If we
had some real-world Lox programs, we could profile those to tune this. But since
all we have are toy programs, I just picked a number.

<aside name="lab">

A challenge with learning garbage collectors is that it's *very* hard to
discover the best practices in an isolated lab environment. You don't see how a
collector actually performs unless you run it on the kind of large, messy
real-world programs it is actually intended for. It's like tuning a rally car
-- you need to take it out on the course.

</aside>

Every time we allocate or free some memory, we adjust the counter by that delta.

^code updated-bytes-allocated (1 before, 1 after)

When the total crosses the limit, we run the collector.

^code collect-on-next (2 before, 1 after)

Now, finally, our garbage collector actually does something when the user runs a
program without our hidden diagnostic flag enabled. The sweep phase frees
objects by calling `reallocate()`, which lowers the value of `bytesAllocated`,
so after the collection completes, we know how many live bytes remain. We adjust
the threshold of the next GC based on that.

^code update-next-gc (1 before, 2 after)

The threshold is a multiple of the heap size. This way, as the amount of memory
the program uses grows, the threshold moves farther out to limit the total time
spent re-traversing the larger live set. Like other numbers in this chapter, the
scaling factor is basically arbitrary.

^code heap-grow-factor (1 before, 2 after)

You'd want to tune this in your implementation once you had some real programs
to benchmark it on. Right now, we can at least log some of the statistics that
we have. We capture the heap size before the collection.

^code log-before-size (1 before, 1 after)

And then print the results at the end.

^code log-collected-amount (1 before, 1 after)

This way we can see how much the garbage collector accomplished while it ran.

## Garbage Collection Bugs

In theory, we are all done now. We have a GC. It kicks in periodically, collects
what it can, and leaves the rest. If this were a typical textbook, we would wipe
the dust from our hands and bask in the soft glow of the flawless marble edifice
we have created.

But I aim to teach you not just the theory of programming languages but the
sometimes painful reality. I am going to roll over a rotten log and show you the
nasty bugs that live under it, and garbage collector bugs really are some of the
grossest invertebrates out there.

The collector's job is to free dead objects and preserve live ones. Mistakes are
easy to make in both directions. If the VM fails to free objects that aren't
needed, it slowly leaks memory. If it frees an object that is in use, the user's
program can access invalid memory. These failures often don't immediately cause
a crash, which makes it hard for us to trace backward in time to find the bug.

This is made harder by the fact that we don't know when the collector will run.
Any call that eventually allocates some memory is a place in the VM where a
collection could happen. It's like musical chairs. At any point, the GC might
stop the music. Every single heap-allocated object that we want to keep needs to
find a chair quickly -- get marked as a root or stored as a reference in some
other object -- before the sweep phase comes to kick it out of the game.

How is it possible for the VM to use an object later -- one that the GC itself
doesn't see? How can the VM find it? The most common answer is through a pointer
stored in some local variable on the C stack. The GC walks the *VM's* value and
CallFrame stacks, but the C stack is <span name="c">hidden</span> to it.

<aside name="c">

Our GC can't find addresses in the C stack, but many can. Conservative garbage
collectors look all through memory, including the native stack. The most
well-known of this variety is the [**Boehm–Demers–Weiser garbage
collector**][boehm], usually just called the "Boehm collector". (The shortest
path to fame in CS is a last name that's alphabetically early so that it shows
up first in sorted lists of names.)

[boehm]: https://en.wikipedia.org/wiki/Boehm_garbage_collector

Many precise GCs walk the C stack too. Even those have to be careful about
pointers to live objects that exist only in *CPU registers*.

</aside>

In previous chapters, we wrote seemingly pointless code that pushed an object
onto the VM's value stack, did a little work, and then popped it right back off.
Most times, I said this was for the GC's benefit. Now you see why. The code
between pushing and popping potentially allocates memory and thus can trigger a
GC. We had to make sure the object was on the value stack so that the
collector's mark phase would find it and keep it alive.

I wrote the entire clox implementation before splitting it into chapters and
writing the prose, so I had plenty of time to find all of these corners and
flush out most of these bugs. The stress testing code we put in at the beginning
of this chapter and a pretty good test suite were very helpful.

But I fixed only *most* of them. I left a couple in because I want to give you a
hint of what it's like to encounter these bugs in the wild. If you enable the
stress test flag and run some toy Lox programs, you can probably stumble onto a
few. Give it a try and *see if you can fix any yourself*.


### Adding to the constant table

You are very likely to hit the first bug. The constant table each chunk owns is
a dynamic array. When the compiler adds a new constant to the current function's
table, that array may need to grow. The constant itself may also be some
heap-allocated object like a string or a nested function.

The new object being added to the constant table is passed to `addConstant()`.
At that moment, the object can be found only in the parameter to that function
on the C stack. That function appends the object to the constant table. If the
table doesn't have enough capacity and needs to grow, it calls `reallocate()`.
That in turn triggers a GC, which fails to mark the new constant object and
thus sweeps it right before we have a chance to add it to the table. Crash.

The fix, as you've seen in other places, is to push the constant onto the stack
temporarily.

^code add-constant-push (1 before, 1 after)

Once the constant table contains the object, we pop it off the stack.

^code add-constant-pop (1 before, 1 after)

When the GC is marking roots, it walks the chain of compilers and marks each of
their functions, so the new constant is reachable now. We do need an include
to call into the VM from the "chunk" module.

^code chunk-include-vm (1 before, 2 after)

### Interning strings

Here's another similar one. All strings are interned in clox, so whenever we
create a new string, we also add it to the intern table. You can see where this
is going. Since the string is brand new, it isn't reachable anywhere. And
resizing the string pool can trigger a collection. Again, we go ahead and stash
the string on the stack first.

^code push-string (2 before, 1 after)

And then pop it back off once it's safely nestled in the table.

^code pop-string (1 before, 2 after)

This ensures the string is safe while the table is being resized. Once it
survives that, `allocateString()` will return it to some caller which can then
take responsibility for ensuring the string is still reachable before the next
heap allocation occurs.

### Concatenating strings

One last example: Over in the interpreter, the `OP_ADD` instruction can be used
to concatenate two strings. As it does with numbers, it pops the two operands
from the stack, computes the result, and pushes that new value back onto the
stack. For numbers that's perfectly safe.

But concatenating two strings requires allocating a new character array on the
heap, which can in turn trigger a GC. Since we've already popped the operand
strings by that point, they can potentially be missed by the mark phase and get
swept away. Instead of popping them off the stack eagerly, we peek them.

^code concatenate-peek (1 before, 2 after)

That way, they are still hanging out on the stack when we create the result
string. Once that's done, we can safely pop them off and replace them with the
result.

^code concatenate-pop (1 before, 1 after)

Those were all pretty easy, especially because I *showed* you where the fix was.
In practice, *finding* them is the hard part. All you see is an object that
*should* be there but isn't. It's not like other bugs where you're looking for
the code that *causes* some problem. You're looking for the *absence* of code
that would have *prevented* a problem, and that's a much harder search.

But, for now at least, you can rest easy. As far as I know, we've found all of
the collection bugs in clox, and now we have a working, robust, self-tuning,
mark-sweep garbage collector.

<div class="challenges">

## Challenges

1.  The Obj header struct at the top of each object now has three fields:
    `type`, `isMarked`, and `next`. How much memory do those take up (on your
    machine)? Can you come up with something more compact? Is there a runtime
    cost to doing so?

1.  When the sweep phase traverses a live object, it clears the `isMarked`
    field to prepare it for the next collection cycle. Can you come up with a
    more efficient approach?

1.  Mark-sweep is only one of a variety of garbage collection algorithms out
    there. Explore those by replacing or augmenting the current collector with
    another one. Good candidates to consider are reference counting, Cheney's
    algorithm, or the Lisp 2 mark-compact algorithm.

</div>

<div class="design-note">

## Design Note: Generational Collectors

A collector loses throughput if it spends a long time re-visiting objects that
are still alive. But it can increase latency if it avoids collecting and
accumulates a large pile of garbage to wade through. If only there were some way
to tell which objects were likely to be long-lived and which weren't. Then the
GC could avoid revisiting the long-lived ones as often and clean up the
ephemeral ones more frequently.

It turns out there kind of is. Many years ago, GC researchers gathered metrics
on the lifetime of objects in real-world running programs. They tracked every
object when it was allocated, and eventually when it was no longer needed, and
then graphed out how long objects tended to live.

They discovered something they called the **generational hypothesis**, or the
much less tactful term **infant mortality**. Their observation was that most
objects are very short-lived but once they survive beyond a certain age, they
tend to stick around quite a long time. The longer an object *has* lived, the
longer it likely will *continue* to live. This observation is powerful because
it gave them a handle on how to partition objects into groups that benefit from
frequent collections and those that don't.

They designed a technique called **generational garbage collection**. It works
like this: Every time a new object is allocated, it goes into a special,
relatively small region of the heap called the "nursery". Since objects tend to
die young, the garbage collector is invoked <span
name="nursery">frequently</span> over the objects just in this region.

<aside name="nursery">

Nurseries are also usually managed using a copying collector which is faster at
allocating and freeing objects than a mark-sweep collector.

</aside>

Each run of the GC over the nursery is called a "generation". Any objects
that are no longer needed get freed. Those that survive are now considered one
generation older, and the GC tracks this for each object. If an object survives
a certain number of generations -- often just a single collection -- it gets
*tenured*. At this point, it is copied out of the nursery into a much larger
heap region for long-lived objects. The garbage collector runs over that region
too, but much less frequently since odds are good that most of those objects
will still be alive.

Generational collectors are a beautiful marriage of empirical data -- the
observation that object lifetimes are *not* evenly distributed -- and clever
algorithm design that takes advantage of that fact. They're also conceptually
quite simple. You can think of one as just two separately tuned GCs and a pretty
simple policy for moving objects from one to the other.

</div>


================================================
FILE: book/global-variables.md
================================================
> If only there could be an invention that bottled up a memory, like scent. And
> it never faded, and it never got stale. And then, when one wanted it, the
> bottle could be uncorked, and it would be like living the moment all over
> again.
>
> <cite>Daphne du Maurier, <em>Rebecca</em></cite>

The [previous chapter][hash] was a long exploration of one big, deep,
fundamental computer science data structure. Heavy on theory and concept. There
may have been some discussion of big-O notation and algorithms. This chapter has
fewer intellectual pretensions. There are no large ideas to learn. Instead, it's
a handful of straightforward engineering tasks. Once we've completed them, our
virtual machine will support variables.

Actually, it will support only *global* variables. Locals are coming in the
[next chapter][]. In jlox, we managed to cram them both into a single chapter
because we used the same implementation technique for all variables. We built a
chain of environments, one for each scope, all the way up to the top. That was a
simple, clean way to learn how to manage state.

[next chapter]: local-variables.html

But it's also *slow*. Allocating a new hash table each time you enter a block or
call a function is not the road to a fast VM. Given how much code is concerned
with using variables, if variables go slow, everything goes slow. For clox,
we'll improve that by using a much more efficient strategy for <span
name="different">local</span> variables, but globals aren't as easily optimized.

<aside name="different">

This is a common meta-strategy in sophisticated language implementations. Often,
the same language feature will have multiple implementation techniques, each
tuned for different use patterns. For example, JavaScript VMs often have a
faster representation for objects that are used more like instances of classes
compared to other objects whose set of properties is more freely modified. C and
C++ compilers usually have a variety of ways to compile `switch` statements
based on the number of cases and how densely packed the case values are.

</aside>

[hash]: hash-tables.html

A quick refresher on Lox semantics: Global variables in Lox are "late bound", or
resolved dynamically. This means you can compile a chunk of code that refers to
a global variable before it's defined. As long as the code doesn't *execute*
before the definition happens, everything is fine. In practice, that means you
can refer to later variables inside the body of functions.

```lox
fun showVariable() {
  print global;
}

var global = "after";
showVariable();
```

Code like this might seem odd, but it's handy for defining mutually recursive
functions. It also plays nicer with the REPL. You can write a little function in
one line, then define the variable it uses in the next.

Local variables work differently. Since a local variable's declaration *always*
occurs before it is used, the VM can resolve it at compile time, even in a
simple single-pass compiler. That will let us use a smarter representation for
locals. But that's for the next chapter. Right now, let's just worry about
globals.

## Statements

Variables come into being using variable declarations, which means now is also
the time to add support for statements to our compiler. If you recall, Lox
splits statements into two categories. "Declarations" are those statements that
bind a new name to a value. The other kinds of statements -- control flow,
print, etc. -- are just called "statements". We disallow declarations directly
inside control flow statements, like this:

```lox
if (monday) var croissant = "yes"; // Error.
```

Allowing it would raise confusing questions around the scope of the variable.
So, like other languages, we prohibit it syntactically by having a separate
grammar rule for the subset of statements that *are* allowed inside a control
flow body.

```ebnf
statement      → exprStmt
               | forStmt
               | ifStmt
               | printStmt
               | returnStmt
               | whileStmt
               | block ;
```

Then we use a separate rule for the top level of a script and inside a block.

```ebnf
declaration    → classDecl
               | funDecl
               | varDecl
               | statement ;
```

The `declaration` rule contains the statements that declare names, and also
includes `statement` so that all statement types are allowed. Since `block`
itself is in `statement`, you can put declarations <span
name="parens">inside</span> a control flow construct by nesting them inside a
block.

<aside name="parens">

Blocks work sort of like parentheses do for expressions. A block lets you put
the "lower-precedence" declaration statements in places where only a
"higher-precedence" non-declaring statement is allowed.

</aside>

In this chapter, we'll cover only a couple of statements and one
declaration.

```ebnf
statement      → exprStmt
               | printStmt ;

declaration    → varDecl
               | statement ;
```

Up to now, our VM considered a "program" to be a single expression since that's
all we could parse and compile. In a full Lox implementation, a program is a
sequence of declarations. We're ready to support that now.

^code compile (1 before, 1 after)

We keep compiling declarations until we hit the end of the source file. We
compile a single declaration using this:

^code declaration

We'll get to variable declarations later in the chapter, so for now, we simply
forward to `statement()`.

^code statement

Blocks can contain declarations, and control flow statements can contain other
statements. That means these two functions will eventually be recursive. We may
as well write out the forward declarations now.

^code forward-declarations (1 before, 1 after)

### Print statements

We have two statement types to support in this chapter. Let's start with `print`
statements, which begin, naturally enough, with a `print` token. We detect that
using this helper function:

^code match

You may recognize it from jlox. If the current token has the given type, we
consume the token and return `true`. Otherwise we leave the token alone and
return `false`. This <span name="turtles">helper</span> function is implemented
in terms of this other helper:

<aside name="turtles">

It's helpers all the way down!

</aside>

^code check

The `check()` function returns `true` if the current token has the given type.
It seems a little <span name="read">silly</span> to wrap this in a function, but
we'll use it more later, and I think short verb-named functions like this make
the parser easier to read.

<aside name="read">

This sounds trivial, but handwritten parsers for non-toy languages get pretty
big. When you have thousands of lines of code, a utility function that turns two
lines into one and makes the result a little more readable easily earns its
keep.

</aside>

If we did match the `print` token, then we compile the rest of the statement
here:

^code print-statement

A `print` statement evaluates an expression and prints the result, so we first
parse and compile that expression. The grammar expects a semicolon after that,
so we consume it. Finally, we emit a new instruction to print the result.

^code op-print (1 before, 1 after)

At runtime, we execute this instruction like so:

^code interpret-print (1 before, 1 after)

When the interpreter reaches this instruction, it has already executed the code
for the expression, leaving the result value on top of the stack. Now we simply
pop and print it.

Note that we don't push anything else after that. This is a key difference
between expressions and statements in the VM. Every bytecode instruction has a
<span name="effect">**stack effect**</span> that describes how the instruction
modifies the stack. For example, `OP_ADD` pops two values and pushes one,
leaving the stack one element smaller than before.

<aside name="effect">

The stack is one element shorter after an `OP_ADD`, so its effect is -1:

<img src="image/global-variables/stack-effect.png" alt="The stack effect of an OP_ADD instruction." />

</aside>

You can sum the stack effects of a series of instructions to get their total
effect. When you add the stack effects of the series of instructions compiled
from any complete expression, it will total one. Each expression leaves one
result value on the stack.

The bytecode for an entire statement has a total stack effect of zero. Since a
statement produces no values, it ultimately leaves the stack unchanged, though
it of course uses the stack while it's doing its thing. This is important
because when we get to control flow and looping, a program might execute a long
series of statements. If each statement grew or shrank the stack, it might
eventually overflow or underflow.

While we're in the interpreter loop, we should delete a bit of code.

^code op-return (1 before, 1 after)

When the VM only compiled and evaluated a single expression, we had some
temporary code in `OP_RETURN` to output the value. Now that we have statements
and `print`, we don't need that anymore. We're one <span
name="return">step</span> closer to the complete implementation of clox.

<aside name="return">

We're only one step closer, though. We will revisit `OP_RETURN` again when we
add functions. Right now, it exits the entire interpreter loop.

</aside>

As usual, a new instruction needs support in the disassembler.

^code disassemble-print (1 before, 1 after)

That's our `print` statement. If you want, give it a whirl:

```lox
print 1 + 2;
print 3 * 4;
```

Exciting! OK, maybe not thrilling, but we can build scripts that contain as many
statements as we want now, which feels like progress.

### Expression statements

Wait until you see the next statement. If we *don't* see a `print` keyword, then
we must be looking at an expression statement.

^code parse-expressions-statement (1 before, 1 after)

It's parsed like so:

^code expression-statement

An "expression statement" is simply an expression followed by a semicolon.
They're how you write an expression in a context where a statement is expected.
Usually, it's so that you can call a function or evaluate an assignment for its
side effect, like this:

```lox
brunch = "quiche";
eat(brunch);
```

Semantically, an expression statement evaluates the expression and discards the
result. The compiler directly encodes that behavior. It compiles the expression,
and then emits an `OP_POP` instruction.

^code pop-op (1 before, 1 after)

As the name implies, that instruction pops the top value off the stack and
forgets it.

^code interpret-pop (1 before, 1 after)

We can disassemble it too.

^code disassemble-pop (1 before, 1 after)

Expression statements aren't very useful yet since we can't create any
expressions that have side effects, but they'll be essential when we
[add functions later][functions]. The <span name="majority">majority</span> of
statements in real-world code in languages like C are expression statements.

<aside name="majority">

By my count, 80 of the 149 statements in the version of "compiler.c" that we
have at the end of this chapter are expression statements.

</aside>

[functions]: calls-and-functions.html

### Error synchronization

While we're getting this initial work done in the compiler, we can tie off a
loose end we left [several chapters back][errors]. Like jlox, clox uses panic
mode error recovery to minimize the number of cascaded compile errors that it
reports. The compiler exits panic mode when it reaches a synchronization point.
For Lox, we chose statement boundaries as that point. Now that we have
statements, we can implement synchronization.

[errors]: compiling-expressions.html#handling-syntax-errors

^code call-synchronize (1 before, 1 after)

If we hit a compile error while parsing the previous statement, we enter panic
mode. When that happens, after the statement we start synchronizing.

^code synchronize

We skip tokens indiscriminately until we reach something that looks like a
statement boundary. We recognize the boundary by looking for a preceding token
that can end a statement, like a semicolon. Or we'll look for a subsequent token
that begins a statement, usually one of the control flow or declaration
keywords.

## Variable Declarations

Merely being able to *print* doesn't win your language any prizes at the
programming language <span name="fair">fair</span>, so let's move on to
something a little more ambitious and get variables going. There are three
operations we need to support:

<aside name="fair">

I can't help but imagine a "language fair" like some country 4H thing. Rows of
straw-lined stalls full of baby languages *moo*ing and *baa*ing at each other.

</aside>

*   Declaring a new variable using a `var` statement.
*   Accessing the value of a variable using an identifier expression.
*   Storing a new value in an existing variable using an assignment expression.

We can't do either of the last two until we have some variables, so we start
with declarations.

^code match-var (1 before, 2 after)

The placeholder parsing function we sketched out for the declaration grammar
rule has an actual production now. If we match a `var` token, we jump here:

^code var-declaration

The keyword is followed by the variable name. That's compiled by
`parseVariable()`, which we'll get to in a second. Then we look for an `=`
followed by an initializer expression. If the user doesn't initialize the
variable, the compiler implicitly initializes it to <span
name="nil">`nil`</span> by emitting an `OP_NIL` instruction. Either way, we
expect the statement to be terminated with a semicolon.

<aside name="nil" class="bottom">

Essentially, the compiler desugars a variable declaration like:

```lox
var a;
```

into:

```lox
var a = nil;
```

The code it generates for the former is identical to what it produces for the
latter.

</aside>

There are two new functions here for working with variables and identifiers.
Here is the first:

^code parse-variable (2 before)

It requires the next token to be an identifier, which it consumes and sends
here:

^code identifier-constant (2 before)

This function takes the given token and adds its lexeme to the chunk's constant
table as a string. It then returns the index of that constant in the constant
table.

Global variables are looked up *by name* at runtime. That means the VM -- the
bytecode interpreter loop -- needs access to the name. A whole string is too big
to stuff into the bytecode stream as an operand. Instead, we store the string in
the constant table and the instruction then refers to the name by its index in
the table.

This function returns that index all the way to `varDeclaration()` which later
hands it over to here:

^code define-variable

<span name="helper">This</span> outputs the bytecode instruction that defines
the new variable and stores its initial value. The index of the variable's name
in the constant table is the instruction's operand. As usual in a stack-based
VM, we emit this instruction last. At runtime, we execute the code for the
variable's initializer first. That leaves the value on the stack. Then this
instruction takes that value and stores it away for later.

<aside name="helper">

I know some of these functions seem pretty pointless right now. But we'll get
more mileage out of them as we add more language features for working with
names. Function and class declarations both declare new variables, and variable
and assignment expressions access them.

</aside>

Over in the runtime, we begin with this new instruction:

^code define-global-op (1 before, 1 after)

Thanks to our handy-dandy hash table, the implementation isn't too hard.

^code interpret-define-global (1 before, 1 after)

We get the name of the variable from the constant table. Then we <span
name="pop">take</span> the value from the top of the stack and store it in a
hash table with that name as the key.

<aside name="pop">

Note that we don't *pop* the value until *after* we add it to the hash table.
That ensures the VM can still find the value if a garbage collection is
triggered right in the middle of adding it to the hash table. That's a distinct
possibility since the hash table requires dynamic allocation when it resizes.

</aside>

This code doesn't check to see if the key is already in the table. Lox is pretty
lax with global variables and lets you redefine them without error. That's
useful in a REPL session, so the VM supports that by simply overwriting the
value if the key happens to already be in the hash table.

There's another little helper macro:

^code read-string (1 before, 1 after)

It reads a one-byte operand from the bytecode chunk. It treats that as an index
into the chunk's constant table and returns the string at that index. It doesn't
check that the value *is* a string -- it just indiscriminately casts it. That's
safe because the compiler never emits an instruction that refers to a non-string
constant.

Because we care about lexical hygiene, we also undefine this macro at the end of
the interpret function.

^code undef-read-string (1 before, 1 after)

I keep saying "the hash table", but we don't actually have one yet. We need a
place to store these globals. Since we want them to persist as long as clox is
running, we store them right in the VM.

^code vm-globals (1 before, 1 after)

As we did with the string table, we need to initialize the hash table to a valid
state when the VM boots up.

^code init-globals (1 before, 1 after)

And we <span name="tear">tear</span> it down when we exit.

<aside name="tear">

The process will free everything on exit, but it feels undignified to require
the operating system to clean up our mess.

</aside>

^code free-globals (1 before, 1 after)

As usual, we want to be able to disassemble the new instruction too.

^code disassemble-define-global (1 before, 1 after)

And with that, we can define global variables. Not that users can *tell* that
they've done so, because they can't actually *use* them. So let's fix that next.

## Reading Variables

As in every programming language ever, we access a variable's value using its
name. We hook up identifier tokens to the expression parser here:

^code table-identifier (1 before, 1 after)

That calls this new parser function:

^code variable-without-assign

Like with declarations, there are a couple of tiny helper functions that seem
pointless now but will become more useful in later chapters. I promise.

^code read-named-variable

This calls the same `identifierConstant()` function from before to take the
given identifier token and add its lexeme to the chunk's constant table as a
string. All that remains is to emit an instruction that loads the global
variable with that name. Here's the instruction:

^code get-global-op (1 before, 1 after)

Over in the interpreter, the implementation mirrors `OP_DEFINE_GLOBAL`.

^code interpret-get-global (1 before, 1 after)

We pull the constant table index from the instruction's operand and get the
variable name. Then we use that as a key to look up the variable's value in the
globals hash table.

If the key isn't present in the hash table, it means that global variable has
never been defined. That's a runtime error in Lox, so we report it and exit the
interpreter loop if that happens. Otherwise, we take the value and push it
onto the stack.

^code disassemble-get-global (1 before, 1 after)

A little bit of disassembling, and we're done. Our interpreter is now able to
run code like this:

```lox
var beverage = "cafe au lait";
var breakfast = "beignets with " + beverage;
print breakfast;
```

There's only one operation left.

## Assignment

Throughout this book, I've tried to keep you on a fairly safe and easy path. I
don't avoid hard *problems*, but I try to not make the *solutions* more complex
than they need to be. Alas, other design choices in our <span
name="jlox">bytecode</span> compiler make assignment annoying to implement.

<aside name="jlox">

If you recall, assignment was pretty easy in jlox.

</aside>

Our bytecode VM uses a single-pass compiler. It parses and generates bytecode
on the fly without any intermediate AST. As soon as it recognizes a piece of
syntax, it emits code for it. Assignment doesn't naturally fit that. Consider:

```lox
menu.brunch(sunday).beverage = "mimosa";
```

In this code, the parser doesn't realize `menu.brunch(sunday).beverage` is the
target of an assignment and not a normal expression until it reaches `=`, many
tokens after the first `menu`. By then, the compiler has already emitted
bytecode for the whole thing.

The problem is not as dire as it might seem, though. Look at how the parser sees that example:

<img src="image/global-variables/setter.png" alt="The 'menu.brunch(sunday).beverage = &quot;mimosa&quot;' statement, showing that 'menu.brunch(sunday)' is an expression." />

Even though the `.beverage` part must not be compiled as a get expression,
everything to the left of the `.` is an expression, with the normal expression
semantics. The `menu.brunch(sunday)` part can be compiled and executed as usual.

Fortunately for us, the only semantic differences on the left side of an
assignment appear at the very right-most end of the tokens, immediately
preceding the `=`. Even though the receiver of a setter may be an arbitrarily
long expression, the part whose behavior differs from a get expression is only
the trailing identifier, which is right before the `=`. We don't need much
lookahead to realize `beverage` should be compiled as a set expression and not a
getter.

Variables are even easier since they are just a single bare identifier before an
`=`. The idea then is that right *before* compiling an expression that can also
be used as an assignment target, we look for a subsequent `=` token. If we see
one, we compile it as an assignment or setter instead of a variable access or
getter.

We don't have setters to worry about yet, so all we need to handle are variables.

^code named-variable (1 before, 1 after)

In the parse function for identifier expressions, we look for an equals sign
after the identifier. If we find one, instead of emitting code for a variable
access, we compile the assigned value and then emit an assignment instruction.

That's the last instruction we need to add in this chapter.

^code set-global-op (1 before, 1 after)

As you'd expect, its runtime behavior is similar to defining a new variable.

^code interpret-set-global (1 before, 1 after)

The main difference is what happens when the key doesn't already exist in the
globals hash table. If the variable hasn't been defined yet, it's a <span
name="delete">runtime error</span> to try to assign to it. Lox [doesn't do
implicit variable declaration][implicit].

<aside name="delete">

The call to `tableSet()` stores the value in the global variable table even if
the variable wasn't previously defined. That fact is visible in a REPL session,
since it keeps running even after the runtime error is reported. So we also take
care to delete that zombie value from the table.

</aside>

The other difference is that setting a variable doesn't pop the value off the
stack. Remember, assignment is an expression, so it needs to leave that value
there in case the assignment is nested inside some larger expression.

[implicit]: statements-and-state.html#design-note

Add a dash of disassembly:

^code disassemble-set-global (2 before, 1 after)

So we're done, right? Well... not quite. We've made a mistake! Take a gander at:

```lox
a * b = c + d;
```

According to Lox's grammar, `=` has the lowest precedence, so this should be
parsed roughly like:

<img src="image/global-variables/ast-good.png" alt="The expected parse, like '(a * b) = (c + d)'." />

Obviously, `a * b` isn't a <span name="do">valid</span> assignment target, so
this should be a syntax error. But here's what our parser does:

<aside name="do">

Wouldn't it be wild if `a * b` *was* a valid assignment target, though? You
could imagine some algebra-like language that tried to divide the assigned value
up in some reasonable way and distribute it to `a` and `b`... that's probably
a terrible idea.

</aside>

1.  First, `parsePrecedence()` parses `a` using the `variable()` prefix parser.
1.  After that, it enters the infix parsing loop.
1.  It reaches the `*` and calls `binary()`.
1.  That recursively calls `parsePrecedence()` to parse the right-hand operand.
1.  That calls `variable()` again for parsing `b`.
1.  Inside that call to `variable()`, it looks for a trailing `=`. It sees one
    and thus parses the rest of the line as an assignment.

In other words, the parser sees the above code like:

<img src="image/global-variables/ast-bad.png" alt="The actual parse, like 'a * (b = c + d)'." />

We've messed up the precedence handling because `variable()` doesn't take into
account the precedence of the surrounding expression that contains the variable.
If the variable happens to be the right-hand side of an infix operator, or the
operand of a unary operator, then that containing expression is too high
precedence to permit the `=`.

To fix this, `variable()` should look for and consume the `=` only if it's in
the context of a low-precedence expression. The code that knows the current
precedence is, logically enough, `parsePrecedence()`. The `variable()` function
doesn't need to know the actual level. It just cares that the precedence is low
enough to allow assignment, so we pass that fact in as a Boolean.

^code prefix-rule (4 before, 2 after)

Since assignment is the lowest-precedence expression, the only time we allow an
assignment is when parsing an assignment expression or top-level expression like
in an expression statement. That flag makes its way to the parser function here:

^code variable

Which passes it through a new parameter:

^code named-variable-signature (1 after)

And then finally uses it here:

^code named-variable-can-assign (2 before, 1 after)

That's a lot of plumbing to get literally one bit of data to the right place in
the compiler, but arrived it has. If the variable is nested inside some
expression with higher precedence, `canAssign` will be `false` and this will
ignore the `=` even if there is one there. Then `namedVariable()` returns, and
execution eventually makes its way back to `parsePrecedence()`.

Then what? What does the compiler do with our broken example from before? Right
now, `variable()` won't consume the `=`, so that will be the current token. The
compiler returns back to `parsePrecedence()` from the `variable()` prefix parser
and then tries to enter the infix parsing loop. There is no parsing function
associated with `=`, so it skips that loop.

Then `parsePrecedence()` silently returns back to the caller. That also isn't
right. If the `=` doesn't get consumed as part of the expression, nothing else
is going to consume it. It's an error and we should report it.

^code invalid-assign (2 before, 1 after)

With that, the previous bad program correctly gets an error at compile time. OK,
*now* are we done? Still not quite. See, we're passing an argument to one of the
parse functions. But those functions are stored in a table of function pointers,
so all of the parse functions need to have the same type. Even though most parse
functions don't support being used as an assignment target -- setters are the
<span name="index">only</span> other one -- our friendly C compiler requires
them *all* to accept the parameter.

<aside name="index">

If Lox had arrays and subscript operators like `array[index]` then an infix `[`
would also allow assignment to support `array[index] = value`.

</aside>

So we're going to finish off this chapter with some grunt work. First, let's go
ahead and pass the flag to the infix parse functions.

^code infix-rule (1 before, 1 after)

We'll need that for setters eventually. Then we'll fix the typedef for the
function type.

^code parse-fn-type (2 before, 2 after)

And some completely tedious code to accept this parameter in all of our existing
parse functions. Here:

^code binary (1 after)

And here:

^code parse-literal (1 after)

And here:

^code grouping (1 after)

And here:

^code number (1 after)

And here too:

^code string (1 after)

And, finally:

^code unary (1 after)

Phew! We're back to a C program we can compile. Fire it up and now you can run
this:

```lox
var breakfast = "beignets";
var beverage = "cafe au lait";
breakfast = "beignets with " + beverage;

print breakfast;
```

It's starting to look like real code for an actual language!

<div class="challenges">

## Challenges

1.  The compiler adds a global variable's name to the constant table as a string
    every time an identifier is encountered. It creates a new constant each
    time, even if that variable name is already in a previous slot in the
    constant table. That's wasteful in cases where the same variable is
    referenced multiple times by the same function. That, in turn, increases the
    odds of filling up the constant table and running out of slots since we
    allow only 256 constants in a single chunk.

    Optimize this. How does your optimization affect the performance of the
    compiler compared to the runtime? Is this the right trade-off?

2.  Looking up a global variable by name in a hash table each time it is used
    is pretty slow, even with a good hash table. Can you come up with a more
    efficient way to store and access global variables without changing the
    semantics?

3.  When running in the REPL, a user might write a function that references an
    unknown global variable. Then, in the next line, they declare the variable.
    Lox should handle this gracefully by not reporting an "unknown variable"
    compile error when the function is first defined.

    But when a user runs a Lox *script*, the compiler has access to the full
    text of the entire program before any code is run. Consider this program:

    ```lox
    fun useVar() {
      print oops;
    }

    var ooops = "too many o's!";
    ```

    Here, we can tell statically that `oops` will not be defined because there
    is *no* declaration of that global anywhere in the program. Note that
    `useVar()` is never called, so even though the variable isn't defined, no
    runtime error will occur because it's never used either.

    We could report mistakes like this as compile errors, at least when running
    from a script. Do you think we should? Justify your answer. What do other
    scripting languages you know do?

</div>


================================================
FILE: book/hash-tables.md
================================================
> Hash, x. There is no definition for this word -- nobody knows what hash is.
>
> <cite>Ambrose Bierce, <em>The Unabridged Devil's Dictionary</em></cite>

Before we can add variables to our burgeoning virtual machine, we need some way
to look up a value given a variable's name. Later, when we add classes, we'll
also need a way to store fields on instances. The perfect data structure for
these problems and others is a hash table.

You probably already know what a hash table is, even if you don't know it by
that name. If you're a Java programmer, you call them "HashMaps". C# and Python
users call them "dictionaries". In C++, it's an "unordered map". "Objects" in
JavaScript and "tables" in Lua are hash tables under the hood, which is what
gives them their flexibility.

A hash table, whatever your language calls it, associates a set of **keys** with
a set of **values**. Each key/value pair is an **entry** in the table. Given a
key, you can look up its corresponding value. You can add new key/value pairs
and remove entries by key. If you add a new value for an existing key, it
replaces the previous entry.

Hash tables appear in so many languages because they are incredibly powerful.
Much of this power comes from one metric: given a key, a hash table returns the
corresponding value in <span name="constant">constant time</span>, *regardless
of how many keys are in the hash table*.

<aside name="constant">

More specifically, the *average-case* lookup time is constant. Worst-case
performance can be, well, worse. In practice, it's easy to avoid degenerate
behavior and stay on the happy path.

</aside>

That's pretty remarkable when you think about it. Imagine you've got a big stack
of business cards and I ask you to find a certain person. The bigger the pile
is, the longer it will take. Even if the pile is nicely sorted and you've got
the manual dexterity to do a binary search by hand, you're still talking
*O(log n)*. But with a <span name="rolodex">hash table</span>, it takes the
same time to find that business card when the stack has ten cards as when it has
a million.

<aside name="rolodex">

Stuff all those cards in a Rolodex -- does anyone even remember those things
anymore? -- with dividers for each letter, and you improve your speed
dramatically. As we'll see, that's not too far from the trick a hash table uses.

</aside>

## An Array of Buckets

A complete, fast hash table has a couple of moving parts. I'll introduce them
one at a time by working through a couple of toy problems and their solutions.
Eventually, we'll build up to a data structure that can associate any set of
names with their values.

For now, imagine if Lox was a *lot* more restricted in variable names. What if a
variable's name could only be a <span name="basic">single</span> lowercase
letter? How could we very efficiently represent a set of variable names and
their values?

<aside name="basic">

This limitation isn't *too* far-fetched. The initial versions of BASIC out of
Dartmouth allowed variable names to be only a single letter followed by one
optional digit.

</aside>

With only 26 possible variables (27 if you consider underscore a "letter", I
guess), the answer is easy. Declare a fixed-size array with 26 elements. We'll
follow tradition and call each element a **bucket**. Each represents a variable
with `a` starting at index zero. If there's a value in the array at some
letter's index, then that key is present with that value. Otherwise, the bucket
is empty and that key/value pair isn't in the data structure.

<aside name="bucket">

<img src="image/hash-tables/bucket-array.png" alt="A row of buckets, each
labeled with a letter of the alphabet." />

</aside>

Memory usage is great -- just a single, reasonably sized <span
name="bucket">array</span>. There's some waste from the empty buckets, but it's
not huge. There's no overhead for node pointers, padding, or other stuff you'd
get with something like a linked list or tree.

Performance is even better. Given a variable name -- its character -- you can
subtract the ASCII value of `a` and use the result to index directly into the
array. Then you can either look up the existing value or store a new value
directly into that slot. It doesn't get much faster than that.

This is sort of our Platonic ideal data structure. Lightning fast, dead simple,
and compact in memory. As we add support for more complex keys, we'll have to
make some concessions, but this is what we're aiming for. Even once you add in
hash functions, dynamic resizing, and collision resolution, this is still the
core of every hash table out there -- a contiguous array of buckets that you
index directly into.

### Load factor and wrapped keys

Confining Lox to single-letter variables would make our job as implementers
easier, but it's probably no fun programming in a language that gives you only
26 storage locations. What if we loosened it a little and allowed variables up
to <span name="six">eight</span> characters long?

<aside name="six">

Again, this restriction isn't so crazy. Early linkers for C treated only the
first six characters of external identifiers as meaningful. Everything after
that was ignored. If you've ever wondered why the C standard library is so
enamored of abbreviation -- looking at you, `strncmp()` -- it turns out it
wasn't entirely because of the small screens (or teletypes!) of the day.

</aside>

That's small enough that we can pack all eight characters into a 64-bit integer
and easily turn the string into a number. We can then use it as an array index.
Or, at least, we could if we could somehow allocate a 295,148 *petabyte* array.
Memory's gotten cheaper over time, but not quite *that* cheap. Even if we could
make an array that big, it would be heinously wasteful. Almost every bucket
would be empty unless users started writing way bigger Lox programs than we've
anticipated.

Even though our variable keys cover the full 64-bit numeric range, we clearly
don't need an array that large. Instead, we allocate an array with more than
enough capacity for the entries we need, but not unreasonably large. We map the
full 64-bit keys down to that smaller range by taking the value modulo the size
of the array. Doing that essentially folds the larger numeric range onto itself
until it fits the smaller range of array elements.

For example, say we want to store "bagel". We allocate an array with eight
elements, plenty enough to store it and more later. We treat the key string as a
64-bit integer. On a little-endian machine like Intel, packing those characters
into a 64-bit word puts the first letter, "b" (ASCII value 98), in the
least-significant byte. We take that integer modulo the array size (<span
name="power-of-two">8</span>) to fit it in the bounds and get a bucket index, 2.
Then we store the value there as usual.

<aside name="power-of-two">

I'm using powers of two for the array sizes here, but they don't need to be.
Some styles of hash tables work best with powers of two, including the one we'll
build in this book. Others prefer prime number array sizes or have other rules.

</aside>

Using the array size as a modulus lets us map the key's numeric range down to
fit an array of any size. We can thus control the number of buckets
independently of the key range. That solves our waste problem, but introduces a
new one. Any two variables whose key number has the same remainder when divided
by the array size will end up in the same bucket. Keys can **collide**. For
example, if we try to add "jam", it also ends up in bucket 2.

<img src="image/hash-tables/collision.png" alt="'Bagel' and 'jam' both end up in bucket index 2." />

We have some control over this by tuning the array size. The bigger the array,
the fewer the indexes that get mapped to the same bucket and the fewer the
collisions that are likely to occur. Hash table implementers track this
collision likelihood by measuring the table's **load factor**. It's defined as
the number of entries divided by the number of buckets. So a hash table with
five entries and an array of 16 elements has a load factor of 0.3125. The higher
the load factor, the greater the chance of collisions.

One way we mitigate collisions is by resizing the array. Just like the dynamic
arrays we implemented earlier, we reallocate and grow the hash table's array as
it fills up. Unlike a regular dynamic array, though, we won't wait until the
array is *full*. Instead, we pick a desired load factor and grow the array when
it goes over that.

## Collision Resolution

Even with a very low load factor, collisions can still occur. The [*birthday
paradox*][birthday] tells us that as the number of entries in the hash table
increases, the chance of collision increases very quickly. We can pick a large
array size to reduce that, but it's a losing game. Say we wanted to store a
hundred items in a hash table. To keep the chance of collision below a
still-pretty-high 10%, we need an array with at least 47,015 elements. To get
the chance below 1% requires an array with 492,555 elements, over 4,000 empty
buckets for each one in use.

[birthday]: https://en.wikipedia.org/wiki/Birthday_problem

A low load factor can make collisions <span name="pigeon">rarer</span>, but the
[*pigeonhole principle*][pigeon] tells us we can never eliminate them entirely.
If you've got five pet pigeons and four holes to put them in, at least one hole
is going to end up with more than one pigeon. With 18,446,744,073,709,551,616
different variable names, any reasonably sized array can potentially end up with
multiple keys in the same bucket.

[pigeon]: https://en.wikipedia.org/wiki/Pigeonhole_principle

Thus we still have to handle collisions gracefully when they occur. Users don't
like it when their programming language can look up variables correctly only
*most* of the time.

<aside name="pigeon">

Put these two funny-named mathematical rules together and you get this
observation: Take a birdhouse containing 365 pigeonholes, and use each pigeon's
birthday to assign it to a pigeonhole. You'll need only 23 randomly chosen
pigeons before you get a greater than 50% chance of two pigeons in the same box.

<img src="image/hash-tables/pigeons.png" alt="Two pigeons in the same hole." />

</aside>

### Separate chaining

Techniques for resolving collisions fall into two broad categories. The first is
**separate chaining**. Instead of each bucket containing a single entry, we let
it contain a collection of them. In the classic implementation, each bucket
points to a linked list of entries. To look up an entry, you find its bucket and
then walk the list until you find an entry with the matching key.

<img src="image/hash-tables/chaining.png" alt="An array with eight buckets. Bucket 2 links to a chain of two nodes. Bucket 5 links to a single node." />

In catastrophically bad cases where every entry collides in the same bucket, the
data structure degrades into a single unsorted linked list with *O(n)* lookup.
In practice, it's easy to avoid that by controlling the load factor and how
entries get scattered across buckets. In typical separate-chained hash tables,
it's rare for a bucket to have more than one or two entries.

Separate chaining is conceptually simple -- it's literally an array of linked
lists. Most operations are straightforward to implement, even deletion which, as
we'll see, can be a pain. But it's not a great fit for modern CPUs. It has a lot
of overhead from pointers and tends to scatter little linked list <span
name="node">nodes</span> around in memory which isn't great for cache usage.

<aside name="node">

There are a few tricks to optimize this. Many implementations store the first
entry right in the bucket so that in the common case where there's only one, no
extra pointer indirection is needed. You can also make each linked list node
store a few entries to reduce the pointer overhead.

</aside>

### Open addressing

The other technique is <span name="open">called</span> **open addressing** or
(confusingly) **closed hashing**. With this technique, all entries live directly
in the bucket array, with one entry per bucket. If two entries collide in the
same bucket, we find a different empty bucket to use instead.

<aside name="open">

It's called "open" addressing because the entry may end up at an address
(bucket) outside of its preferred one. It's called "closed" hashing because all
of the entries stay inside the array of buckets.

</aside>

Storing all entries in a single, big, contiguous array is great for keeping the
memory representation simple and fast. But it makes all of the operations on the
hash table more complex. When inserting an entry, its bucket may be full,
sending us to look at another bucket. That bucket itself may be occupied and so
on. This process of finding an available bucket is called **probing**, and the
order that you examine buckets is a **probe sequence**.

There are a <span name="probe">number</span> of algorithms for determining
which buckets to probe and how to decide which entry goes in which bucket.
There's been a ton of research here because even slight tweaks can have a large
performance impact. And, on a data structure as heavily used as hash tables,
that performance impact touches a very large number of real-world programs
across a range of hardware capabilities.

<aside name="probe">

If you'd like to learn more (and you should, because some of these are really
cool), look into "double hashing", "cuckoo hashing", "Robin Hood hashing", and
anything those lead you to.

</aside>

As usual in this book, we'll pick the simplest one that gets the job done
efficiently. That's good old **linear probing**. When looking for an entry, we
look in the first bucket its key maps to. If it's not in there, we look in the
very next element in the array, and so on. If we reach the end, we wrap back
around to the beginning.

The good thing about linear probing is that it's cache friendly. Since you walk
the array directly in memory order, it keeps the CPU's cache lines full and
happy. The bad thing is that it's prone to **clustering**. If you have a lot of
entries with numerically similar key values, you can end up with a lot of
colliding, overflowing buckets right next to each other.

Compared to separate chaining, open addressing can be harder to wrap your head
around. I think of open addressing as similar to separate chaining except that
the "list" of nodes is threaded through the bucket array itself. Instead of
storing the links between them in pointers, the connections are calculated
implicitly by the order that you look through the buckets.

The tricky part is that more than one of these implicit lists may be interleaved
together. Let's walk through an example that covers all the interesting cases.
We'll ignore values for now and just worry about a set of keys. We start with an
empty array of 8 buckets.

<img src="image/hash-tables/insert-1.png" alt="An array with eight empty buckets." class="wide" />

We decide to insert "bagel". The first letter, "b" (ASCII value 98), modulo the
array size (8) puts it in bucket 2.

<img src="image/hash-tables/insert-2.png" alt="Bagel goes into bucket 2." class="wide" />

Next, we insert "jam". That also wants to go in bucket 2 (106 mod 8 = 2), but
that bucket's taken. We keep probing to the next bucket. It's empty, so we put
it there.

<img src="image/hash-tables/insert-3.png" alt="Jam goes into bucket 3, since 2 is full." class="wide" />

We insert "fruit", which happily lands in bucket 6.

<img src="image/hash-tables/insert-4.png" alt="Fruit goes into bucket 6." class="wide" />

Likewise, "migas" can go in its preferred bucket 5.

<img src="image/hash-tables/insert-5.png" alt="Migas goes into bucket 5." class="wide" />

When we try to insert "eggs", it also wants to be in bucket 5. That's full, so we
skip to 6. Bucket 6 is also full. Note that the entry in there is *not* part of
the same probe sequence. "Fruit" is in its preferred bucket, 6. So the 5 and 6
sequences have collided and are interleaved. We skip over that and finally put
"eggs" in bucket 7.

<img src="image/hash-tables/insert-6.png" alt="Eggs goes into bucket 7 because 5 and 6 are full." class="wide" />

We run into a similar problem with "nuts". It can't land in 6 like it wants to.
Nor can it go into 7. So we keep going. But we've reached the end of the array,
so we wrap back around to 0 and put it there.

<img src="image/hash-tables/insert-7.png" alt="Nuts wraps around to bucket 0 because 6 and 7 are full." class="wide" />

In practice, the interleaving turns out to not be much of a problem. Even in
separate chaining, we need to walk the list to check each entry's key because
multiple keys can reduce to the same bucket. With open addressing, we need to do
that same check, and that also covers the case where you are stepping over
entries that "belong" to a different original bucket.

## Hash Functions

We can now build ourselves a reasonably efficient table for storing variable
names up to eight characters long, but that limitation is still annoying. In
order to relax the last constraint, we need a way to take a string of any length
and convert it to a fixed-size integer.

Finally, we get to the "hash" part of "hash table". A **hash function** takes
some larger blob of data and "hashes" it to produce a fixed-size integer **hash
code** whose value depends on all of the bits of the original data. A <span
name="crypto">good</span> hash function has three main goals:

<aside name="crypto">

Hash functions are also used for cryptography. In that domain, "good" has a
*much* more stringent definition to avoid exposing details about the data being
hashed. We, thankfully, don't need to worry about those concerns for this book.

</aside>

*   **It must be *deterministic*.** The same input must always hash to the same
    number. If the same variable ends up in different buckets at different
    points in time, it's gonna get really hard to find it.

*   **It must be *uniform*.** Given a typical set of inputs, it should produce a
    wide and evenly distributed range of output numbers, with as few clumps or
    patterns as possible. We want it to <span name="scatter">scatter</span>
    values across the whole numeric range to minimize collisions and clustering.

*   **It must be *fast*.** Every operation on the hash table requires us to hash
    the key first. If hashing is slow, it can potentially cancel out the speed
    of the underlying array storage.

<aside name="scatter">

One of the original names for a hash table was "scatter table" because it takes
the entries and scatters them throughout the array. The word "hash" came from
the idea that a hash function takes the input data, chops it up, and tosses it
all together into a pile to come up with a single number from all of those bits.

</aside>

There is a veritable pile of hash functions out there. Some are old and
optimized for architectures no one uses anymore. Some are designed to be fast,
others cryptographically secure. Some take advantage of vector instructions and
cache sizes for specific chips, others aim to maximize portability.

There are people out there for whom designing and evaluating hash functions is,
like, their *jam*. I admire them, but I'm not mathematically astute enough to
*be* one. So for clox, I picked a simple, well-worn hash function called
[FNV-1a][] that's served me fine over the years. Consider <span
name="thing">trying</span> out different ones in your code and see if they make
a difference.

[fnv-1a]: http://www.isthe.com/chongo/tech/comp/fnv/

<aside name="thing">

Who knows, maybe hash functions could turn out to be your thing too?

</aside>

OK, that's a quick run through of buckets, load factors, open addressing,
collision resolution, and hash functions. That's an awful lot of text and not a
lot of real code. Don't worry if it still seems vague. Once we're done coding it
up, it will all click into place.

## Building a Hash Table

The great thing about hash tables compared to other classic techniques like
balanced search trees is that the actual data structure is so simple. Ours goes
into a new module.

^code table-h

A hash table is an array of entries. As in our dynamic array earlier, we keep
track of both the allocated size of the array (`capacity`) and the number of
key/value pairs currently stored in it (`count`). The ratio of count to capacity
is exactly the load factor of the hash table.

Each entry is one of these:

^code entry (1 before, 2 after)

It's a simple key/value pair. Since the key is always a <span
name="string">string</span>, we store the ObjString pointer directly instead of
wrapping it in a Value. It's a little faster and smaller this way.

<aside name="string">

In clox, we only need to support keys that are strings. Handling other types of
keys doesn't add much complexity. As long as you can compare two objects for
equality and reduce them to sequences of bits, it's easy to use them as hash
keys.

</aside>

To create a new, empty hash table, we declare a constructor-like function.

^code init-table-h (2 before, 2 after)

We need a new implementation file to define that. While we're at it, let's get
all of the pesky includes out of the way.

^code table-c

As in our dynamic value array type, a hash table initially starts with zero
capacity and a `NULL` array. We don't allocate anything until needed. Assuming
we do eventually allocate something, we need to be able to free it too.

^code free-table-h (1 before, 2 after)

And its glorious implementation:

^code free-table

Again, it looks just like a dynamic array. In fact, you can think of a hash
table as basically a dynamic array with a really strange policy for inserting
items. We don't need to check for `NULL` here since `FREE_ARRAY()` already
handles that gracefully.

### Hashing strings

Before we can start putting entries in the table, we need to, well, hash them.
To ensure that the entries get distributed uniformly throughout the array, we
want a good hash function that looks at all of the bits of the key string. If it
looked at, say, only the first few characters, then a series of strings that all
shared the same prefix would end up colliding in the same bucket.

On the other hand, walking the entire string to calculate the hash is kind of
slow. We'd lose some of the performance benefit of the hash table if we had to
walk the string every time we looked for a key in the table. So we'll do the
obvious thing: cache it.

Over in the "object" module in ObjString, we add:

^code obj-string-hash (1 before, 1 after)

Each ObjString stores the hash code for its string. Since strings are immutable
in Lox, we can calculate the hash code once up front and be certain that it will
never get invalidated. Caching it eagerly makes a kind of sense: allocating the
string and copying its characters over is already an *O(n)* operation, so it's a
good time to also do the *O(n)* calculation of the string's hash.

Whenever we call the internal function to allocate a string, we pass in its
hash code.

^code allocate-string (1 after)

That function simply stores the hash in the struct.

^code allocate-store-hash (1 before, 2 after)

The fun happens over at the callers. `allocateString()` is called from two
places: the function that copies a string and the one that takes ownership of an
existing dynamically allocated string. We'll start with the first.

^code copy-string-hash (1 before, 1 after)

No magic here. We calculate the hash code and then pass it along.

^code copy-string-allocate (2 before, 1 after)

The other string function is similar.

^code take-string-hash (1 before, 1 after)

The interesting code is over here:

^code hash-string

This is the actual bona fide "hash function" in clox. The algorithm is called
"FNV-1a", and is the shortest decent hash function I know. Brevity is certainly
a virtue in a book that aims to show you every line of code.

The basic idea is pretty simple, and many hash functions follow the same
pattern. You start with some initial hash value, usually a constant with certain
carefully chosen mathematical properties. Then you walk the data to be hashed.
For each byte (or sometimes word), you mix the bits into the hash value somehow,
and then scramble the resulting bits around some.

What it means to "mix" and "scramble" can get pretty sophisticated. Ultimately,
though, the basic goal is *uniformity* -- we want the resulting hash values to
be as widely scattered around the numeric range as possible to avoid collisions
and clustering.

### Inserting entries

Now that string objects know their hash code, we can start putting them into
hash tables.

^code table-set-h (1 before, 2 after)

This function adds the given key/value pair to the given hash table. If an entry
for that key is already present, the new value overwrites the old value. The
function returns `true` if a new entry was added. Here's the implementation:

^code table-set

Most of the interesting logic is in `findEntry()` which we'll get to soon. That
function's job is to take a key and figure out which bucket in the array it
should go in. It returns a pointer to that bucket -- the address of the Entry in
the array.

Once we have a bucket, inserting is straightforward. We update the hash table's
size, taking care to not increase the count if we overwrote the value for an
already-present key. Then we copy the key and value into the corresponding
fields in the Entry.

We're missing a little something here, though. We haven't actually allocated the
Entry array yet. Oops! Before we can insert anything, we need to make sure we
have an array, and that it's big enough.

^code table-set-grow (1 before, 1 after)

This is similar to the code we wrote a while back for growing a dynamic array.
If we don't have enough capacity to insert an item, we reallocate and grow the
array. The `GROW_CAPACITY()` macro takes an existing capacity and grows it by
a multiple to ensure that we get amortized constant performance over a series
of inserts.

The interesting difference here is that `TABLE_MAX_LOAD` constant.

^code max-load (2 before, 1 after)

This is how we manage the table's <span name="75">load</span> factor. We don't
grow when the capacity is completely full. Instead, we grow the array before
then, when the array becomes at least 75% full.

<aside name="75">

Ideal max load factor varies based on the hash function, collision-handling
strategy, and typical keysets you'll see. Since a toy language like Lox doesn't
have "real world" data sets, it's hard to optimize this, and I picked 75%
somewhat arbitrarily. When you build your own hash tables, benchmark and tune
this.

</aside>

We'll get to the implementation of `adjustCapacity()` soon. First, let's look
at that `findEntry()` function you've been wondering about.

^code find-entry

This function is the real core of the hash table. It's responsible for taking a
key and an array of buckets, and figuring out which bucket the entry belongs in.
This function is also where linear probing and collision handling come into
play. We'll use `findEntry()` both to look up existing entries in the hash
table and to decide where to insert new ones.

For all that, there isn't much to it. First, we use modulo to map the key's hash
code to an index within the array's bounds. That gives us a bucket index where,
ideally, we'll be able to find or place the entry.

There are a few cases to check for:

*   If the key for the Entry at that array index is `NULL`, then the bucket is
    empty. If we're using `findEntry()` to look up something in the hash table,
    this means it isn't there. If we're using it to insert, it means we've found
    a place to add the new entry.

*   If the key in the bucket is <span name="equal">equal</span> to the key we're
    looking for, then that key is already present in the table. If we're doing a
    lookup, that's good -- we've found the key we seek. If we're doing an insert,
    this means we'll be replacing the value for that key instead of adding a new
    entry.

<aside name="equal">

It looks like we're using `==` to see if two strings are equal. That doesn't
work, does it? There could be two copies of the same string at different places
in memory. Fear not, astute reader. We'll solve this further on. And, strangely
enough, it's a hash table that provides the tool we need.

</aside>

*   Otherwise, the bucket has an entry in it, but with a different key. This is
    a collision. In that case, we start probing. That's what that `for` loop
    does. We start at the bucket where the entry would ideally go. If that
    bucket is empty or has the same key, we're done. Otherwise, we advance to
    the next element -- this is the *linear* part of "linear probing" -- and
    check there. If we go past the end of the array, that second modulo operator
    wraps us back around to the beginning.

We exit the loop when we find either an empty bucket or a bucket with the same
key as the one we're looking for. You might be wondering about an infinite loop.
What if we collide with *every* bucket? Fortunately, that can't happen thanks to
our load factor. Because we grow the array as soon as it gets close to being
full, we know there will always be empty buckets.

We return directly from within the loop, yielding a pointer to the found Entry
so the caller can either insert something into it or read from it. Way back in
`tableSet()`, the function that first kicked this off, we store the new entry in
that returned bucket and we're done.

### Allocating and resizing

Before we can put entries in the hash table, we do need a place to actually
store them. We need to allocate an array of buckets. That happens in this
function:

^code table-adjust-capacity

We create a bucket array with `capacity` entries. After we allocate the array,
we initialize every element to be an empty bucket and then store the array (and
its capacity) in the hash table's main struct. This code is fine for when we
insert the very first entry into the table, and we require the first allocation
of the array. But what about when we already have one and we need to grow it?

Back when we were doing a dynamic array, we could just use `realloc()` and let
the C standard library copy everything over. That doesn't work for a hash table.
Remember that to choose the bucket for each entry, we take its hash code *modulo
the array size*. That means that when the array size changes, entries may end up
in different buckets.

Those new buckets may have new collisions that we need to deal with. So the
simplest way to get every entry where it belongs is to rebuild the table from
scratch by re-inserting every entry into the new empty array.

^code re-hash (2 before, 2 after)

We walk through the old array front to back. Any time we find a non-empty
bucket, we insert that entry into the new array. We use `findEntry()`, passing
in the *new* array instead of the one currently stored in the Table. (This is
why `findEntry()` takes a pointer directly to an Entry array and not the whole
`Table` struct. That way, we can pass the new array and capacity before we've
stored those in the struct.)

After that's done, we can release the memory for the old array.

^code free-old-array (3 before, 1 after)

With that, we have a hash table that we can stuff as many entries into as we
like. It handles overwriting existing keys and growing itself as needed to
maintain the desired load factor.

While we're at it, let's also define a helper function for copying all of the
entries of one hash table into another.

^code table-add-all-h (1 before, 2 after)

We won't need this until much later when we support method inheritance, but we
may as well implement it now while we've got all the hash table stuff fresh in
our minds.

^code table-add-all

There's not much to say about this. It walks the bucket array of the source hash
table. Whenever it finds a non-empty bucket, it adds the entry to the
destination hash table using the `tableSet()` function we recently defined.

### Retrieving values

Now that our hash table contains some stuff, let's start pulling things back
out. Given a key, we can look up the corresponding value, if there is one, with
this function:

^code table-get-h (1 before, 1 after)

You pass in a table and a key. If it finds an entry with that key, it returns
`true`, otherwise it returns `false`. If the entry exists, the `value` output
parameter points to the resulting value.

Since `findEntry()` already does the hard work, the implementation isn't bad.

^code table-get

If the table is completely empty, we definitely won't find the entry, so we
check for that first. This isn't just an optimization -- it also ensures that we
don't try to access the bucket array when the array is `NULL`. Otherwise, we let
`findEntry()` work its magic. That returns a pointer to a bucket. If the bucket
is empty, which we detect by seeing if the key is `NULL`, then we didn't find an
Entry with our key. If `findEntry()` does return a non-empty Entry, then that's
our match. We take the Entry's value and copy it to the output parameter so the
caller can get it. Piece of cake.

### Deleting entries

There is one more fundamental operation a full-featured hash table needs to
support: removing an entry. This seems pretty obvious: if you can add things,
you should be able to *un*-add them, right? But you'd be surprised how many
tutorials on hash tables omit this.

I could have taken that route too. In fact, we use deletion in clox only in a
tiny edge case in the VM. But if you want to actually understand how to
completely implement a hash table, this feels important. I can sympathize with
their desire to overlook it. As we'll see, deleting from a hash table that uses
<span name="delete">open</span> addressing is tricky.

<aside name="delete">

With separate chaining, deleting is as easy as removing a node from a linked
list.

</aside>

At least the declaration is simple.

^code table-delete-h (1 before, 1 after)

The obvious approach is to mirror insertion. Use `findEntry()` to look up the
entry's bucket. Then clear out the bucket. Done!

In cases where there are no collisions, that works fine. But if a collision has
occurred, then the bucket where the entry lives may be part of one or more
implicit probe sequences. For example, here's a hash table containing three keys
all with the same preferred bucket, 2:

<img src="image/hash-tables/delete-1.png" alt="A hash table containing 'bagel' in bucket 2, 'biscuit' in bucket 3, and 'jam' in bucket 4." />

Remember that when we're walking a probe sequence to find an entry, we know
we've reached the end of a sequence and that the entry isn't present when we hit
an empty bucket. It's like the probe sequence is a list of entries and an empty
entry terminates that list.

If we delete "biscuit" by simply clearing the Entry, then we break that probe
sequence in the middle, leaving the trailing entries orphaned and unreachable.
Sort of like removing a node from a linked list without relinking the pointer
from the previous node to the next one.

If we later try to look for "jam", we'd start at "bagel", stop at the next
empty Entry, and never find it.

<img src="image/hash-tables/delete-2.png" alt="The 'biscuit' entry has been deleted from the hash table, breaking the chain." />

To solve this, most implementations use a trick called <span
name="tombstone">**tombstones**</span>. Instead of clearing the entry on
deletion, we replace it with a special sentinel entry called a "tombstone". When
we are following a probe sequence during a lookup, and we hit a tombstone, we
*don't* treat it like an empty slot and stop iterating. Instead, we keep going
so that deleting an entry doesn't break any implicit collision chains and we can
still find entries after it.

<img src="image/hash-tables/delete-3.png" alt="Instead of deleting 'biscuit', it's replaced with a tombstone." />

The code looks like this:

^code table-delete

First, we find the bucket containing the entry we want to delete. (If we don't
find it, there's nothing to delete, so we bail out.) We replace the entry with a
tombstone. In clox, we use a `NULL` key and a `true` value to represent that,
but any representation that can't be confused with an empty bucket or a valid
entry works.

<aside name="tombstone">

<img src="image/hash-tables/tombstone.png" alt="A tombstone inscribed 'Here lies entry biscuit &rarr; 3.75, gone but not deleted'." />

</aside>

That's all we need to do to delete an entry. Simple and fast. But all of the
other operations need to correctly handle tombstones too. A tombstone is a sort
of "half" entry. It has some of the characteristics of a present entry, and some
of the characteristics of an empty one.

When we are following a probe sequence during a lookup, and we hit a tombstone,
we note it and keep going.

^code find-tombstone (2 before, 2 after)

The first time we pass a tombstone, we store it in this local variable:

^code find-entry-tombstone (1 before, 1 after)

If we reach a truly empty entry, then the key isn't present. In that case, if we
have passed a tombstone, we return its bucket instead of the later empty one. If
we're calling `findEntry()` in order to insert a node, that lets us treat the
tombstone bucket as empty and reuse it for the new entry.

Reusing tombstone slots automatically like this helps reduce the number of
tombstones wasting space in the bucket array. In typical use cases where there
is a mixture of insertions and deletions, the number of tombstones grows for a
while and then tends to stabilize.

Even so, there's no guarantee that a large number of deletes won't cause the
array to be full of tombstones. In the very worst case, we could end up with
*no* empty buckets. That would be bad because, remember, the only thing
preventing an infinite loop in `findEntry()` is the assumption that we'll
eventually hit an empty bucket.

So we need to be thoughtful about how tombstones interact with the table's load
factor and resizing. The key question is, when calculating the load factor,
should we treat tombstones like full buckets or empty ones?

### Counting tombstones

If we treat tombstones like full buckets, then we may end up with a bigger array
than we probably need because it artificially inflates the load factor. There
are tombstones we could reuse, but they aren't treated as unused so we end up
growing the array prematurely.

But if we treat tombstones like empty buckets and *don't* include them in the
load factor, then we run the risk of ending up with *no* actual empty buckets to
terminate a lookup. An infinite loop is a much worse problem than a few extra
array slots, so for load factor, we consider tombstones to be full buckets.

That's why we don't reduce the count when deleting an entry in the previous
code. The count is no longer the number of entries in the hash table, it's the
number of entries plus tombstones. That implies that we increment the count
during insertion only if the new entry goes into an entirely empty bucket.

^code set-increment-count (1 before, 2 after)

If we are replacing a tombstone with a new entry, the bucket has already been
accounted for and the count doesn't change.

When we resize the array, we allocate a new array and re-insert all of the
existing entries into it. During that process, we *don't* copy the tombstones
over. They don't add any value since we're rebuilding the probe sequences
anyway, and would just slow down lookups. That means we need to recalculate the
count since it may change during a resize. So we clear it out:

^code resize-init-count (2 before, 1 after)

Then each time we find a non-tombstone entry, we increment it.

^code resize-increment-count (1 before, 1 after)

This means that when we grow the capacity, we may end up with *fewer* entries in
the resulting larger array because all of the tombstones get discarded. That's a
little wasteful, but not a huge practical problem.

I find it interesting that much of the work to support deleting entries is in
`findEntry()` and `adjustCapacity()`. The actual delete logic is quite simple
and fast. In practice, deletions tend to be rare, so you'd expect a hash table
to do as much work as it can in the delete function and leave the other
functions alone to keep them faster. With our tombstone approach, deletes are
fast, but lookups get penalized.

I did a little benchmarking to test this out in a few different deletion
scenarios. I was surprised to discover that tombstones did end up being faster
overall compared to doing all the work during deletion to reinsert the affected
entries.

But if you think about it, it's not that the tombstone approach pushes the work
of fully deleting an entry to other operations, it's more that it makes deleting
*lazy*. At first, it does the minimal work to turn the entry into a tombstone.
That can cause a penalty when later lookups have to skip over it. But it also
allows that tombstone bucket to be reused by a later insert too. That reuse is a
very efficient way to avoid the cost of rearranging all of the following
affected entries. You basically recycle a node in the chain of probed entries.
It's a neat trick.

## String Interning

We've got ourselves a hash table that mostly works, though it has a critical
flaw in its center. Also, we aren't using it for anything yet. It's time to
address both of those and, in the process, learn a classic technique used by
interpreters.

The reason the hash table doesn't totally work is that when `findEntry()` checks
to see if an existing key matches the one it's looking for, it uses `==` to
compare two strings for equality. That only returns true if the two keys are the
exact same string in memory. Two separate strings with the same characters
should be considered equal, but aren't.

Remember, back when we added strings in the last chapter, we added [explicit
support to compare the strings character-by-character][equals] in order to get
true value equality. We could do that in `findEntry()`, but that's <span
name="hash-collision">slow</span>.

[equals]: strings.html#operations-on-strings

<aside name="hash-collision">

In practice, we would first compare the hash codes of the two strings. That
quickly detects almost all different strings -- it wouldn't be a very good hash
function if it didn't. But when the two hashes are the same, we still have to
compare characters to make sure we didn't have a hash collision on different
strings.

</aside>

Instead, we'll use a technique called **string interning**. The core problem is
that it's possible to have different strings in memory with the same characters.
Those need to behave like equivalent values even though they are distinct
objects. They're essentially duplicates, and we have to compare all of their
bytes to detect that.

<span name="intern">String interning</span> is a process of deduplication. We
create a collection of "interned" strings. Any string in that collection is
guaranteed to be textually distinct from all others. When you intern a string,
you look for a matching string in the collection. If found, you use that
original one. Otherwise, the string you have is unique, so you add it to the
collection.

<aside name="intern">

I'm guessing "intern" is short for "internal". I think the idea is that the
language's runtime keeps its own "internal" collection of these strings, whereas
other strings could be user created and floating around in memory. When you
intern a string, you ask the runtime to add the string to that internal
collection and return a pointer to it.

Languages vary in how much string interning they do and how it's exposed to the
user. Lua interns *all* strings, which is what clox will do too. Lisp, Scheme,
Smalltalk, Ruby and others have a separate string-like type called "symbol" that
is implicitly interned. (This is why they say symbols are "faster" in Ruby.)
Java interns constant strings by default, and provides an API to let you
explicitly intern any string you give it.

</aside>

In this way, you know that each sequence of characters is represented by only
one string in memory. This makes value equality trivial. If two strings point
to the same address in memory, they are obviously the same string and must be
equal. And, because we know strings are unique, if two strings point to
different addresses, they must be distinct strings.

Thus, pointer equality exactly matches value equality. Which in turn means that
our existing `==` in `findEntry()` does the right thing. Or, at least, it will
once we intern all the strings. In order to reliably deduplicate all strings,
the VM needs to be able to find every string that's created. We do that by
giving it a hash table to store them all.

^code vm-strings (1 before, 1 after)

As usual, we need an include.

^code vm-include-table (1 before, 1 after)

When we spin up a new VM, the string table is empty.

^code init-strings (1 before, 1 after)

And when we shut down the VM, we clean up any resources used by the table.

^code free-strings (1 before, 1 after)

Some languages have a separate type or an explicit step to intern a string. For
clox, we'll automatically intern every one. That means whenever we create a new
unique string, we add it to the table.

^code allocate-store-string (1 before, 1 after)

We're using the table more like a hash *set* than a hash *table*. The keys are
the strings and those are all we care about, so we just use `nil` for the
values.

This gets a string into the table assuming that it's unique, but we need to
actually check for duplication before we get here. We do that in the two
higher-level functions that call `allocateString()`. Here's one:

^code copy-string-intern (1 before, 1 after)

When copying a string into a new ObjString, we look it up in the string table
first. If we find it, instead of "copying", we just return a reference to that
string. Otherwise, we fall through, allocate a new string, and store it in the
string table.

Taking ownership of a string is a little different.

^code take-string-intern (1 before, 1 after)

Again, we look up the string in the string table first. If we find it, before we
return it, we free the memory for the string that was passed in. Since ownership
is being passed to this function and we no longer need the duplicate string,
it's up to us to free it.

Before we get to the new function we need to write, there's one more include.

^code object-include-table (1 before, 1 after)

To look for a string in the table, we can't use the normal `tableGet()` function
because that calls `findEntry()`, which has the exact problem with duplicate
strings that we're trying to fix right now. Instead, we use this new function:

^code table-find-string-h (1 before, 2 after)

The implementation looks like so:

^code table-find-string

It appears we have copy-pasted `findEntry()`. There is a lot of redundancy, but
also a couple of key differences. First, we pass in the raw character array of
the key we're looking for instead of an ObjString. At the point that we call
this, we haven't created an ObjString yet.

Second, when checking to see if we found the key, we look at the actual strings.
We first see if they have matching lengths and hashes. Those are quick to check
and if they aren't equal, the strings definitely aren't the same.

If the lengths and hashes do match, we can't yet tell a true duplicate from a
hash collision, so we do an actual character-by-character string comparison.
This is the one place in the VM where we actually test strings for
textual equality. We do it here to deduplicate strings and then the rest of the
VM can take for granted that any two strings at different addresses in memory
must have different contents.

In fact, now that we've interned all the strings, we can take advantage of it in
the bytecode interpreter. When a user does `==` on two objects that happen to be
strings, we don't need to test the characters any more.

^code equal (1 before, 1 after)

We've added a little overhead when creating strings to intern them. But in
return, at runtime, the equality operator on strings is much faster. With that,
we have a full-featured hash table ready for us to use for tracking variables,
instances, or any other key-value pairs that might show up.

We also sped up testing strings for equality. This is nice for when the user
does `==` on strings. But it's even more critical in a dynamically typed
language like Lox where method calls and instance fields are looked up by name
at runtime. If testing a string for equality is slow, then that means looking up
a method by name is slow. And if *that's* slow in your object-oriented language,
then *everything* is slow.

<div class="challenges">

## Challenges

1.  In clox, we happen to only need keys that are strings, so the hash table we
    built is hardcoded for that key type. If we exposed hash tables to Lox users
    as a first-class collection, it would be useful to support different kinds
    of keys.

    Add support for keys of the other primitive types: numbers, Booleans, and
    `nil`. Later, clox will support user-defined classes. If we want to support
    keys that are instances of those classes, what kind of complexity does that
    add?

1.  Hash tables have a lot of knobs you can tweak that affect their performance.
    You decide whether to use separate chaining or open addressing. Depending on
    which fork in that road you take, you can tune how many entries are stored
    in each node, or the probing strategy you use. You control the hash
    function, load factor, and growth rate.

    All of this variety wasn't created just to give CS doctoral candidates
    something to <span name="publish">publish</span> theses on: each has its
    uses in the many varied domains and hardware scenarios where hashing comes
    into play. Look up a few hash table implementations in different open source
    systems, research the choices they made, and try to figure out why they did
    things that way.

    <aside name="publish">

    Well, at least that wasn't the *only* reason they were created. Whether that
    was the *main* reason is up for debate.

    </aside>

1.  Benchmarking a hash table is notoriously difficult. A hash table
    implementation may perform well with some keysets and poorly with others. It
    may work well at small sizes but degrade as it grows, or vice versa. It may
    choke when deletions are common, but fly when they aren't. Creating
    benchmarks that accurately represent how your users will use the hash table
    is a challenge.

    Write a handful of different benchmark programs to validate our hash table
    implementation. How does the performance vary between them? Why did you
    choose the specific test cases you chose?

</div>


================================================
FILE: book/index.md
================================================
This text is not used. All of the content is in the index.html template.


================================================
FILE: book/inheritance.md
================================================
> Once we were blobs in the sea, and then fishes, and then lizards and rats and
> then monkeys, and hundreds of things in between. This hand was once a fin,
> this hand once had claws! In my human mouth I have the pointy teeth of a wolf
> and the chisel teeth of a rabbit and the grinding teeth of a cow! Our blood is
> as salty as the sea we used to live in! When we're frightened, the hair on our
> skin stands up, just like it did when we had fur. We are history! Everything
> we've ever been on the way to becoming us, we still are.
>
> <cite>Terry Pratchett, <em>A Hat Full of Sky</em></cite>

Can you believe it? We've reached the last chapter of [Part II][]. We're almost
done with our first Lox interpreter. The [previous chapter][] was a big ball of
intertwined object-orientation features. I couldn't separate those from each
other, but I did manage to untangle one piece. In this chapter, we'll finish
off Lox's class support by adding inheritance.

[part ii]: a-tree-walk-interpreter.html
[previous chapter]: classes.html

Inheritance appears in object-oriented languages all the way back to the <span
name="inherited">first</span> one, [Simula][]. Early on, Kristen Nygaard and
Ole-Johan Dahl noticed commonalities across classes in the simulation programs
they wrote. Inheritance gave them a way to reuse the code for those similar
parts.

[simula]: https://en.wikipedia.org/wiki/Simula

<aside name="inherited">

You could say all those other languages *inherited* it from Simula. Hey-ooo!
I'll, uh, see myself out.

</aside>

## Superclasses and Subclasses

Given that the concept is "inheritance", you would hope they would pick a
consistent metaphor and call them "parent" and "child" classes, but that would
be too easy. Way back when, C. A. R. Hoare coined the term "<span
name="subclass">subclass</span>" to refer to a record type that refines another
type. Simula borrowed that term to refer to a *class* that inherits from
another. I don't think it was until Smalltalk came along that someone flipped
the Latin prefix to get "superclass" to refer to the other side of the
relationship. From C++, you also hear "base" and "derived" classes. I'll mostly
stick with "superclass" and "subclass".

<aside name="subclass">

"Super-" and "sub-" mean "above" and "below" in Latin, respectively. Picture an
inheritance tree like a family tree with the root at the top -- subclasses are
below their superclasses on the diagram. More generally, "sub-" refers to things
that refine or are contained by some more general concept. In zoology, a
subclass is a finer categorization of a larger class of living things.

In set theory, a subset is contained by a larger superset which has all of the
elements of the subset and possibly more. Set theory and programming languages
meet each other in type theory. There, you have "supertypes" and "subtypes".

In statically typed object-oriented languages, a subclass is also often a
subtype of its superclass. Say we have a Doughnut superclass and a BostonCream
subclass. Every BostonCream is also an instance of Doughnut, but there may be
doughnut objects that are not BostonCreams (like Crullers).

Think of a type as the set of all values of that type. The set of all Doughnut
instances contains the set of all BostonCream instances since every BostonCream
is also a Doughnut. So BostonCream is a subclass, and a subtype, and its
instances are a subset. It all lines up.

<img src="image/inheritance/doughnuts.png" alt="Boston cream &lt;: doughnut." />

</aside>

Our first step towards supporting inheritance in Lox is a way to specify a
superclass when declaring a class. There's a lot of variety in syntax for this.
C++ and C# place a `:` after the subclass's name, followed by the superclass
name. Java uses `extends` instead of the colon. Python puts the superclass(es)
in parentheses after the class name. Simula puts the superclass's name *before*
the `class` keyword.

This late in the game, I'd rather not add a new reserved word or token to the
lexer. We don't have `extends` or even `:`, so we'll follow Ruby and use a
less-than sign (`<`).

```lox
class Doughnut {
  // General doughnut stuff...
}

class BostonCream < Doughnut {
  // Boston Cream-specific stuff...
}
```

To work this into the grammar, we add a new optional clause in our existing
`classDecl` rule.

```ebnf
classDecl      → "class" IDENTIFIER ( "<" IDENTIFIER )?
                 "{" function* "}" ;
```

After the class name, you can have a `<` followed by the superclass's name. The
superclass clause is optional because you don't *have* to have a superclass.
Unlike some other object-oriented languages like Java, Lox has no root "Object"
class that everything inherits from, so when you omit the superclass clause, the
class has *no* superclass, not even an implicit one.

We want to capture this new syntax in the class declaration's AST node.

^code superclass-ast (1 before, 1 after)

You might be surprised that we store the superclass name as an Expr.Variable,
not a Token. The grammar restricts the superclass clause to a single identifier,
but at runtime, that identifier is evaluated as a variable access. Wrapping the
name in an Expr.Variable early on in the parser gives us an object that the
resolver can hang the resolution information off of.

The new parser code follows the grammar directly.

^code parse-superclass (1 before, 1 after)

Once we've (possibly) parsed a superclass declaration, we store it in the AST.

^code construct-class-ast (2 before, 1 after)

If we didn't parse a superclass clause, the superclass expression will be
`null`. We'll have to make sure the later passes check for that. The first of
those is the resolver.

^code resolve-superclass (1 before, 2 after)

The class declaration AST node has a new subexpression, so we traverse into and
resolve that. Since classes are usually declared at the top level, the
superclass name will most likely be a global variable, so this doesn't usually
do anything useful. However, Lox allows class declarations even inside blocks,
so it's possible the superclass name refers to a local variable. In that case,
we need to make sure it's resolved.

Because even well-intentioned programmers sometimes write weird code, there's a
silly edge case we need to worry about while we're in here. Take a look at this:

```lox
class Oops < Oops {}
```

There's no way this will do anything useful, and if we let the runtime try to
run this, it will break the expectation the interpreter has about there not
being cycles in the inheritance chain. The safest thing is to detect this case
statically and report it as an error.

^code inherit-self (2 before, 1 after)

Assuming the code resolves without error, the AST travels to the interpreter.

^code interpret-superclass (1 before, 1 after)

If the class has a superclass expression, we evaluate it. Since that could
potentially evaluate to some other kind of object, we have to check at runtime
that the thing we want to be the superclass is actually a class. Bad things
would happen if we allowed code like:

```lox
var NotAClass = "I am totally not a class";

class Subclass < NotAClass {} // ?!
```

Assuming that check passes, we continue on. Executing a class declaration turns
the syntactic representation of a class -- its AST node -- into its runtime
representation, a LoxClass object. We need to plumb the superclass through to
that too. We pass the superclass to the constructor.

^code interpreter-construct-class (3 before, 1 after)

The constructor stores it in a field.

^code lox-class-constructor (1 after)

Which we declare here:

^code lox-class-superclass-field (1 before, 1 after)

With that, we can define classes that are subclasses of other classes. Now, what
does having a superclass actually *do?*

## Inheriting Methods

Inheriting from another class means that everything that's <span
name="liskov">true</span> of the superclass should be true, more or less, of the
subclass. In statically typed languages, that carries a lot of implications. The
sub*class* must also be a sub*type*, and the memory layout is controlled so that
you can pass an instance of a subclass to a function expecting a superclass and
it can still access the inherited fields correctly.

<aside name="liskov">

A fancier name for this hand-wavey guideline is the [*Liskov substitution
principle*][liskov]. Barbara Liskov introduced it in a keynote during the
formative period of object-oriented programming.

[liskov]: https://en.wikipedia.org/wiki/Liskov_substitution_principle

</aside>

Lox is a dynamically typed language, so our requirements are much simpler.
Basically, it means that if you can call some method on an instance of the
superclass, you should be able to call that method when given an instance of the
subclass. In other words, methods are inherited from the superclass.

This lines up with one of the goals of inheritance -- to give users a way to
reuse code across classes. Implementing this in our interpreter is
astonishingly easy.

^code find-method-recurse-superclass (3 before, 1 after)

That's literally all there is to it. When we are looking up a method on an
instance, if we don't find it on the instance's class, we recurse up through the
superclass chain and look there. Give it a try:

```lox
class Doughnut {
  cook() {
    print "Fry until golden brown.";
  }
}

class BostonCream < Doughnut {}

BostonCream().cook();
```

There we go, half of our inheritance features are complete with only three lines
of Java code.

## Calling Superclass Methods

In `findMethod()` we look for a method on the current class *before* walking up
the superclass chain. If a method with the same name exists in both the subclass
and the superclass, the subclass one takes precedence or **overrides** the
superclass method. Sort of like how variables in inner scopes shadow outer ones.

That's great if the subclass wants to *replace* some superclass behavior
completely. But, in practice, subclasses often want to *refine* the superclass's
behavior. They want to do a little work specific to the subclass, but also
execute the original superclass behavior too.

However, since the subclass has overridden the method, there's no way to refer
to the original one. If the subclass method tries to call it by name, it will
just recursively hit its own override. We need a way to say "Call this method,
but look for it directly on my superclass and ignore my override". Java uses
`super` for this, and we'll use that same syntax in Lox. Here is an example:

```lox
class Doughnut {
  cook() {
    print "Fry until golden brown.";
  }
}

class BostonCream < Doughnut {
  cook() {
    super.cook();
    print "Pipe full of custard and coat with chocolate.";
  }
}

BostonCream().cook();
```

If you run this, it should print:

```text
Fry until golden brown.
Pipe full of custard and coat with chocolate.
```

We have a new expression form. The `super` keyword, followed by a dot and an
identifier, looks for a method with that name. Unlike calls on `this`, the search
starts at the superclass.

### Syntax

With `this`, the keyword works sort of like a magic variable, and the expression
is that one lone token. But with `super`, the subsequent `.` and property name
are inseparable parts of the `super` expression. You can't have a bare `super`
token all by itself.

```lox
print super; // Syntax error.
```

So the new clause we add to the `primary` rule in our grammar includes the
property access as well.

```ebnf
primary        → "true" | "false" | "nil" | "this"
               | NUMBER | STRING | IDENTIFIER | "(" expression ")"
               | "super" "." IDENTIFIER ;
```

Typically, a `super` expression is used for a method call, but, as with regular
methods, the argument list is *not* part of the expression. Instead, a super
*call* is a super *access* followed by a function call. Like other method calls,
you can get a handle to a superclass method and invoke it separately.

```lox
var method = super.cook;
method();
```

So the `super` expression itself contains only the token for the `super` keyword
and the name of the method being looked up. The corresponding <span
name="super-ast">syntax tree node</span> is thus:

^code super-expr (1 before, 1 after)

<aside name="super-ast">

The generated code for the new node is in [Appendix II][appendix-super].

[appendix-super]: appendix-ii.html#super-expression

</aside>

Following the grammar, the new parsing code goes inside our existing `primary()`
method.

^code parse-super (2 before, 2 after)

A leading `super` keyword tells us we've hit a `super` expression. After that we
consume the expected `.` and method name.

### Semantics

Earlier, I said a `super` expression starts the method lookup from "the
superclass", but *which* superclass? The naïve answer is the superclass of
`this`, the object the surrounding method was called on. That coincidentally
produces the right behavior in a lot of cases, but that's not actually correct.
Gaze upon:

```lox
class A {
  method() {
    print "A method";
  }
}

class B < A {
  method() {
    print "B method";
  }

  test() {
    super.method();
  }
}

class C < B {}

C().test();
```

Translate this program to Java, C#, or C++ and it will print "A method", which
is what we want Lox to do too. When this program runs, inside the body of
`test()`, `this` is an instance of C. The superclass of C is B, but that is
*not* where the lookup should start. If it did, we would hit B's `method()`.

Instead, lookup should start on the superclass of *the class containing the
`super` expression*. In this case, since `test()` is defined inside B, the
`super` expression inside it should start the lookup on *B*&rsquo;s superclass
-- A.

<span name="flow"></span>

<img src="image/inheritance/classes.png" alt="The call chain flowing through the classes." />

<aside name="flow">

The execution flow looks something like this:

1. We call `test()` on an instance of C.

2. That enters the `test()` method inherited from B. That calls
   `super.method()`.

3. The superclass of B is A, so that chains to `method()` on A, and the program
   prints "A method".

</aside>

Thus, in order to evaluate a `super` expression, we need access to the
superclass of the class definition surrounding the call. Alack and alas, at the
point in the interpreter where we are executing a `super` expression, we don't
have that easily available.

We *could* add a field to LoxFunction to store a reference to the LoxClass that
owns that method. The interpreter would keep a reference to the
currently executing LoxFunction so that we could look it up later when we hit a
`super` expression. From there, we'd get the LoxClass of the method, then its
superclass.

That's a lot of plumbing. In the [last chapter][], we had a similar problem when
we needed to add support for `this`. In that case, we used our existing
environment and closure mechanism to store a reference to the current object.
Could we do something similar for storing the superclass<span
name="rhetorical">?</span> Well, I probably wouldn't be talking about it if the
answer was no, so... yes.

<aside name="rhetorical">

Does anyone even like rhetorical questions?

</aside>

[last chapter]: classes.html

One important difference is that we bound `this` when the method was *accessed*.
The same method can be called on different instances and each needs its own
`this`. With `super` expressions, the superclass is a fixed property of the
*class declaration itself*. Every time you evaluate some `super` expression, the
superclass is always the same.

That means we can create the environment for the superclass once, when the class
definition is executed. Immediately before we define the methods, we make a new
environment to bind the class's superclass to the name `super`.

<img src="image/inheritance/superclass.png" alt="The superclass environment." />

When we create the LoxFunction runtime representation for each method, that is
the environment they will capture in their closure. Later, when a method is
invoked and `this` is bound, the superclass environment becomes the parent for
the method's environment, like so:

<img src="image/inheritance/environments.png" alt="The environment chain including the superclass environment." />

That's a lot of machinery, but we'll get through it a step at a time. Before we
can get to creating the environment at runtime, we need to handle the
corresponding scope chain in the resolver.

^code begin-super-scope (2 before, 2 after)

If the class declaration has a superclass, then we create a new scope
surrounding all of its methods. In that scope, we define the name "super". Once
we're done resolving the class's methods, we discard that scope.

^code end-super-scope (2 before, 1 after)

It's a minor optimization, but we only create the superclass environment if the
class actually *has* a superclass. There's no point creating it when there isn't
a superclass since there'd be no superclass to store in it anyway.

With "super" defined in a scope chain, we are able to resolve the `super`
expression itself.

^code resolve-super-expr

We resolve the `super` token exactly as if it were a variable. The resolution
stores the number of hops along the environment chain that the interpreter needs
to walk to find the environment where the superclass is stored.

This code is mirrored in the interpreter. When we evaluate a subclass
definition, we create a new environment.

^code begin-superclass-environment (6 before, 2 after)

Inside that environment, we store a reference to the superclass -- the actual
LoxClass object for the superclass which we have now that we are in the runtime.
Then we create the LoxFunctions for each method. Those will capture the current
environment -- the one where we just bound "super" -- as their closure, holding
on to the superclass like we need. Once that's done, we pop the environment.

^code end-superclass-environment (2 before, 2 after)

We're ready to interpret `super` expressions themselves. There are a few moving
parts, so we'll build this method up in pieces.

^code interpreter-visit-super

First, the work we've been leading up to. We look up the surrounding class's
superclass by looking up "super" in the proper environment.

When we access a method, we also need to bind `this` to the object the method is
accessed from. In an expression like `doughnut.cook`, the object is whatever we
get from evaluating `doughnut`. In a `super` expression like `super.cook`, the
current object is implicitly the *same* current object that the surrounding
method is using. In
other words, `this`. Even though we are looking up the *method* on the
superclass, the *instance* is still `this`.

Unfortunately, inside the `super` expression, we don't have a convenient node
for the resolver to hang the number of hops to `this` on. Fortunately, we do
control the layout of the environment chains. The environment where "this" is
bound is always right inside the environment where we store "super".

^code super-find-this (2 before, 1 after)

Offsetting the distance by one looks up "this" in that inner environment. I
admit this isn't the most <span name="elegant">elegant</span> code, but it
works.

<aside name="elegant">

Writing a book that includes every single line of code for a program means I
can't hide the hacks by leaving them as an "exercise for the reader".

</aside>

Now we're ready to look up and bind the method, starting at the superclass.

^code super-find-method (2 before, 1 after)

This is almost exactly like the code for looking up a method of a get
expression, except that we call `findMethod()` on the superclass instead of on
the class of the current object.

That's basically it. Except, of course, that we might *fail* to find the method.
So we check for that too.

^code super-no-method (2 before, 2 after)

There you have it! Take that BostonCream example earlier and give it a try.
Assuming you and I did everything right, it should fry it first, then stuff it
with cream.

### Invalid uses of super

As with previous language features, our implementation does the right thing when
the user writes correct code, but we haven't bulletproofed the interpreter
against bad code. In particular, consider:

```lox
class Eclair {
  cook() {
    super.cook();
    print "Pipe full of crème pâtissière.";
  }
}
```

This class has a `super` expression, but no superclass. At runtime, the code for
evaluating `super` expressions assumes that "super" was successfully resolved
and will be found in the environment. That's going to fail here because there is
no surrounding environment for the superclass since there is no superclass. The
JVM will throw an exception and bring our interpreter to its knees.

Heck, there are even simpler broken uses of super:

```lox
super.notEvenInAClass();
```

We could handle errors like these at runtime by checking to see if the lookup
of "super" succeeded. But we can tell statically -- just by looking at the
source code -- that Eclair has no superclass and thus no `super` expression will
work inside it. Likewise, in the second example, we know that the `super`
expression is not even inside a method body.

Even though Lox is dynamically typed, that doesn't mean we want to defer
*everything* to runtime. If the user made a mistake, we'd like to help them find
it sooner rather than later. So we'll report these errors statically, in the
resolver.

First, we add a new case to the enum we use to keep track of what kind of class
is surrounding the current code being visited.

^code class-type-subclass (1 before, 1 after)

We'll use that to distinguish when we're inside a class that has a superclass
versus one that doesn't. When we resolve a class declaration, we set that if the
class is a subclass.

^code set-current-subclass (1 before, 1 after)

Then, when we resolve a `super` expression, we check to see that we are
currently inside a scope where that's allowed.

^code invalid-super (1 before, 1 after)

If not -- oopsie! -- the user made a mistake.

## Conclusion

We made it! That final bit of error handling is the last chunk of code needed to
complete our Java implementation of Lox. This is a real <span
name="superhero">accomplishment</span> and one you should be proud of. In the
past dozen chapters and a thousand or so lines of code, we have learned and
implemented...

* [tokens and lexing][4],
* [abstract syntax trees][5],
* [recursive descent parsing][6],
* prefix and infix expressions,
* runtime representation of objects,
* [interpreting code using the Visitor pattern][7],
* [lexical scope][8],
* environment chains for storing variables,
* [control flow][9],
* [functions with parameters][10],
* closures,
* [static variable resolution and error detection][11],
* [classes][12],
* constructors,
* fields,
* methods, and finally,
* inheritance.

[4]: scanning.html
[5]: representing-code.html
[6]: parsing-expressions.html
[7]: evaluating-expressions.html
[8]: statements-and-state.html
[9]: control-flow.html
[10]: functions.html
[11]: resolving-and-binding.html
[12]: classes.html

<aside name="superhero">

<img src="image/inheritance/superhero.png" alt="You, being your bad self." />

</aside>

We did all of that from scratch, with no external dependencies or magic tools.
Just you and I, our respective text editors, a couple of collection classes in
the Java standard library, and the JVM runtime.

This marks the end of Part II, but not the end of the book. Take a break. Maybe
write a few fun Lox programs and run them in your interpreter. (You may want to
add a few more native methods for things like reading user input.) When you're
refreshed and ready, we'll embark on our [next adventure][].

[next adventure]: a-bytecode-virtual-machine.html

<div class="challenges">

## Challenges

1.  Lox supports only *single inheritance* -- a class may have a single
    superclass and that's the only way to reuse methods across classes. Other
    languages have explored a variety of ways to more freely reuse and share
    capabilities across classes: mixins, traits, multiple inheritance, virtual
    inheritance, extension methods, etc.

    If you were to add some feature along these lines to Lox, which would you
    pick and why? If you're feeling courageous (and you should be at this
    point), go ahead and add it.

1.  In Lox, as in most other object-oriented languages, when looking up a
    method, we start at the bottom of the class hierarchy and work our way up --
    a subclass's method is preferred over a superclass's. In order to get to the
    superclass method from within an overriding method, you use `super`.

    The language [BETA][] takes the [opposite approach][inner]. When you call a
    method, it starts at the *top* of the class hierarchy and works *down*. A
    superclass method wins over a subclass method. In order to get to the
    subclass method, the superclass method can call `inner`, which is sort of
    like the inverse of `super`. It chains to the next method down the
    hierarchy.

    The superclass method controls when and where the subclass is allowed to
    refine its behavior. If the superclass method doesn't call `inner` at all,
    then the subclass has no way of overriding or modifying the superclass's
    behavior.

    Take out Lox's current overriding and `super` behavior and replace it with
    BETA's semantics. In short:

    *   When calling a method on a class, prefer the method *highest* on the
        class's inheritance chain.

    *   Inside the body of a method, a call to `inner` looks for a method with
        the same name in the nearest subclass along the inheritance chain
        between the class containing the `inner` and the class of `this`. If
        there is no matching method, the `inner` call does nothing.

    For example:

    ```lox
    class Doughnut {
      cook() {
        print "Fry until golden brown.";
        inner();
        print "Place in a nice box.";
      }
    }

    class BostonCream < Doughnut {
      cook() {
        print "Pipe full of custard and coat with chocolate.";
      }
    }

    BostonCream().cook();
    ```

    This should print:

    ```text
    Fry until golden brown.
    Pipe full of custard and coat with chocolate.
    Place in a nice box.
    ```

1.  In the chapter where I introduced Lox, [I challenged you][challenge] to
    come up with a couple of features you think the language is missing. Now
    that you know how to build an interpreter, implement one of those features.

[challenge]: the-lox-language.html#challenges
[inner]: http://journal.stuffwithstuff.com/2012/12/19/the-impoliteness-of-overriding-methods/
[beta]: https://beta.cs.au.dk/

</div>


================================================
FILE: book/introduction.md
================================================
> Fairy tales are more than true: not because they tell us that dragons exist,
> but because they tell us that dragons can be beaten.
>
> <cite>G.K. Chesterton by way of Neil Gaiman, <em>Coraline</em></cite>

I'm really excited we're going on this journey together. This is a book on
implementing interpreters for programming languages. It's also a book on how to
design a language worth implementing. It's the book I wish I'd had when I first
started getting into languages, and it's the book I've been writing in my <span
name="head">head</span> for nearly a decade.

<aside name="head">

To my friends and family, sorry I've been so absentminded!

</aside>

In these pages, we will walk step-by-step through two complete interpreters for
a full-featured language. I assume this is your first foray into languages, so
I'll cover each concept and line of code you need to build a complete, usable,
fast language implementation.

In order to cram two full implementations inside one book without it turning
into a doorstop, this text is lighter on theory than others. As we build each
piece of the system, I will introduce the history and concepts behind it. I'll
try to get you familiar with the lingo so that if you ever find yourself at a
<span name="party">cocktail party</span> full of PL (programming language)
researchers, you'll fit in.

<aside name="party">

Strangely enough, a situation I have found myself in multiple times. You
wouldn't believe how much some of them can drink.

</aside>

But we're mostly going to spend our brain juice getting the language up and
running. This is not to say theory isn't important. Being able to reason
precisely and <span name="formal">formally</span> about syntax and semantics is
a vital skill when working on a language. But, personally, I learn best by
doing. It's hard for me to wade through paragraphs full of abstract concepts and
really absorb them. But if I've coded something, run it, and debugged it, then I
*get* it.

<aside name="formal">

Static type systems in particular require rigorous formal reasoning. Hacking on
a type system has the same feel as proving a theorem in mathematics.

It turns out this is no coincidence. In the early half of last century, Haskell
Curry and William Alvin Howard showed that they are two sides of the same coin:
[the Curry-Howard isomorphism][].

[the curry-howard isomorphism]: https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence

</aside>

That's my goal for you. I want you to come away with a solid intuition of how a
real language lives and breathes. My hope is that when you read other, more
theoretical books later, the concepts there will firmly stick in your mind,
adhered to this tangible substrate.

## Why Learn This Stuff?

Every introduction to every compiler book seems to have this section. I don't
know what it is about programming languages that causes such existential doubt.
I don't think ornithology books worry about justifying their existence. They
assume the reader loves birds and start teaching.

But programming languages are a little different. I suppose it is true that the
odds of any of us creating a broadly successful, general-purpose programming
language are slim. The designers of the world's widely used languages could fit
in a Volkswagen bus, even without putting the pop-top camper up. If joining that
elite group was the *only* reason to learn languages, it would be hard to
justify. Fortunately, it isn't.

### Little languages are everywhere

For every successful general-purpose language, there are a thousand successful
niche ones. We used to call them "little languages", but inflation in the jargon
economy led to the name "domain-specific languages". These are pidgins
tailor-built to a specific task. Think application scripting languages, template
engines, markup formats, and configuration files.

<span name="little"></span><img src="image/introduction/little-languages.png" alt="A random selection of little languages." />

<aside name="little">

A random selection of some little languages you might run into.

</aside>

Almost every large software project needs a handful of these. When you can, it's
good to reuse an existing one instead of rolling your own. Once you factor in
documentation, debuggers, editor support, syntax highlighting, and all of the
other trappings, doing it yourself becomes a tall order.

But there's still a good chance you'll find yourself needing to whip up a parser
or other tool when there isn't an existing library that fits your needs. Even
when you are reusing some existing implementation, you'll inevitably end up
needing to debug and maintain it and poke around in its guts.

### Languages are great exercise

Long-distance runners sometimes train with weights strapped to their ankles or
at high altitudes where the atmosphere is thin. When they later unburden
themselves, the new relative ease of light limbs and oxygen-rich air enables
them to run farther and faster.

Implementing a language is a real test of programming skill. The code is complex
and performance critical. You must master recursion, dynamic arrays, trees,
graphs, and hash tables. You probably use hash tables, at least, in your
day-to-day programming, but do you *really* understand them? Well, after we've
crafted our own from scratch, I guarantee you will.

While I intend to show you that an interpreter isn't as daunting as you might
believe, implementing one well is still a challenge. Rise to it, and you'll come
away a stronger programmer, and smarter about how you use data structures and
algorithms in your day job.

### One more reason

This last reason is hard for me to admit, because it's so close to my heart.
Ever since I learned to program as a kid, I felt there was something magical
about languages. When I first tapped out BASIC programs one key at a time, I
couldn't conceive how BASIC *itself* was made.

Later, the mixture of awe and terror on my college friends' faces when talking
about their compilers class was enough to convince me language hackers were a
different breed of human -- some sort of wizards granted privileged access to
arcane arts.

It's a charming <span name="image">image</span>, but it has a darker side. *I*
didn't feel like a wizard, so I was left thinking I lacked some inborn quality
necessary to join the cabal. Though I've been fascinated by languages ever since
I doodled made-up keywords in my school notebook, it took me decades to muster
the courage to try to really learn them. That "magical" quality, that sense of
exclusivity, excluded *me*.

<aside name="image">

And its practitioners don't hesitate to play up this image. Two of the seminal
texts on programming languages feature a [dragon][] and a [wizard][] on their
covers.

[dragon]: https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools
[wizard]: https://mitpress.mit.edu/sites/default/files/sicp/index.html

</aside>

When I did finally start cobbling together my own little interpreters, I quickly
learned that, of course, there is no magic at all. It's just code, and the
people who hack on languages are just people.

There *are* a few techniques you don't often encounter outside of languages, and
some parts are a little difficult. But not more difficult than other obstacles
you've overcome. My hope is that if you've felt intimidated by languages and
this book helps you overcome that fear, maybe I'll leave you just a tiny bit
braver than you were before.

And, who knows, maybe you *will* make the next great language. Someone has to.

## How the Book Is Organized

This book is broken into three parts. You're reading the first one now. It's a
couple of chapters to get you oriented, teach you some of the lingo that
language hackers use, and introduce you to Lox, the language we'll be
implementing.

Each of the other two parts builds one complete Lox interpreter. Within those
parts, each chapter is structured the same way. The chapter takes a single
language feature, teaches you the concepts behind it, and walks you through an
implementation.

It took a good bit of trial and error on my part, but I managed to carve up the
two interpreters into chapter-sized chunks that build on the previous chapters
but require nothing from later ones. From the very first chapter, you'll have a
working program you can run and play with. With each passing chapter, it grows
increasingly full-featured until you eventually have a complete language.

Aside from copious, scintillating English prose, chapters have a few other
delightful facets:

### The code

We're about *crafting* interpreters, so this book contains real code. Every
single line of code needed is included, and each snippet tells you where to
insert it in your ever-growing implementation.

Many other language books and language implementations use tools like [Lex][]
and <span name="yacc">[Yacc][]</span>, so-called **compiler-compilers**, that
automatically generate some of the source files for an implementation from some
higher-level description. There are pros and cons to tools like those, and
strong opinions -- some might say religious convictions -- on both sides.

<aside name="yacc">

Yacc is a tool that takes in a grammar file and produces a source file for a
compiler, so it's sort of like a "compiler" that outputs a compiler, which is
where we get the term "compiler-compiler".

Yacc wasn't the first of its ilk, which is why it's named "Yacc" -- *Yet
Another* Compiler-Compiler. A later similar tool is [Bison][], named as a pun on
the pronunciation of Yacc like "yak".

<img src="image/introduction/yak.png" alt="A yak." />

[bison]: https://en.wikipedia.org/wiki/GNU_bison

If you find all of these little self-references and puns charming and fun,
you'll fit right in here. If not, well, maybe the language nerd sense of humor
is an acquired taste.

</aside>

We will abstain from using them here. I want to ensure there are no dark corners
where magic and confusion can hide, so we'll write everything by hand. As you'll
see, it's not as bad as it sounds, and it means you really will understand each
line of code and how both interpreters work.

[lex]: https://en.wikipedia.org/wiki/Lex_(software)
[yacc]: https://en.wikipedia.org/wiki/Yacc

A book has different constraints from the "real world" and so the coding style
here might not always reflect the best way to write maintainable production
software. If I seem a little cavalier about, say, omitting `private` or
declaring a global variable, understand I do so to keep the code easier on your
eyes. The pages here aren't as wide as your IDE and every character counts.

Also, the code doesn't have many comments. That's because each handful of lines
is surrounded by several paragraphs of honest-to-God prose explaining it. When
you write a book to accompany your program, you are welcome to omit comments
too. Otherwise, you should probably use `//` a little more than I do.

While the book contains every line of code and teaches what each means, it does
not describe the machinery needed to compile and run the interpreter. I assume
you can slap together a makefile or a project in your IDE of choice in order to
get the code to run. Those kinds of instructions get out of date quickly, and
I want this book to age like XO brandy, not backyard hooch.

### Snippets

Since the book contains literally every line of code needed for the
implementations, the snippets are quite precise. Also, because I try to keep the
program in a runnable state even when major features are missing, sometimes we
add temporary code that gets replaced in later snippets.

A snippet with all the bells and whistles looks like this:

<div class="codehilite"><pre class="insert-before">
      default:
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()<br>
replace 1 line</div>
<pre class="insert">
        <span class="k">if</span> (<span class="i">isDigit</span>(<span class="i">c</span>)) {
          <span class="i">number</span>();
        } <span class="k">else</span> {
          <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">line</span>, <span class="s">&quot;Unexpected character.&quot;</span>);
        }
</pre><pre class="insert-after">
        break;
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>(), replace 1 line</div>

In the center, you have the new code to add. It may have a few faded out lines
above or below to show where it goes in the existing surrounding code. There is
also a little blurb telling you in which file and where to place the snippet. If
that blurb says "replace _ lines", there is some existing code between the faded
lines that you need to remove and replace with the new snippet.

### Asides

<span name="joke">Asides</span> contain biographical sketches, historical
background, references to related topics, and suggestions of other areas to
explore. There's nothing that you *need* to know in them to understand later
parts of the book, so you can skip them if you want. I won't judge you, but I
might be a little sad.

<aside name="joke">

Well, some asides do, at least. Most of them are just dumb jokes and amateurish
drawings.

</aside>

### Challenges

Each chapter ends with a few exercises. Unlike textbook problem sets, which tend
to review material you already covered, these are to help you learn *more* than
what's in the chapter. They force you to step off the guided path and explore on
your own. They will make you research other languages, figure out how to
implement features, or otherwise get you out of your comfort zone.

<span name="warning">Vanquish</span> the challenges and you'll come away with a
broader understanding and possibly a few bumps and scrapes. Or skip them if you
want to stay inside the comfy confines of the tour bus. It's your book.

<aside name="warning">

A word of warning: the challenges often ask you to make changes to the
interpreter you're building. You'll want to implement those in a copy of your
code. The later chapters assume your interpreter is in a pristine
("unchallenged"?) state.

</aside>

### Design notes

Most "programming language" books are strictly programming language
*implementation* books. They rarely discuss how one might happen to *design* the
language being implemented. Implementation is fun because it is so <span
name="benchmark">precisely defined</span>. We programmers seem to have an
affinity for things that are black and white, ones and zeroes.

<aside name="benchmark">

I know a lot of language hackers whose careers are based on this. You slide a
language spec under their door, wait a few months, and code and benchmark
results come out.

</aside>

Personally, I think the world needs only so many implementations of <span
name="fortran">FORTRAN 77</span>. At some point, you find yourself designing a
*new* language. Once you start playing *that* game, then the softer, human side
of the equation becomes paramount. Things like which features are easy to learn,
how to balance innovation and familiarity, what syntax is more readable and to
whom.

<aside name="fortran">

Hopefully your new language doesn't hardcode assumptions about the width of a
punched card into its grammar.

</aside>

All of that stuff profoundly affects the success of your new language. I want
your language to succeed, so in some chapters I end with a "design note", a
little essay on some corner of the human aspect of programming languages. I'm no
expert on this -- I don't know if anyone really is -- so take these with a large
pinch of salt. That should make them tastier food for thought, which is my main
aim.

## The First Interpreter

We'll write our first interpreter, jlox, in <span name="lang">Java</span>. The
focus is on *concepts*. We'll write the simplest, cleanest code we can to
correctly implement the semantics of the language. This will get us comfortable
with the basic techniques and also hone our understanding of exactly how the
language is supposed to behave.

<aside name="lang">

The book uses Java and C, but readers have ported the code to [many other
languages][port]. If the languages I picked aren't your bag, take a look at
those.

[port]: https://github.com/munificent/craftinginterpreters/wiki/Lox-implementations

</aside>

Java is a great language for this. It's high level enough that we don't get
overwhelmed by fiddly implementation details, but it's still pretty explicit.
Unlike in scripting languages, there tends to be less complex machinery hiding
under the hood, and you've got static types to see what data structures you're
working with.

I also chose Java specifically because it is an object-oriented language. That
paradigm swept the programming world in the '90s and is now the dominant way of
thinking for millions of programmers. Odds are good you're already used to
organizing code into classes and methods, so we'll keep you in that comfort
zone.

While academic language folks sometimes look down on object-oriented languages,
the reality is that they are widely used even for language work. GCC and LLVM
are written in C++, as are most JavaScript virtual machines. Object-oriented
languages are ubiquitous, and the tools and compilers *for* a language are often
written *in* the <span name="host">same language</span>.

<aside name="host">

A compiler reads files in one language, translates them, and outputs files in
another language. You can implement a compiler in any language, including the
same language it compiles, a process called **self-hosting**.

You can't compile your compiler using itself yet, but if you have another
compiler for your language written in some other language, you use *that* one to
compile your compiler once. Now you can use the compiled version of your own
compiler to compile future versions of itself, and you can discard the original
one compiled from the other compiler. This is called **bootstrapping**, from
the image of pulling yourself up by your own bootstraps.

<img src="image/introduction/bootstrap.png" alt="Fact: This is the primary mode of transportation of the American cowboy." />

</aside>

And, finally, Java is hugely popular. That means there's a good chance you
already know it, so there's less for you to learn to get going in the book. If
you aren't that familiar with Java, don't freak out. I try to stick to a fairly
minimal subset of it. I use the diamond operator from Java 7 to make things a
little more terse, but that's about it as far as "advanced" features go. If you
know another object-oriented language, like C# or C++, you can muddle through.

By the end of Part II, we'll have a simple, readable implementation. It's not
very fast, but it's correct. However, we are only able to accomplish that by
building on the Java virtual machine's own runtime facilities. We want to learn
how Java *itself* implements those things.

## The Second Interpreter

So in the next part, we start all over again, but this time in C. C is the
perfect language for understanding how an implementation *really* works, all the
way down to the bytes in memory and the code flowing through the CPU.

A big reason that we're using C is so I can show you things C is particularly
good at, but that *does* mean you'll need to be pretty comfortable with it. You
don't have to be the reincarnation of Dennis Ritchie, but you shouldn't be
spooked by pointers either.

If you aren't there yet, pick up an introductory book on C and chew through it,
then come back here when you're done. In return, you'll come away from this book
an even stronger C programmer. That's useful given how many language
implementations are written in C: Lua, CPython, and Ruby's MRI, to name a few.

In our C interpreter, <span name="clox">clox</span>, we are forced to implement
for ourselves all the things Java gave us for free. We'll write our own dynamic
array and hash table. We'll decide how objects are represented in memory, and
build a garbage collector to reclaim them.

<aside name="clox">

I pronounce the name like "sea-locks", but you can say it "clocks" or even
"cloch", where you pronounce the "x" like the Greeks do if it makes you happy.

</aside>

Our Java implementation was focused on being correct. Now that we have that
down, we'll turn to also being *fast*. Our C interpreter will contain a <span
name="compiler">compiler</span> that translates Lox to an efficient bytecode
representation (don't worry, I'll get into what that means soon), which it then
executes. This is the same technique used by implementations of Lua, Python,
Ruby, PHP, and many other successful languages.

<aside name="compiler">

Did you think this was just an interpreter book? It's a compiler book as well.
Two for the price of one!

</aside>

We'll even try our hand at benchmarking and optimization. By the end, we'll have
a robust, accurate, fast interpreter for our language, able to keep up with
other professional-caliber implementations out there. Not bad for one book and a
few thousand lines of code.

<div class="challenges">

## Challenges

1.  There are at least six domain-specific languages used in the [little system
    I cobbled together][repo] to write and publish this book. What are they?

1.  Get a "Hello, world!" program written and running in Java. Set up whatever
    makefiles or IDE projects you need to get it working. If you have a
    debugger, get comfortable with it and step through your program as it runs.

1.  Do the same thing for C. To get some practice with pointers, define a
    [doubly linked list][] of heap-allocated strings. Write functions to insert,
    find, and delete items from it. Test them.

[repo]: https://github.com/munificent/craftinginterpreters
[doubly linked list]: https://en.wikipedia.org/wiki/Doubly_linked_list

</div>

<div class="design-note">

## Design Note: What's in a Name?

One of the hardest challenges in writing this book was coming up with a name for
the language it implements. I went through *pages* of candidates before I found
one that worked. As you'll discover on the first day you start building your own
language, naming is deviously hard. A good name satisfies a few criteria:

1.  **It isn't in use.** You can run into all sorts of trouble, legal and
    social, if you inadvertently step on someone else's name.

2.  **It's easy to pronounce.** If things go well, hordes of people will be
    saying and writing your language's name. Anything longer than a couple of
    syllables or a handful of letters will annoy them to no end.

3.  **It's distinct enough to search for.** People will Google your language's
    name to learn about it, so you want a word that's rare enough that most
    results point to your docs. Though, with the amount of AI search engines are
    packing today, that's less of an issue. Still, you won't be doing your users
    any favors if you name your language "for".

4.  **It doesn't have negative connotations across a number of cultures.** This
    is hard to be on guard for, but it's worth considering. The designer of
    Nimrod ended up renaming his language to "Nim" because too many people
    remember that Bugs Bunny used "Nimrod" as an insult. (Bugs was using it
    ironically.)

If your potential name makes it through that gauntlet, keep it. Don't get hung
up on trying to find an appellation that captures the quintessence of your
language. If the names of the world's other successful languages teach us
anything, it's that the name doesn't matter much. All you need is a reasonably
unique token.

</div>


================================================
FILE: book/jumping-back-and-forth.md
================================================
> The order that our mind imagines is like a net, or like a ladder, built to
> attain something. But afterward you must throw the ladder away, because you
> discover that, even if it was useful, it was meaningless.
>
> <cite>Umberto Eco, <em>The Name of the Rose</em></cite>

It's taken a while to get here, but we're finally ready to add control flow to
our virtual machine. In the tree-walk interpreter we built for jlox, we
implemented Lox's control flow in terms of Java's. To execute a Lox `if`
statement, we used a Java `if` statement to run the chosen branch. That works,
but isn't entirely satisfying. By what magic does the *JVM itself* or a native
CPU implement `if` statements? Now that we have our own bytecode VM to hack on,
we can answer that.

When we talk about "control flow", what are we referring to? By "flow" we mean
the way execution moves through the text of the program. Almost like there is a
little robot inside the computer wandering through our code, executing bits and
pieces here and there. Flow is the path that robot takes, and by *controlling*
the robot, we drive which pieces of code it executes.

In jlox, the robot's locus of attention -- the *current* bit of code -- was
implicit based on which AST nodes were stored in various Java variables and what
Java code we were in the middle of running. In clox, it is much more explicit.
The VM's `ip` field stores the address of the current bytecode instruction. The
value of that field is exactly "where we are" in the program.

Execution proceeds normally by incrementing the `ip`. But we can mutate that
variable however we want to. In order to implement control flow, all that's
necessary is to change the `ip` in more interesting ways. The simplest control
flow construct is an `if` statement with no `else` clause:

```lox
if (condition) print("condition was truthy");
```

The VM evaluates the bytecode for the condition expression. If the result is
truthy, then it continues along and executes the `print` statement in the body.
The interesting case is when the condition is falsey. When that happens,
execution skips over the then branch and proceeds to the next statement.

To skip over a chunk of code, we simply set the `ip` field to the address of the
bytecode instruction following that code. To *conditionally* skip over some
code, we need an instruction that looks at the value on top of the stack. If
it's falsey, it adds a given offset to the `ip` to jump over a range of
instructions. Otherwise, it does nothing and lets execution proceed to the next
instruction as usual.

When we compile to bytecode, the explicit nested block structure of the code
evaporates, leaving only a flat series of instructions behind. Lox is a
[structured programming][] language, but clox bytecode isn't. The right -- or
wrong, depending on how you look at it -- set of bytecode instructions could
jump into the middle of a block, or from one scope into another.

The VM will happily execute that, even if the result leaves the stack in an
unknown, inconsistent state. So even though the bytecode is unstructured, we'll
take care to ensure that our compiler only generates clean code that maintains
the same structure and nesting that Lox itself does.

This is exactly how real CPUs behave. Even though we might program them using
higher-level languages that mandate structured control flow, the compiler lowers
that down to raw jumps. At the bottom, it turns out goto is the only real
control flow.

[structured programming]: https://en.wikipedia.org/wiki/Structured_programming

Anyway, I didn't mean to get all philosophical. The important bit is that if we
have that one conditional jump instruction, that's enough to implement Lox's
`if` statement, as long as it doesn't have an `else` clause. So let's go ahead
and get started with that.

## If Statements

This many chapters in, you know the drill. Any new feature starts in the front
end and works its way through the pipeline. An `if` statement is, well, a
statement, so that's where we hook it into the parser.

^code parse-if (2 before, 1 after)

When we see an `if` keyword, we hand off compilation to this function:

^code if-statement

<aside name="paren">

Have you ever noticed that the `(` after the `if` keyword doesn't actually do
anything useful? The language would be just as unambiguous and easy to parse
without it, like:

```lox
if condition) print("looks weird");
```

The closing `)` is useful because it separates the condition expression from the
body. Some languages use a `then` keyword instead. But the opening `(` doesn't
do anything. It's just there because unmatched parentheses look bad to us
humans.

</aside>

First we compile the condition expression, bracketed by parentheses. At runtime,
that will leave the condition value on top of the stack. We'll use that to
determine whether to execute the then branch or skip it.

Then we emit a new `OP_JUMP_IF_FALSE` instruction. It has an operand for how
much to offset the `ip` -- how many bytes of code to skip. If the condition is
falsey, it adjusts the `ip` by that amount. Something like this:

<aside name="legend">

The boxes with the torn edges here represent the blob of bytecode generated by
compiling some sub-clause of a control flow construct. So the "condition
expression" box is all of the instructions emitted when we compiled that
expression.

</aside>

<span name="legend"></span>

<img src="image/jumping-back-and-forth/if-without-else.png" alt="Flowchart of the compiled bytecode of an if statement." />

But we have a problem. When we're writing the `OP_JUMP_IF_FALSE` instruction's
operand, how do we know how far to jump? We haven't compiled the then branch
yet, so we don't know how much bytecode it contains.

To fix that, we use a classic trick called **backpatching**. We emit the jump
instruction first with a placeholder offset operand. We keep track of where that
half-finished instruction is. Next, we compile the then body. Once that's done,
we know how far to jump. So we go back and replace that placeholder offset with
the real one now that we can calculate it. Sort of like sewing a patch onto the
existing fabric of the compiled code.

<img src="image/jumping-back-and-forth/patch.png" alt="A patch containing a number being sewn onto a sheet of bytecode." />

We encode this trick into two helper functions.

^code emit-jump

The first emits a bytecode instruction and writes a placeholder operand for the
jump offset. We pass in the opcode as an argument because later we'll have two
different instructions that use this helper. We use two bytes for the jump
offset operand. A 16-bit <span name="offset">offset</span> lets us jump over up
to 65,535 bytes of code, which should be plenty for our needs.

<aside name="offset">

Some instruction sets have separate "long" jump instructions that take larger
operands for when you need to jump a greater distance.

</aside>

The function returns the offset of the emitted instruction in the chunk. After
compiling the then branch, we take that offset and pass it to this:

^code patch-jump

This goes back into the bytecode and replaces the operand at the given location
with the calculated jump offset. We call `patchJump()` right before we emit the
next instruction that we want the jump to land on, so it uses the current
bytecode count to determine how far to jump. In the case of an `if` statement,
that means right after we compile the then branch and before we compile the next
statement.

That's all we need at compile time. Let's define the new instruction.

^code jump-if-false-op (1 before, 1 after)

Over in the VM, we get it working like so:

^code op-jump-if-false (2 before, 1 after)

This is the first instruction we've added that takes a 16-bit operand. To read
that from the chunk, we use a new macro.

^code read-short (1 before, 1 after)

It yanks the next two bytes from the chunk and builds a 16-bit unsigned integer
out of them. As usual, we clean up our macro when we're done with it.

^code undef-read-short (1 before, 1 after)

After reading the offset, we check the condition value on top of the stack.
<span name="if">If</span> it's falsey, we apply this jump offset to the `ip`.
Otherwise, we leave the `ip` alone and execution will automatically proceed to
the next instruction following the jump instruction.

In the case where the condition is falsey, we don't need to do any other work.
We've offset the `ip`, so when the outer instruction dispatch loop turns again,
it will pick up execution at that new instruction, past all of the code in the
then branch.

<aside name="if">

I said we wouldn't use C's `if` statement to implement Lox's control flow, but
we do use one here to determine whether or not to offset the instruction
pointer. But we aren't really using C for *control flow*. If we wanted to, we
could do the same thing purely arithmetically. Let's assume we have a function
`falsey()` that takes a Lox Value and returns 1 if it's falsey or 0 otherwise.
Then we could implement the jump instruction like:

```c
case OP_JUMP_IF_FALSE: {
  uint16_t offset = READ_SHORT();
  vm.ip += falsey() * offset;
  break;
}
```

The `falsey()` function would probably use some control flow to handle the
different value types, but that's an implementation detail of that function and
doesn't affect how our VM does its own control flow.

</aside>

Note that the jump instruction doesn't pop the condition value off the stack. So
we aren't totally done here, since this leaves an extra value floating around on
the stack. We'll clean that up soon. Ignoring that for the moment, we do have a
working `if` statement in Lox now, with only one little instruction required to
support it at runtime in the VM.

### Else clauses

An `if` statement without support for `else` clauses is like Morticia Addams
without Gomez. So, after we compile the then branch, we look for an `else`
keyword. If we find one, we compile the else branch.

^code compile-else (1 before, 1 after)

When the condition is falsey, we'll jump over the then branch. If there's an
else branch, the `ip` will land right at the beginning of its code. But that's
not enough. Here's the flow that leads to:

<img src="image/jumping-back-and-forth/bad-else.png" alt="Flowchart of the compiled bytecode with the then branch incorrectly falling through to the else branch." />

If the condition is truthy, we execute the then branch like we want. But after
that, execution rolls right on through into the else branch. Oops! When the
condition is true, after we run the then branch, we need to jump over the else
branch. That way, in either case, we only execute a single branch, like this:

<img src="image/jumping-back-and-forth/if-else.png" alt="Flowchart of the compiled bytecode for an if with an else clause." />

To implement that, we need another jump from the end of the then branch.

^code jump-over-else (2 before, 1 after)

We patch that offset after the end of the else body.

^code patch-else (1 before, 1 after)

After executing the then branch, this jumps to the next statement after the else
branch. Unlike the other jump, this jump is unconditional. We always take it, so
we need another instruction that expresses that.

^code jump-op (1 before, 1 after)

We interpret it like so:

^code op-jump (2 before, 1 after)

Nothing too surprising here -- the only difference is that it doesn't check a
condition and always applies the offset.

We have then and else branches working now, so we're close. The last bit is to
clean up that condition value we left on the stack. Remember, each statement is
required to have zero stack effect -- after the statement is finished executing,
the stack should be as tall as it was before.

We could have the `OP_JUMP_IF_FALSE` instruction pop the condition itself, but
soon we'll use that same instruction for the logical operators where we don't
want the condition popped. Instead, we'll have the compiler emit a couple of
explicit `OP_POP` instructions when compiling an `if` statement. We need to take
care that every execution path through the generated code pops the condition.

When the condition is truthy, we pop it right before the code inside the then
branch.

^code pop-then (1 before, 1 after)

Otherwise, we pop it at the beginning of the else branch.

^code pop-end (1 before, 2 after)

This little instruction here also means that every `if` statement has an
implicit else branch even if the user didn't write an `else` clause. In the case
where they left it off, all the branch does is discard the condition value.

The full correct flow looks like this:

<img src="image/jumping-back-and-forth/full-if-else.png" alt="Flowchart of the compiled bytecode including necessary pop instructions." />

If you trace through, you can see that it always executes a single branch and
ensures the condition is popped first. All that remains is a little disassembler
support.

^code disassemble-jump (1 before, 1 after)

These two instructions have a new format with a 16-bit operand, so we add a new
utility function to disassemble them.

^code jump-instruction

There we go, that's one complete control flow construct. If this were an '80s
movie, the montage music would kick in and the rest of the control flow syntax
would take care of itself. Alas, the <span name="80s">'80s</span> are long over,
so we'll have to grind it out ourselves.

<aside name="80s">

My enduring love of Depeche Mode notwithstanding.

</aside>

## Logical Operators

You probably remember this from jlox, but the logical operators `and` and `or`
aren't just another pair of binary operators like `+` and `-`. Because they
short-circuit and may not evaluate their right operand depending on the value of
the left one, they work more like control flow expressions.

They're basically a little variation on an `if` statement with an `else` clause.
The easiest way to explain them is to just show you the compiler code and the
control flow it produces in the resulting bytecode. Starting with `and`, we hook
it into the expression parsing table here:

^code table-and (1 before, 1 after)

That hands off to a new parser function.

^code and

At the point this is called, the left-hand side expression has already been
compiled. That means at runtime, its value will be on top of the stack. If that
value is falsey, then we know the entire `and` must be false, so we skip the
right operand and leave the left-hand side value as the result of the entire
expression. Otherwise, we discard the left-hand value and evaluate the right
operand which becomes the result of the whole `and` expression.

Those four lines of code right there produce exactly that. The flow looks like
this:

<img src="image/jumping-back-and-forth/and.png" alt="Flowchart of the compiled bytecode of an 'and' expression." />

Now you can see why `OP_JUMP_IF_FALSE` <span name="instr">leaves</span> the
value on top of the stack. When the left-hand side of the `and` is falsey, that
value sticks around to become the result of the entire expression.

<aside name="instr">

We've got plenty of space left in our opcode range, so we could have separate
instructions for conditional jumps that implicitly pop and those that don't, I
suppose. But I'm trying to keep things minimal for the book. In your bytecode
VM, it's worth exploring adding more specialized instructions and seeing how
they affect performance.

</aside>

### Logical or operator

The `or` operator is a little more complex. First we add it to the parse table.

^code table-or (1 before, 1 after)

When that parser consumes an infix `or` token, it calls this:

^code or

In an `or` expression, if the left-hand side is *truthy*, then we skip over the
right operand. Thus we need to jump when a value is truthy. We could add a
separate instruction, but just to show how our compiler is free to map the
language's semantics to whatever instruction sequence it wants, I implemented it
in terms of the jump instructions we already have.

When the left-hand side is falsey, it does a tiny jump over the next
instruction. That instruction is an unconditional jump over the code for the
right operand. This little dance effectively does a jump when the value is
truthy. The flow looks like this:

<img src="image/jumping-back-and-forth/or.png" alt="Flowchart of the compiled bytecode of a logical or expression." />

If I'm honest with you, this isn't the best way to do this. There are more
instructions to dispatch and more overhead. There's no good reason why `or`
should be slower than `and`. But it is kind of fun to see that it's possible to
implement both operators without adding any new instructions. Forgive me my
indulgences.

OK, those are the three *branching* constructs in Lox. By that, I mean these
are the control flow features that only jump *forward* over code. Other
languages often have some kind of multi-way branching statement like `switch`
and maybe a conditional expression like `?:`, but Lox keeps it simple.

## While Statements

That takes us to the *looping* statements, which jump *backward* so that code
can be executed more than once. Lox only has two loop constructs, `while` and
`for`. A `while` loop is (much) simpler, so we start the party there.

^code parse-while (1 before, 1 after)

When we reach a `while` token, we call:

^code while-statement

Most of this mirrors `if` statements -- we compile the condition expression,
surrounded by mandatory parentheses. That's followed by a jump instruction that
skips over the subsequent body statement if the condition is falsey.

We patch the jump after compiling the body and take care to <span
name="pop">pop</span> the condition value from the stack on either path. The
only difference from an `if` statement is the loop. That looks like this:

<aside name="pop">

Really starting to second-guess my decision to use the same jump instructions
for the logical operators.

</aside>

^code loop (1 before, 2 after)

After the body, we call this function to emit a "loop" instruction. That
instruction needs to know how far back to jump. When jumping forward, we had to
emit the instruction in two stages since we didn't know how far we were going to
jump until after we emitted the jump instruction. We don't have that problem
now. We've already compiled the point in code that we want to jump back to --
it's right before the condition expression.

All we need to do is capture that location as we compile it.

^code loop-start (1 before, 1 after)

After executing the body of a `while` loop, we jump all the way back to before
the condition. That way, we re-evaluate the condition expression on each
iteration. We store the chunk's current instruction count in `loopStart` to
record the offset in the bytecode right before the condition expression we're
about to compile. Then we pass that into this helper function:

^code emit-loop

It's a bit like `emitJump()` and `patchJump()` combined. It emits a new loop
instruction, which unconditionally jumps *backwards* by a given offset. Like the
jump instructions, after that we have a 16-bit operand. We calculate the offset
from the instruction we're currently at to the `loopStart` point that we want to
jump back to. The `+ 2` is to take into account the size of the `OP_LOOP`
instruction's own operands which we also need to jump over.

From the VM's perspective, there really is no semantic difference between
`OP_LOOP` and `OP_JUMP`. Both just add an offset to the `ip`. We could have used
a single instruction for both and given it a signed offset operand. But I
figured it was a little easier to sidestep the annoying bit twiddling required
to manually pack a signed 16-bit integer into two bytes, and we've got the
opcode space available, so why not use it?

The new instruction is here:

^code loop-op (1 before, 1 after)

And in the VM, we implement it thusly:

^code op-loop (1 before, 1 after)

The only difference from `OP_JUMP` is a subtraction instead of an addition.
Disassembly is similar too.

^code disassemble-loop (1 before, 1 after)

That's our `while` statement. It contains two jumps -- a conditional forward one
to escape the loop when the condition is not met, and an unconditional loop
backward after we have executed the body. The flow looks like this:

<img src="image/jumping-back-and-forth/while.png" alt="Flowchart of the compiled bytecode of a while statement." />

## For Statements

The other looping statement in Lox is the venerable `for` loop, inherited from
C. It's got a lot more going on with it compared to a `while` loop. It has three
clauses, all of which are optional:

<span name="detail"></span>

*   The initializer can be a variable declaration or an expression. It runs once
    at the beginning of the statement.

*   The condition clause is an expression. Like in a `while` loop, we exit the
    loop when it evaluates to something falsey.

*   The increment expression runs once at the end of each loop iteration.

<aside name="detail">

If you want a refresher, the corresponding chapter in part II goes through the
semantics [in more detail][jlox].

[jlox]: control-flow.html#for-loops

</aside>

In jlox, the parser desugared a `for` loop to a synthesized AST for a `while`
loop with some extra stuff before it and at the end of the body. We'll do
something similar, though we won't go through anything like an AST. Instead,
our bytecode compiler will use the jump and loop instructions we already have.

We'll work our way through the implementation a piece at a time, starting with
the `for` keyword.

^code parse-for (1 before, 1 after)

It calls a helper function. If we only supported `for` loops with empty clauses
like `for (;;)`, then we could implement it like this:

^code for-statement

There's a bunch of mandatory punctuation at the top. Then we compile the body.
Like we did for `while` loops, we record the bytecode offset at the top of the
body and emit a loop to jump back to that point after it. We've got a working
implementation of <span name="infinite">infinite</span> loops now.

<aside name="infinite">

Alas, without `return` statements, there isn't any way to terminate it short of
a runtime error.

</aside>

### Initializer clause

Now we'll add the first clause, the initializer. It executes only once, before
the body, so compiling is straightforward.

^code for-initializer (1 before, 2 after)

The syntax is a little complex since we allow either a variable declaration or
an expression. We use the presence of the `var` keyword to tell which we have.
For the expression case, we call `expressionStatement()` instead of
`expression()`. That looks for a semicolon, which we need here too, and also
emits an `OP_POP` instruction to discard the value. We don't want the
initializer to leave anything on the stack.

If a `for` statement declares a variable, that variable should be scoped to the
loop body. We ensure that by wrapping the whole statement in a scope.

^code for-begin-scope (1 before, 1 after)

Then we close it at the end.

^code for-end-scope (1 before, 1 after)

### Condition clause

Next is the condition expression that can be used to exit the loop.

^code for-exit (1 before, 1 after)

Since the clause is optional, we need to see if it's actually present. If the
clause is omitted, the next token must be a semicolon, so we look for that to
tell. If there isn't a semicolon, there must be a condition expression.

In that case, we compile it. Then, just like with `while`, we emit a conditional
jump that exits the loop if the condition is falsey. Since the jump leaves the
value on the stack, we pop it before executing the body. That ensures we discard
the value when the condition is true.

After the loop body, we need to patch that jump.

^code exit-jump (1 before, 2 after)

We do this only when there is a condition clause. If there isn't, there's no
jump to patch and no condition value on the stack to pop.

### Increment clause

I've saved the best for last, the increment clause. It's pretty convoluted. It
appears textually before the body, but executes *after* it. If we parsed to an
AST and generated code in a separate pass, we could simply traverse into and
compile the `for` statement AST's body field before its increment clause.

Unfortunately, we can't compile the increment clause later, since our compiler
only makes a single pass over the code. Instead, we'll *jump over* the
increment, run the body, jump *back* up to the increment, run it, and then go to
the next iteration.

I know, a little weird, but hey, it beats manually managing ASTs in memory in C,
right? Here's the code:

^code for-increment (2 before, 2 after)

Again, it's optional. Since this is the last clause, when omitted, the next
token will be the closing parenthesis. When an increment is present, we need to
compile it now, but it shouldn't execute yet. So, first, we emit an
unconditional jump that hops over the increment clause's code to the body of the
loop.

Next, we compile the increment expression itself. This is usually an assignment.
Whatever it is, we only execute it for its side effect, so we also emit a pop to
discard its value.

The last part is a little tricky. First, we emit a loop instruction. This is the
main loop that takes us back to the top of the `for` loop -- right before the
condition expression if there is one. That loop happens right after the
increment, since the increment executes at the end of each loop iteration.

Then we change `loopStart` to point to the offset where the increment expression
begins. Later, when we emit the loop instruction after the body statement, this
will cause it to jump up to the *increment* expression instead of the top of the
loop like it does when there is no increment. This is how we weave the
increment in to run after the body.

It's convoluted, but it all works out. A complete loop with all the clauses
compiles to a flow like this:

<img src="image/jumping-back-and-forth/for.png" alt="Flowchart of the compiled bytecode of a for statement." />

As with implementing `for` loops in jlox, we didn't need to touch the runtime.
It all gets compiled down to primitive control flow operations the VM already
supports. In this chapter, we've taken a big <span name="leap">leap</span>
forward -- clox is now Turing complete. We've also covered quite a bit of new
syntax: three statements and two expression forms. Even so, it only took three
new simple instructions. That's a pretty good effort-to-reward ratio for the
architecture of our VM.

<aside name="leap">

I couldn't resist the pun. I regret nothing.

</aside>

<div class="challenges">

## Challenges

1.  In addition to `if` statements, most C-family languages have a multi-way
    `switch` statement. Add one to clox. The grammar is:

    ```ebnf
    switchStmt     → "switch" "(" expression ")"
                     "{" switchCase* defaultCase? "}" ;
    switchCase     → "case" expression ":" statement* ;
    defaultCase    → "default" ":" statement* ;
    ```

    To execute a `switch` statement, first evaluate the parenthesized switch
    value expression. Then walk the cases. For each case, evaluate its value
    expression. If the case value is equal to the switch value, execute the
    statements under the case and then exit the `switch` statement. Otherwise,
    try the next case. If no case matches and there is a `default` clause,
    execute its statements.

    To keep things simpler, we're omitting fallthrough and `break` statements.
    Each case automatically jumps to the end of the switch statement after its
    statements are done.

1.  In jlox, we had a challenge to add support for `break` statements. This
    time, let's do `continue`:

    ```ebnf
    continueStmt   → "continue" ";" ;
    ```

    A `continue` statement jumps directly to the top of the nearest enclosing
    loop, skipping the rest of the loop body. Inside a `for` loop, a `continue`
    jumps to the increment clause, if there is one. It's a compile-time error to
    have a `continue` statement not enclosed in a loop.

    Make sure to think about scope. What should happen to local variables
    declared inside the body of the loop or in blocks nested inside the loop
    when a `continue` is executed?

1.  Control flow constructs have been mostly unchanged since Algol 68. Language
    evolution since then has focused on making code more declarative and high
    level, so imperative control flow hasn't gotten much attention.

    For fun, try to invent a useful novel control flow feature for Lox. It can
    be a refinement of an existing form or something entirely new. In practice,
    it's hard to come up with something useful enough at this low expressiveness
    level to outweigh the cost of forcing a user to learn an unfamiliar notation
    and behavior, but it's a good chance to practice your design skills.

</div>

<div class="design-note">

## Design Note: Considering Goto Harmful

Discovering that all of our beautiful structured control flow in Lox is actually
compiled to raw unstructured jumps is like the moment in Scooby Doo when the
monster rips the mask off their face. It was goto all along! Except in this
case, the monster is *under* the mask. We all know goto is evil. But... why?

It is true that you can write outrageously unmaintainable code using goto. But I
don't think most programmers around today have seen that first hand. It's been a
long time since that style was common. These days, it's a bogeyman we invoke
in scary stories around the campfire.

The reason we rarely confront that monster in person is because Edsger Dijkstra
slayed it with his famous letter "Go To Statement Considered Harmful", published
in *Communications of the ACM* (March, 1968). Debate around structured
programming had been fierce for some time with adherents on both sides, but I
think Dijkstra deserves the most credit for effectively ending it. Most new
languages today have no unstructured jump statements.

A one-and-a-half page letter that almost single-handedly destroyed a language
feature must be pretty impressive stuff. If you haven't read it, I encourage you
to do so. It's a seminal piece of computer science lore, one of our tribe's
ancestral songs. Also, it's a nice, short bit of practice for reading academic
CS <span name="style">writing</span>, which is a useful skill to develop.

<aside name="style">

That is, if you can get past Dijkstra's insufferable faux-modest
self-aggrandizing writing style:

> More recently I discovered why the use of the go to statement has such
> disastrous effects. ...At that time I did not attach too much importance to
> this discovery; I now submit my considerations for publication because in very
> recent discussions in which the subject turned up, I have been urged to do so.

Ah, yet another one of my many discoveries. I couldn't even be bothered to write
it up until the clamoring masses begged me to.

</aside>

I've read it through a number of times, along with a few critiques, responses,
and commentaries. I ended up with mixed feelings, at best. At a very high level,
I'm with him. His general argument is something like this:

1.  As programmers, we write programs -- static text -- but what we care about
    is the actual running program -- its dynamic behavior.

2.  We're better at reasoning about static things than dynamic things. (He
    doesn't provide any evidence to support this claim, but I accept it.)

3.  Thus, the more we can make the dynamic execution of the program reflect its
    textual structure, the better.

This is a good start. Drawing our attention to the separation between the code
we write and the code as it runs inside the machine is an interesting insight.
Then he tries to define a "correspondence" between program text and execution.
For someone who spent literally his entire career advocating greater rigor in
programming, his definition is pretty hand-wavey. He says:

> Let us now consider how we can characterize the progress of a process. (You
> may think about this question in a very concrete manner: suppose that a
> process, considered as a time succession of actions, is stopped after an
> arbitrary action, what data do we have to fix in order that we can redo the
> process until the very same point?)

Imagine it like this. You have two computers with the same program running on
the exact same inputs -- so totally deterministic. You pause one of them at an
arbitrary point in its execution. What data would you need to send to the other
computer to be able to stop it exactly as far along as the first one was?

If your program allows only simple statements like assignment, it's easy. You
just need to know the point after the last statement you executed. Basically a
breakpoint, the `ip` in our VM, or the line number in an error message. Adding
branching control flow like `if` and `switch` doesn't add any more to this. Even
if the marker points inside a branch, we can still tell where we are.

Once you add function calls, you need something more. You could have paused the
first computer in the middle of a function, but that function may be called from
multiple places. To pause the second machine at exactly the same point in *the
entire program's* execution, you need to pause it on the *right* call to that
function.

So you need to know not just the current statement, but, for function calls that
haven't returned yet, you need to know the locations of the callsites. In other
words, a call stack, though I don't think that term existed when Dijkstra wrote
this. Groovy.

He notes that loops make things harder. If you pause in the middle of a loop
body, you don't know how many iterations have run. So he says you also need to
keep an iteration count. And, since loops can nest, you need a stack of those
(presumably interleaved with the call stack pointers since you can be in loops
in outer calls too).

This is where it gets weird. So we're really building to something now, and you
expect him to explain how goto breaks all of this. Instead, he just says:

> The unbridled use of the go to statement has an immediate consequence that it
> becomes terribly hard to find a meaningful set of coordinates in which to
> describe the process progress.

He doesn't prove that this is hard, or say why. He just says it. He does say
that one approach is unsatisfactory:

> With the go to statement one can, of course, still describe the progress
> uniquely by a counter counting the number of actions performed since program
> start (viz. a kind of normalized clock). The difficulty is that such a
> coordinate, although unique, is utterly unhelpful.

But... that's effectively what loop counters do, and he was fine with those.
It's not like every loop is a simple "for every integer from 0 to 10"
incrementing count. Many are `while` loops with complex conditionals.

Taking an example close to home, consider the core bytecode execution loop at
the heart of clox. Dijkstra argues that that loop is tractable because we can
simply count how many times the loop has run to reason about its progress. But
that loop runs once for each executed instruction in some user's compiled Lox
program. Does knowing that it executed 6,201 bytecode instructions really tell
us VM maintainers *anything* edifying about the state of the interpreter?

In fact, this particular example points to a deeper truth. Böhm and Jacopini
[proved][] that *any* control flow using goto can be transformed into one using
just sequencing, loops, and branches. Our bytecode interpreter loop is a living
example of that proof: it implements the unstructured control flow of the clox
bytecode instruction set without using any gotos itself.

[proved]: https://en.wikipedia.org/wiki/Structured_program_theorem

That seems to offer a counter-argument to Dijkstra's claim: you *can* define a
correspondence for a program using gotos by transforming it to one that doesn't
and then use the correspondence from that program, which -- according to him --
is acceptable because it uses only branches and loops.

But, honestly, my argument here is also weak. I think both of us are basically
doing pretend math and using fake logic to make what should be an empirical,
human-centered argument. Dijkstra is right that some code using goto is really
bad. Much of that could and should be turned into clearer code by using
structured control flow.

By eliminating goto completely from languages, you're definitely prevented from
writing bad code using gotos. It may be that forcing users to use structured
control flow and making it an uphill battle to write goto-like code using those
constructs is a net win for all of our productivity.

But I do wonder sometimes if we threw out the baby with the bathwater. In the
absence of goto, we often resort to more complex structured patterns. The
"switch inside a loop" is a classic one. Another is using a guard variable to
exit out of a series of nested loops:

<span name="break">
</span>

```c
// See if the matrix contains a zero.
bool found = false;
for (int x = 0; x < xSize; x++) {
  for (int y = 0; y < ySize; y++) {
    for (int z = 0; z < zSize; z++) {
      if (matrix[x][y][z] == 0) {
        printf("found");
        found = true;
        break;
      }
    }
    if (found) break;
  }
  if (found) break;
}
```

Is that really better than:

```c
for (int x = 0; x < xSize; x++) {
  for (int y = 0; y < ySize; y++) {
    for (int z = 0; z < zSize; z++) {
      if (matrix[x][y][z] == 0) {
        printf("found");
        goto done;
      }
    }
  }
}
done:
```

<aside name="break">

You could do this without `break` statements -- themselves a limited goto-ish
construct -- by inserting `!found &&` at the beginning of the condition clause
of each loop.

</aside>

I guess what I really don't like is that we're making language design and
engineering decisions today based on fear. Few people today have any subtle
understanding of the problems and benefits of goto. Instead, we just think it's
"considered harmful". Personally, I've never found dogma a good starting place
for quality creative work.

</div>


================================================
FILE: book/local-variables.md
================================================
> And as imagination bodies forth<br />
> The forms of things unknown, the poet's pen<br />
> Turns them to shapes and gives to airy nothing<br />
> A local habitation and a name.
>
> <cite>William Shakespeare, <em>A Midsummer Night's Dream</em></cite>

The [last chapter][] introduced variables to clox, but only of the <span
name="global">global</span> variety. In this chapter, we'll extend that to
support blocks, block scope, and local variables. In jlox, we managed to pack
all of that and globals into one chapter. For clox, that's two chapters' worth of
work partially because, frankly, everything takes more effort in C.

<aside name="global">

There's probably some dumb "think globally, act locally" joke here, but I'm
struggling to find it.

</aside>

[last chapter]: global-variables.html

But an even more important reason is that our approach to local variables will
be quite different from how we implemented globals. Global variables are late
bound in Lox. "Late" in this context means "resolved after compile time". That's
good for keeping the compiler simple, but not great for performance. Local
variables are one of the most-used <span name="params">parts</span> of a
language. If locals are slow, *everything* is slow. So we want a strategy for
local variables that's as efficient as possible.

<aside name="params">

Function parameters are also heavily used. They work like local variables too,
so we'll use the same implementation technique for them.

</aside>

Fortunately, lexical scoping is here to help us. As the name implies, lexical
scope means we can resolve a local variable just by looking at the text of the
program -- locals are *not* late bound. Any processing work we do in the
compiler is work we *don't* have to do at runtime, so our implementation of
local variables will lean heavily on the compiler.

## Representing Local Variables

The nice thing about hacking on a programming language in modern times is
there's a long lineage of other languages to learn from. So how do C and Java
manage their local variables? Why, on the stack, of course! They typically use
the native stack mechanisms supported by the chip and OS. That's a little too
low level for us, but inside the virtual world of clox, we have our own stack we
can use.

Right now, we only use it for holding on to **temporaries** -- short-lived blobs
of data that we need to remember while computing an expression. As long as we
don't get in the way of those, we can stuff our local variables onto the stack
too. This is great for performance. Allocating space for a new local requires
only incrementing the `stackTop` pointer, and freeing is likewise a decrement.
Accessing a variable from a known stack slot is an indexed array lookup.

We do need to be careful, though. The VM expects the stack to behave like, well,
a stack. We have to be OK with allocating new locals only on the top of the
stack, and we have to accept that we can discard a local only when nothing is
above it on the stack. Also, we need to make sure temporaries don't interfere.

Conveniently, the design of Lox is in <span name="harmony">harmony</span> with
these constraints. New locals are always created by declaration statements.
Statements don't nest inside expressions, so there are never any temporaries on
the stack when a statement begins executing. Blocks are strictly nested. When a
block ends, it always takes the innermost, most recently declared locals with
it. Since those are also the locals that came into scope last, they should be on
top of the stack where we need them.

<aside name="harmony">

This alignment obviously isn't coincidental. I designed Lox to be amenable to
single-pass compilation to stack-based bytecode. But I didn't have to tweak the
language too much to fit in those restrictions. Most of its design should feel
pretty natural.

This is in large part because the history of languages is deeply tied to
single-pass compilation and -- to a lesser degree -- stack-based architectures.
Lox's block scoping follows a tradition stretching back to BCPL. As programmers,
our intuition of what's "normal" in a language is informed even today by the
hardware limitations of yesteryear.

</aside>

Step through this example program and watch how the local variables come in and
go out of scope:

<img src="image/local-variables/scopes.png" alt="A series of local variables come into and out of scope in a stack-like fashion." />

See how they fit a stack perfectly? It seems that the stack will work for
storing locals at runtime. But we can go further than that. Not only do we know
*that* they will be on the stack, but we can even pin down precisely *where*
they will be on the stack. Since the compiler knows exactly which local
variables are in scope at any point in time, it can effectively simulate the
stack during compilation and note <span name="fn">where</span> in the stack each
variable lives.

We'll take advantage of this by using these stack offsets as operands for the
bytecode instructions that read and store local variables. This makes working
with locals deliciously fast -- as simple as indexing into an array.

<aside name="fn">

In this chapter, locals start at the bottom of the VM's stack array and are
indexed from there. When we add [functions][], that scheme gets a little more
complex. Each function needs its own region of the stack for its parameters and
local variables. But, as we'll see, that doesn't add as much complexity as you
might expect.

[functions]: calls-and-functions.html

</aside>

There's a lot of state we need to track in the compiler to make this whole thing
go, so let's get started there. In jlox, we used a linked chain of "environment"
HashMaps to track which local variables were currently in scope. That's sort of
the classic, schoolbook way of representing lexical scope. For clox, as usual,
we're going a little closer to the metal. All of the state lives in a new
struct.

^code compiler-struct (1 before, 2 after)

We have a simple, flat array of all locals that are in scope at each point in
the compilation process. They are <span name="order">ordered</span> in the array
in the order that their declarations appear in the code. Since the instruction
operand we'll use to encode a local is a single byte, our VM has a hard limit on
the number of locals that can be in scope at once. That means we can also give
the locals array a fixed size.

<aside name="order">

We're writing a single-pass compiler, so it's not like we have *too* many other
options for how to order them in the array.

</aside>

^code uint8-count (1 before, 2 after)

Back in the Compiler struct, the `localCount` field tracks how many locals are
in scope -- how many of those array slots are in use. We also track the "scope
depth". This is the number of blocks surrounding the current bit of code we're
compiling.

Our Java interpreter used a chain of maps to keep each block's variables
separate from other blocks'. This time, we'll simply number variables with the
level of nesting where they appear. Zero is the global scope, one is the first
top-level block, two is inside that, you get the idea. We use this to track
which block each local belongs to so that we know which locals to discard when a
block ends.

Each local in the array is one of these:

^code local-struct (1 before, 2 after)

We store the name of the variable. When we're resolving an identifier, we
compare the identifier's lexeme with each local's name to find a match. It's
pretty hard to resolve a variable if you don't know its name. The `depth` field
records the scope depth of the block where the local variable was declared.
That's all the state we need for now.

This is a very different representation from what we had in jlox, but it still
lets us answer all of the same questions our compiler needs to ask of the
lexical environment. The next step is figuring out how the compiler *gets* at
this state. If we were <span name="thread">principled</span> engineers, we'd
give each function in the front end a parameter that accepts a pointer to a
Compiler. We'd create a Compiler at the beginning and carefully thread it
through each function call... but that would mean a lot of boring changes to
the code we already wrote, so here's a global variable instead:

<aside name="thread">

In particular, if we ever want to use our compiler in a multi-threaded
application, possibly with multiple compilers running in parallel, then using a
global variable is a *bad* idea.

</aside>

^code current-compiler (1 before, 1 after)

Here's a little function to initialize the compiler:

^code init-compiler

When we first start up the VM, we call it to get everything into a clean state.

^code compiler (1 before, 1 after)

Our compiler has the data it needs, but not the operations on that data. There's
no way to create and destroy scopes, or add and resolve variables. We'll add
those as we need them. First, let's start building some language features.

## Block Statements

Before we can have any local variables, we need some local scopes. These come
from two things: function bodies and <span name="block">blocks</span>. Functions
are a big chunk of work that we'll tackle in [a later chapter][functions], so
for now we're only going to do blocks. As usual, we start with the syntax. The
new grammar we'll introduce is:

```ebnf
statement      → exprStmt
               | printStmt
               | block ;

block          → "{" declaration* "}" ;
```

<aside name="block">

When you think about it, "block" is a weird name. Used metaphorically, "block"
usually means a small indivisible unit, but for some reason, the Algol 60
committee decided to use it to refer to a *compound* structure -- a series of
statements. It could be worse, I suppose. Algol 58 called `begin` and `end`
"statement parentheses".

<img src="image/local-variables/block.png" alt="A cinder block." class="above" />

</aside>

Blocks are a kind of statement, so the rule for them goes in the `statement`
production. The corresponding code to compile one looks like this:

^code parse-block (2 before, 1 after)

After <span name="helper">parsing</span> the initial curly brace, we use this
helper function to compile the rest of the block:

<aside name="helper">

This function will come in handy later for compiling function bodies.

</aside>

^code block

It keeps parsing declarations and statements until it hits the closing brace. As
we do with any loop in the parser, we also check for the end of the token
stream. This way, if there's a malformed program with a missing closing curly,
the compiler doesn't get stuck in a loop.

Executing a block simply means executing the statements it contains, one after
the other, so there isn't much to compiling them. The semantically interesting
thing blocks do is create scopes. Before we compile the body of a block, we call
this function to enter a new local scope:

^code begin-scope

In order to "create" a scope, all we do is increment the current depth. This is
certainly much faster than jlox, which allocated an entire new HashMap for
each one. Given `beginScope()`, you can probably guess what `endScope()` does.

^code end-scope

That's it for blocks and scopes -- more or less -- so we're ready to stuff some
variables into them.

## Declaring Local Variables

Usually we start with parsing here, but our compiler already supports parsing
and compiling variable declarations. We've got `var` statements, identifier
expressions and assignment in there now. It's just that the compiler assumes
all variables are global. So we don't need any new parsing support; we just need
to hook up the new scoping semantics to the existing code.

<img src="image/local-variables/declaration.png" alt="The code flow within varDeclaration()." />

Variable declaration parsing begins in `varDeclaration()` and relies on a couple
of other functions. First, `parseVariable()` consumes the identifier token for
the variable name, adds its lexeme to the chunk's constant table as a string,
and then returns the constant table index where it was added. Then, after
`varDeclaration()` compiles the initializer, it calls `defineVariable()` to emit
the bytecode for storing the variable's value in the global variable hash table.

Both of those helpers need a few changes to support local variables. In
`parseVariable()`, we add:

^code parse-local (1 before, 1 after)

First, we "declare" the variable. I'll get to what that means in a second. After
that, we exit the function if we're in a local scope. At runtime, locals aren't
looked up by name. There's no need to stuff the variable's name into the
constant table, so if the declaration is inside a local scope, we return a dummy
table index instead.

Over in `defineVariable()`, we need to emit the code to store a local variable
if we're in a local scope. It looks like this:

^code define-variable (1 before, 1 after)

Wait, what? Yup. That's it. There is no code to create a local variable at
runtime. Think about what state the VM is in. It has already executed the code
for the variable's initializer (or the implicit `nil` if the user omitted an
initializer), and that value is sitting right on top of the stack as the only
remaining temporary. We also know that new locals are allocated at the top of
the stack... right where that value already is. Thus, there's nothing to do. The
temporary simply *becomes* the local variable. It doesn't get much more
efficient than that.

<span name="locals"></span>

<img src="image/local-variables/local-slots.png" alt="Walking through the bytecode execution showing that each initializer's result ends up in the local's slot." />

<aside name="locals">

The code on the left compiles to the sequence of instructions on the right.

</aside>

OK, so what's "declaring" about? Here's what that does:

^code declare-variable

This is the point where the compiler records the existence of the variable. We
only do this for locals, so if we're in the top-level global scope, we just bail
out. Because global variables are late bound, the compiler doesn't keep track of
which declarations for them it has seen.

But for local variables, the compiler does need to remember that the variable
exists. That's what declaring it does -- it adds it to the compiler's list of
variables in the current scope. We implement that using another new function.

^code add-local

This initializes the next available Local in the compiler's array of variables.
It stores the variable's <span name="lexeme">name</span> and the depth of the
scope that owns the variable.

<aside name="lexeme">

Worried about the lifetime of the string for the variable's name? The Local
directly stores a copy of the Token struct for the identifier. Tokens store a
pointer to the first character of their lexeme and the lexeme's length. That
pointer points into the original source string for the script or REPL entry
being compiled.

As long as that string stays around during the entire compilation process --
which it must since, you know, we're compiling it -- then all of the tokens
pointing into it are fine.

</aside>

Our implementation is fine for a correct Lox program, but what about invalid
code? Let's aim to be robust. The first error to handle is not really the user's
fault, but more a limitation of the VM. The instructions to work with local
variables refer to them by slot index. That index is stored in a single-byte
operand, which means the VM only supports up to 256 local variables in scope at
one time.

If we try to go over that, not only could we not refer to them at runtime, but
the compiler would overwrite its own locals array, too. Let's prevent that.

^code too-many-locals (1 before, 1 after)

The next case is trickier. Consider:

```lox
{
  var a = "first";
  var a = "second";
}
```

At the top level, Lox allows redeclaring a variable with the same name as a
previous declaration because that's useful for the REPL. But inside a local
scope, that's a pretty <span name="rust">weird</span> thing to do. It's likely
to be a mistake, and many languages, including our own Lox, enshrine that
assumption by making this an error.

<aside name="rust">

Interestingly, the Rust programming language *does* allow this, and idiomatic
code relies on it.

</aside>

Note that the above program is different from this one:

```lox
{
  var a = "outer";
  {
    var a = "inner";
  }
}
```

It's OK to have two variables with the same name in *different* scopes, even
when the scopes overlap such that both are visible at the same time. That's
shadowing, and Lox does allow that. It's only an error to have two variables
with the same name in the *same* local scope.

We detect that error like so:

^code existing-in-scope (1 before, 2 after)

<aside name="negative">

Don't worry about that odd `depth != -1` part yet. We'll get to what that's
about later.

</aside>

Local variables are appended to the array when they're declared, which means the
current scope is always at the end of the array. When we declare a new variable,
we start at the end and work backward, looking for an existing variable with the
same name. If we find one in the current scope, we report the error. Otherwise,
if we reach the beginning of the array or a variable owned by another scope,
then we know we've checked all of the existing variables in the scope.

To see if two identifiers are the same, we use this:

^code identifiers-equal

Since we know the lengths of both lexemes, we check that first. That will fail
quickly for many non-equal strings. If the <span name="hash">lengths</span> are
the same, we check the characters using `memcmp()`. To get to `memcmp()`, we
need an include.

<aside name="hash">

It would be a nice little optimization if we could check their hashes, but
tokens aren't full LoxStrings, so we haven't calculated their hashes yet.

</aside>

^code compiler-include-string (1 before, 2 after)

With this, we're able to bring variables into being. But, like ghosts, they
linger on beyond the scope where they are declared. When a block ends, we need
to put them to rest.

^code pop-locals (1 before, 1 after)

When we pop a scope, we walk backward through the local array looking for any
variables declared at the scope depth we just left. We discard them by simply
decrementing the length of the array.

There is a runtime component to this too. Local variables occupy slots on the
stack. When a local variable goes out of scope, that slot is no longer needed
and should be freed. So, for each variable that we discard, we also emit an
`OP_POP` <span name="pop">instruction</span> to pop it from the stack.

<aside name="pop">

When multiple local variables go out of scope at once, you get a series of
`OP_POP` instructions that get interpreted one at a time. A simple optimization
you could add to your Lox implementation is a specialized `OP_POPN` instruction
that takes an operand for the number of slots to pop and pops them all at once.

</aside>

## Using Locals

We can now compile and execute local variable declarations. At runtime, their
values are sitting where they should be on the stack. Let's start using them.
We'll do both variable access and assignment at the same time since they touch
the same functions in the compiler.

We already have code for getting and setting global variables, and -- like good
little software engineers -- we want to reuse as much of that existing code as
we can. Something like this:

^code named-local (1 before, 2 after)

Instead of hardcoding the bytecode instructions emitted for variable access and
assignment, we use a couple of C variables. First, we try to find a local
variable with the given name. If we find one, we use the instructions for
working with locals. Otherwise, we assume it's a global variable and use the
existing bytecode instructions for globals.

A little further down, we use those variables to emit the right instructions.
For assignment:

^code emit-set (2 before, 1 after)

And for access:

^code emit-get (2 before, 1 after)

The real heart of this chapter, the part where we resolve a local variable, is
here:

^code resolve-local

For all that, it's straightforward. We walk the list of locals that are
currently in scope. If one has the same name as the identifier token, the
identifier must refer to that variable. We've found it! We walk the array
backward so that we find the *last* declared variable with the identifier. That
ensures that inner local variables correctly shadow locals with the same name in
surrounding scopes.

At runtime, we load and store locals using the stack slot index, so that's what
the compiler needs to calculate after it resolves the variable. Whenever a
variable is declared, we append it to the locals array in Compiler. That means
the first local variable is at index zero, the next one is at index one, and so
on. In other words, the locals array in the compiler has the *exact* same layout
as the VM's stack will have at runtime. The variable's index in the locals array
is the same as its stack slot. How convenient!

If we make it through the whole array without finding a variable with the given
name, it must not be a local. In that case, we return `-1` to signal that it
wasn't found and should be assumed to be a global variable instead.

### Interpreting local variables

Our compiler is emitting two new instructions, so let's get them working. First
is loading a local variable:

^code get-local-op (1 before, 1 after)

And its implementation:

^code interpret-get-local (1 before, 1 after)

It takes a single-byte operand for the stack slot where the local lives. It
loads the value from that index and then pushes it on top of the stack where
later instructions can find it.

<aside name="slot">

It seems redundant to push the local's value onto the stack since it's already
on the stack lower down somewhere. The problem is that the other bytecode
instructions only look for data at the *top* of the stack. This is the core
aspect that makes our bytecode instruction set *stack*-based.
[Register-based][reg] bytecode instruction sets avoid this stack juggling at the
cost of having larger instructions with more operands.

[reg]: a-virtual-machine.html#design-note

</aside>

Next is assignment:

^code set-local-op (1 before, 1 after)

You can probably predict the implementation.

^code interpret-set-local (1 before, 1 after)

It takes the assigned value from the top of the stack and stores it in the stack
slot corresponding to the local variable. Note that it doesn't pop the value
from the stack. Remember, assignment is an expression, and every expression
produces a value. The value of an assignment expression is the assigned value
itself, so the VM just leaves the value on the stack.

Our disassembler is incomplete without support for these two new instructions.

^code disassemble-local (1 before, 1 after)

The compiler compiles local variables to direct slot access. The local
variable's name never leaves the compiler to make it into the chunk at all.
That's great for performance, but not so great for introspection. When we
disassemble these instructions, we can't show the variable's name like we could
with globals. Instead, we just show the slot number.

<aside name="debug">

Erasing local variable names in the compiler is a real issue if we ever want to
implement a debugger for our VM. When users step through code, they expect to
see the values of local variables organized by their names. To support that,
we'd need to output some additional information that tracks the name of each
local variable at each stack slot.

</aside>

^code byte-instruction

### Another scope edge case

We've already sunk some time into handling a couple of weird edge cases around
scopes. We made sure shadowing works correctly. We report an error if two
variables in the same local scope have the same name. For reasons that aren't
entirely clear to me, variable scoping seems to have a lot of these wrinkles.
I've never seen a language where it feels completely <span
name="elegant">elegant</span>.

<aside name="elegant">

No, not even Scheme.

</aside>

We've got one more edge case to deal with before we end this chapter. Recall this strange beastie we first met in [jlox's implementation of variable resolution][shadow]:

[shadow]: resolving-and-binding.html#resolving-variable-declarations

```lox
{
  var a = "outer";
  {
    var a = a;
  }
}
```

We slayed it then by splitting a variable's declaration into two phases, and
we'll do that again here:

<img src="image/local-variables/phases.png" alt="An example variable declaration marked 'declared uninitialized' before the variable name and 'ready for use' after the initializer." />

As soon as the variable declaration begins -- in other words, before its
initializer -- the name is declared in the current scope. The variable exists,
but in a special "uninitialized" state. Then we compile the initializer. If at
any point in that expression we resolve an identifier that points back to this
variable, we'll see that it is not initialized yet and report an error. After we
finish compiling the initializer, we mark the variable as initialized and ready
for use.

To implement this, when we declare a local, we need to indicate the
"uninitialized" state somehow. We could add a new field to Local, but let's be a
little more parsimonious with memory. Instead, we'll set the variable's scope
depth to a special sentinel value, `-1`.

^code declare-undefined (1 before, 1 after)

Later, once the variable's initializer has been compiled, we mark it
initialized.

^code define-local (1 before, 2 after)

That is implemented like so:

^code mark-initialized

So this is *really* what "declaring" and "defining" a variable means in the
compiler. "Declaring" is when the variable is added to the scope, and "defining"
is when it becomes available for use.

When we resolve a reference to a local variable, we check the scope depth to see
if it's fully defined.

^code own-initializer-error (1 before, 1 after)

If the variable has the sentinel depth, it must be a reference to a variable in
its own initializer, and we report that as an error.

That's it for this chapter! We added blocks, local variables, and real,
honest-to-God lexical scoping. Given that we introduced an entirely different
runtime representation for variables, we didn't have to write a lot of code. The
implementation ended up being pretty clean and efficient.

You'll notice that almost all of the code we wrote is in the compiler. Over in
the runtime, it's just two little instructions. You'll see this as a continuing
<span name="static">trend</span> in clox compared to jlox. One of the biggest
hammers in the optimizer's toolbox is pulling work forward into the compiler so
that you don't have to do it at runtime. In this chapter, that meant resolving
exactly which stack slot every local variable occupies. That way, at runtime, no
lookup or resolution needs to happen.

<aside name="static">

You can look at static types as an extreme example of this trend. A statically
typed language takes all of the type analysis and type error handling and sorts
it all out during compilation. Then the runtime doesn't have to waste any time
checking that values have the proper type for their operation. In fact, in some
statically typed languages like C, you don't even *know* the type at runtime.
The compiler completely erases any representation of a value's type leaving just
the bare bits.

</aside>

<div class="challenges">

## Challenges

1.  Our simple local array makes it easy to calculate the stack slot of each
    local variable. But it means that when the compiler resolves a reference to
    a variable, we have to do a linear scan through the array.

    Come up with something more efficient. Do you think the additional
    complexity is worth it?

1.  How do other languages handle code like this:

    ```lox
    var a = a;
    ```

    What would you do if it was your language? Why?

1.  Many languages make a distinction between variables that can be reassigned
    and those that can't. In Java, the `final` modifier prevents you from
    assigning to a variable. In JavaScript, a variable declared with `let` can
    be assigned, but one declared using `const` can't. Swift treats `let` as
    single-assignment and uses `var` for assignable variables. Scala and Kotlin
    use `val` and `var`.

    Pick a keyword for a single-assignment variable form to add to Lox. Justify
    your choice, then implement it. An attempt to assign to a variable declared
    using your new keyword should cause a compile error.

1.  Extend clox to allow more than 256 local variables to be in scope at a time.

</div>


================================================
FILE: book/methods-and-initializers.md
================================================
> When you are on the dancefloor, there is nothing to do but dance.
>
> <cite>Umberto Eco, <em>The Mysterious Flame of Queen Loana</em></cite>

It is time for our virtual machine to bring its nascent objects to life with
behavior. That means methods and method calls. And, since they are a special
kind of method, initializers too.

All of this is familiar territory from our previous jlox interpreter. What's new
in this second trip is an important optimization we'll implement to make method
calls over seven times faster than our baseline performance. But before we get
to that fun, we gotta get the basic stuff working.

## Method Declarations

We can't optimize method calls before we have method calls, and we can't call
methods without having methods to call, so we'll start with declarations.

### Representing methods

We usually start in the compiler, but let's knock the object model out first
this time. The runtime representation for methods in clox is similar to that of
jlox. Each class stores a hash table of methods. Keys are method names, and each
value is an ObjClosure for the body of the method.

^code class-methods (3 before, 1 after)

A brand new class begins with an empty method table.

^code init-methods (1 before, 1 after)

The ObjClass struct owns the memory for this table, so when the memory manager
deallocates a class, the table should be freed too.

^code free-methods (1 before, 1 after)

Speaking of memory managers, the GC needs to trace through classes into the
method table. If a class is still reachable (likely through some instance),
then all of its methods certainly need to stick around too.

^code mark-methods (1 before, 1 after)

We use the existing `markTable()` function, which traces through the key string
and value in each table entry.

Storing a class's methods is pretty familiar coming from jlox. The different
part is how that table gets populated. Our previous interpreter had access to
the entire AST node for the class declaration and all of the methods it
contained. At runtime, the interpreter simply walked that list of declarations.

Now every piece of information the compiler wants to shunt over to the runtime
has to squeeze through the interface of a flat series of bytecode instructions.
How do we take a class declaration, which can contain an arbitrarily large set
of methods, and represent it as bytecode? Let's hop over to the compiler and
find out.

### Compiling method declarations

The last chapter left us with a compiler that parses classes but allows only an
empty body. Now we insert a little code to compile a series of method
declarations between the braces.

^code class-body (1 before, 1 after)

Lox doesn't have field declarations, so anything before the closing brace at the
end of the class body must be a method. We stop compiling methods when we hit
that final curly or if we reach the end of the file. The latter check ensures
our compiler doesn't get stuck in an infinite loop if the user accidentally
forgets the closing brace.

The tricky part with compiling a class declaration is that a class may declare
any number of methods. Somehow the runtime needs to look up and bind all of
them. That would be a lot to pack into a single `OP_CLASS` instruction. Instead,
the bytecode we generate for a class declaration will split the process into a
<span name="series">*series*</span> of instructions. The compiler already emits
an `OP_CLASS` instruction that creates a new empty ObjClass object. Then it
emits instructions to store the class in a variable with its name.

<aside name="series">

We did something similar for closures. The `OP_CLOSURE` instruction needs to
know the type and index for each captured upvalue. We encoded that using a
series of pseudo-instructions following the main `OP_CLOSURE` instruction --
basically a variable number of operands. The VM processes all of those extra
bytes immediately when interpreting the `OP_CLOSURE` instruction.

Here our approach is a little different because from the VM's perspective, each
instruction to define a method is a separate stand-alone operation. Either
approach would work. A variable-sized pseudo-instruction is possibly marginally
faster, but class declarations are rarely in hot loops, so it doesn't matter
much.

</aside>

Now, for each method declaration, we emit a new `OP_METHOD` instruction that
adds a single method to that class. When all of the `OP_METHOD` instructions
have executed, we're left with a fully formed class. While the user sees a class
declaration as a single atomic operation, the VM implements it as a series of
mutations.

To define a new method, the VM needs three things:

1.  The name of the method.

1.  The closure for the method body.

1.  The class to bind the method to.

We'll incrementally write the compiler code to see how those all get through to
the runtime, starting here:

^code method

Like `OP_GET_PROPERTY` and other instructions that need names at runtime, the
compiler adds the method name token's lexeme to the constant table, getting back
a table index. Then we emit an `OP_METHOD` instruction with that index as the
operand. That's the name. Next is the method body:

^code method-body (1 before, 1 after)

We use the same `function()` helper that we wrote for compiling function
declarations. That utility function compiles the subsequent parameter list and
function body. Then it emits the code to create an ObjClosure and leave it on
top of the stack. At runtime, the VM will find the closure there.

Last is the class to bind the method to. Where can the VM find that?
Unfortunately, by the time we reach the `OP_METHOD` instruction, we don't know
where it is. It <span name="global">could</span> be on the stack, if the user
declared the class in a local scope. But a top-level class declaration ends up
with the ObjClass in the global variable table.

<aside name="global">

If Lox supported declaring classes only at the top level, the VM could assume
that any class could be found by looking it up directly from the global
variable table. Alas, because we support local classes, we need to handle that
case too.

</aside>

Fear not. The compiler does know the *name* of the class. We can capture it
right after we consume its token.

^code class-name (1 before, 1 after)

And we know that no other declaration with that name could possibly shadow the
class. So we do the easy fix. Before we start binding methods, we emit whatever
code is necessary to load the class back on top of the stack.

^code load-class (2 before, 1 after)

Right before compiling the class body, we <span name="load">call</span>
`namedVariable()`. That helper function generates code to load a variable with
the given name onto the stack. Then we compile the methods.

<aside name="load">

The preceding call to `defineVariable()` pops the class, so it seems silly to
call `namedVariable()` to load it right back onto the stack. Why not simply
leave it on the stack in the first place? We could, but in the [next
chapter][super] we will insert code between these two calls to support
inheritance. At that point, it will be simpler if the class isn't sitting around
on the stack.

[super]: superclasses.html

</aside>

This means that when we execute each `OP_METHOD` instruction, the stack has the
method's closure on top with the class right under it. Once we've reached the
end of the methods, we no longer need the class and tell the VM to pop it off
the stack.

^code pop-class (1 before, 1 after)

Putting all of that together, here is an example class declaration to throw at
the compiler:

```lox
class Brunch {
  bacon() {}
  eggs() {}
}
```

Given that, here is what the compiler generates and how those instructions
affect the stack at runtime:

<img src="image/methods-and-initializers/method-instructions.png" alt="The series of bytecode instructions for a class declaration with two methods." />

All that remains for us is to implement the runtime for that new `OP_METHOD`
instruction.

### Executing method declarations

First we define the opcode.

^code method-op (1 before, 1 after)

We disassemble it like other instructions that have string constant operands.

^code disassemble-method (2 before, 1 after)

And over in the interpreter, we add a new case too.

^code interpret-method (1 before, 1 after)

There, we read the method name from the constant table and pass it here:

^code define-method

The method closure is on top of the stack, above the class it will be bound to.
We read those two stack slots and store the closure in the class's method table.
Then we pop the closure since we're done with it.

Note that we don't do any runtime type checking on the closure or class object.
That `AS_CLASS()` call is safe because the compiler itself generated the code
that causes the class to be in that stack slot. The VM <span
name="verify">trusts</span> its own compiler.

<aside name="verify">

The VM trusts that the instructions it executes are valid because the *only* way
to get code to the bytecode interpreter is by going through clox's own compiler.
Many bytecode VMs, like the JVM and CPython, support executing bytecode that has
been compiled separately. That leads to a different security story. Maliciously
crafted bytecode could crash the VM or worse.

To prevent that, the JVM does a bytecode verification pass before it executes
any loaded code. CPython says it's up to the user to ensure any bytecode they
run is safe.

</aside>

After the series of `OP_METHOD` instructions is done and the `OP_POP` has popped
the class, we will have a class with a nicely populated method table, ready to
start doing things. The next step is pulling those methods back out and using
them.

## Method References

Most of the time, methods are accessed and immediately called, leading to this
familiar syntax:

```lox
instance.method(argument);
```

But remember, in Lox and some other languages, those two steps are distinct and
can be separated.

```lox
var closure = instance.method;
closure(argument);
```

Since users *can* separate the operations, we have to implement them separately.
The first step is using our existing dotted property syntax to access a method
defined on the instance's class. That should return some kind of object that the
user can then call like a function.

The obvious approach is to look up the method in the class's method table and
return the ObjClosure associated with that name. But we also need to remember
that when you access a method, `this` gets bound to the instance the method was
accessed from. Here's the example from [when we added methods to jlox][jlox]:

[jlox]: classes.html#methods-on-classes

```lox
class Person {
  sayName() {
    print this.name;
  }
}

var jane = Person();
jane.name = "Jane";

var method = jane.sayName;
method(); // ?
```

This should print "Jane", so the object returned by `.sayName` somehow needs to
remember the instance it was accessed from when it later gets called. In jlox,
we implemented that "memory" using the interpreter's existing heap-allocated
Environment class, which handled all variable storage.

Our bytecode VM has a more complex architecture for storing state. [Local
variables and temporaries][locals] are on the stack, [globals][] are in a hash
table, and variables in closures use [upvalues][]. That necessitates a somewhat
more complex solution for tracking a method's receiver in clox, and a new
runtime type.

[locals]: local-variables.html#representing-local-variables
[globals]: global-variables.html#variable-declarations
[upvalues]: closures.html#upvalues

### Bound methods

When the user executes a method access, we'll find the closure for that method
and wrap it in a new <span name="bound">"bound method"</span> object that tracks
the instance that the method was accessed from. This bound object can be called
later like a function. When invoked, the VM will do some shenanigans to wire up
`this` to point to the receiver inside the method's body.

<aside name="bound">

I took the name "bound method" from CPython. Python behaves similarly to Lox
here, and I used its implementation for inspiration.

</aside>

Here's the new object type:

^code obj-bound-method (2 before, 1 after)

It wraps the receiver and the method closure together. The receiver's type is
Value even though methods can be called only on ObjInstances. Since the VM
doesn't care what kind of receiver it has anyway, using Value means we don't
have to keep converting the pointer back to a Value when it gets passed to more
general functions.

The new struct implies the usual boilerplate you're used to by now. A new case
in the object type enum:

^code obj-type-bound-method (1 before, 1 after)

A macro to check a value's type:

^code is-bound-method (2 before, 1 after)

Another macro to cast the value to an ObjBoundMethod pointer:

^code as-bound-method (2 before, 1 after)

A function to create a new ObjBoundMethod:

^code new-bound-method-h (2 before, 1 after)

And an implementation of that function here:

^code new-bound-method

The constructor-like function simply stores the given closure and receiver. When
the bound method is no longer needed, we free it.

^code free-bound-method (1 before, 1 after)

The bound method has a couple of references, but it doesn't *own* them, so it
frees nothing but itself. However, those references do get traced by the garbage
collector.

^code blacken-bound-method (1 before, 1 after)

This <span name="trace">ensures</span> that a handle to a method keeps the
receiver around in memory so that `this` can still find the object when you
invoke the handle later. We also trace the method closure.

<aside name="trace">

Tracing the method closure isn't really necessary. The receiver is an
ObjInstance, which has a pointer to its ObjClass, which has a table for all of
the methods. But it feels dubious to me in some vague way to have ObjBoundMethod
rely on that.

</aside>

The last operation all objects support is printing.

^code print-bound-method (1 before, 1 after)

A bound method prints exactly the same way as a function. From the user's
perspective, a bound method *is* a function. It's an object they can call. We
don't expose that the VM implements bound methods using a different object type.

<aside name="party">

<img src="image/methods-and-initializers/party-hat.png" alt="A party hat." />

</aside>

Put on your <span name="party">party</span> hat because we just reached a little
milestone. ObjBoundMethod is the very last runtime type to add to clox. You've
written your last `IS_` and `AS_` macros. We're only a few chapters from the end
of the book, and we're getting close to a complete VM.

### Accessing methods

Let's get our new object type doing something. Methods are accessed using the
same "dot" property syntax we implemented in the last chapter. The compiler
already parses the right expressions and emits `OP_GET_PROPERTY` instructions
for them. The only changes we need to make are in the runtime.

When a property access instruction executes, the instance is on top of the
stack. The instruction's job is to find a field or method with the given name
and replace the top of the stack with the accessed property.

The interpreter already handles fields, so we simply extend the
`OP_GET_PROPERTY` case with another section.

^code get-method (5 before, 1 after)

We insert this after the code to look up a field on the receiver instance.
Fields take priority over and shadow methods, so we look for a field first. If
the instance does not have a field with the given property name, then the name
may refer to a method.

We take the instance's class and pass it to a new `bindMethod()` helper. If that
function finds a method, it places the method on the stack and returns `true`.
Otherwise it returns `false` to indicate a method with that name couldn't be
found. Since the name also wasn't a field, that means we have a runtime error,
which aborts the interpreter.

Here is the good stuff:

^code bind-method

First we look for a method with the given name in the class's method table. If
we don't find one, we report a runtime error and bail out. Otherwise, we take
the method and wrap it in a new ObjBoundMethod. We grab the receiver from its
home on top of the stack. Finally, we pop the instance and replace the top of
the stack with the bound method.

For example:

```lox
class Brunch {
  eggs() {}
}

var brunch = Brunch();
var eggs = brunch.eggs;
```

Here is what happens when the VM executes the `bindMethod()` call for the
`brunch.eggs` expression:

<img src="image/methods-and-initializers/bind-method.png" alt="The stack changes caused by bindMethod()." />

That's a lot of machinery under the hood, but from the user's perspective, they
simply get a function that they can call.

### Calling methods

Users can declare methods on classes, access them on instances, and get bound
methods onto the stack. They just can't <span name="do">*do*</span> anything
useful with those bound method objects. The operation we're missing is calling
them. Calls are implemented in `callValue()`, so we add a case there for the new
object type.

<aside name="do">

A bound method *is* a first-class value, so users can store it in variables,
pass it to functions, and otherwise do "value"-y stuff with it.

</aside>

^code call-bound-method (1 before, 1 after)

We pull the raw closure back out of the ObjBoundMethod and use the existing
`call()` helper to begin an invocation of that closure by pushing a CallFrame
for it onto the call stack. That's all it takes to be able to run this Lox
program:

```lox
class Scone {
  topping(first, second) {
    print "scone with " + first + " and " + second;
  }
}

var scone = Scone();
scone.topping("berries", "cream");
```

That's three big steps. We can declare, access, and invoke methods. But
something is missing. We went to all that trouble to wrap the method closure in
an object that binds the receiver, but when we invoke the method, we don't use
that receiver at all.

## This

The reason bound methods need to keep hold of the receiver is so that it can be
accessed inside the body of the method. Lox exposes a method's receiver through
`this` expressions. It's time for some new syntax. The lexer already treats
`this` as a special token type, so the first step is wiring that token up in the
parse table.

^code table-this (1 before, 1 after)

<aside name="this">

The underscore at the end of the name of the parser function is because `this`
is a reserved word in C++ and we support compiling clox as C++.

</aside>

When the parser encounters a `this` in prefix position, it dispatches to a new
parser function.

^code this

We'll apply the same implementation technique for `this` in clox that we used in
jlox. We treat `this` as a lexically scoped local variable whose value gets
magically initialized. Compiling it like a local variable means we get a lot of
behavior for free. In particular, closures inside a method that reference `this`
will do the right thing and capture the receiver in an upvalue.

When the parser function is called, the `this` token has just been consumed and
is stored as the previous token. We call our existing `variable()` function
which compiles identifier expressions as variable accesses. It takes a single
Boolean parameter for whether the compiler should look for a following `=`
operator and parse a setter. You can't assign to `this`, so we pass `false` to
disallow that.

The `variable()` function doesn't care that `this` has its own token type and
isn't an identifier. It is happy to treat the lexeme "this" as if it were a
variable name and then look it up using the existing scope resolution machinery.
Right now, that lookup will fail because we never declared a variable whose name
is "this". It's time to think about where the receiver should live in memory.

At least until they get captured by closures, clox stores every local variable
on the VM's stack. The compiler keeps track of which slots in the function's
stack window are owned by which local variables. If you recall, the compiler
sets aside stack slot zero by declaring a local variable whose name is an empty
string.

For function calls, that slot ends up holding the function being called. Since
the slot has no name, the function body never accesses it. You can guess where
this is going. For *method* calls, we can repurpose that slot to store the
receiver. Slot zero will store the instance that `this` is bound to. In order to
compile `this` expressions, the compiler simply needs to give the correct name
to that local variable.

^code slot-zero (1 before, 1 after)

We want to do this only for methods. Function declarations don't have a `this`.
And, in fact, they *must not* declare a variable named "this", so that if you
write a `this` expression inside a function declaration which is itself inside a
method, the `this` correctly resolves to the outer method's receiver.

```lox
class Nested {
  method() {
    fun function() {
      print this;
    }

    function();
  }
}

Nested().method();
```

This program should print "Nested instance". To decide what name to give to
local slot zero, the compiler needs to know whether it's compiling a function or
method declaration, so we add a new case to our FunctionType enum to distinguish
methods.

^code method-type-enum (1 before, 1 after)

When we compile a method, we use that type.

^code method-type (2 before, 1 after)

Now we can correctly compile references to the special "this" variable, and the
compiler will emit the right `OP_GET_LOCAL` instructions to access it. Closures
can even capture `this` and store the receiver in upvalues. Pretty cool.

Except that at runtime, the receiver isn't actually *in* slot zero. The
interpreter isn't holding up its end of the bargain yet. Here is the fix:

^code store-receiver (2 before, 2 after)

When a method is called, the top of the stack contains all of the arguments, and
then just under those is the closure of the called method. That's where slot
zero in the new CallFrame will be. This line of code inserts the receiver into
that slot. For example, given a method call like this:

```lox
scone.topping("berries", "cream");
```

We calculate the slot to store the receiver like so:

<img src="image/methods-and-initializers/closure-slot.png" alt="Skipping over the argument stack slots to find the slot containing the closure." />

The `-argCount` skips past the arguments and the `- 1` adjusts for the fact that
`stackTop` points just *past* the last used stack slot.

### Misusing this

Our VM now supports users *correctly* using `this`, but we also need to make
sure it properly handles users *mis*using `this`. Lox says it is a compile
error for a `this` expression to appear outside of the body of a method. These
two wrong uses should be caught by the compiler:

```lox
print this; // At top level.

fun notMethod() {
  print this; // In a function.
}
```

So how does the compiler know if it's inside a method? The obvious answer is to
look at the FunctionType of the current Compiler. We did just add an enum case
there to treat methods specially. However, that wouldn't correctly handle code
like the earlier example where you are inside a function which is, itself,
nested inside a method.

We could try to resolve "this" and then report an error if it wasn't found in
any of the surrounding lexical scopes. That would work, but would require us to
shuffle around a bunch of code, since right now the code for resolving a
variable implicitly considers it a global access if no declaration is found.

In the next chapter, we will need information about the nearest enclosing class.
If we had that, we could use it here to determine if we are inside a method. So
we may as well make our future selves' lives a little easier and put that
machinery in place now.

^code current-class (1 before, 2 after)

This module variable points to a struct representing the current, innermost
class being compiled. The new type looks like this:

^code class-compiler-struct (1 before, 2 after)

Right now we store only a pointer to the ClassCompiler for the enclosing class,
if any. Nesting a class declaration inside a method in some other class is an
uncommon thing to do, but Lox supports it. Just like the Compiler struct, this
means ClassCompiler forms a linked list from the current innermost class being
compiled out through all of the enclosing classes.

If we aren't inside any class declaration at all, the module variable
`currentClass` is `NULL`. When the compiler begins compiling a class, it pushes
a new ClassCompiler onto that implicit linked stack.

^code create-class-compiler (2 before, 1 after)

The memory for the ClassCompiler struct lives right on the C stack, a handy
capability we get by writing our compiler using recursive descent. At the end of
the class body, we pop that compiler off the stack and restore the enclosing
one.

^code pop-enclosing (1 before, 1 after)

When an outermost class body ends, `enclosing` will be `NULL`, so this resets
`currentClass` to `NULL`. Thus, to see if we are inside a class -- and therefore
inside a method -- we simply check that module variable.

^code this-outside-class (1 before, 1 after)

With that, `this` outside of a class is correctly forbidden. Now our methods
really feel like *methods* in the object-oriented sense. Accessing the receiver
lets them affect the instance you called the method on. We're getting there!

## Instance Initializers

The reason object-oriented languages tie state and behavior together -- one of
the core tenets of the paradigm -- is to ensure that objects are always in a
valid, meaningful state. When the only way to touch an object's state is <span
name="through">through</span> its methods, the methods can make sure nothing
goes awry. But that presumes the object is *already* in a proper state. What
about when it's first created?

<aside name="through">

Of course, Lox does let outside code directly access and modify an instance's
fields without going through its methods. This is unlike Ruby and Smalltalk,
which completely encapsulate state inside objects. Our toy scripting language,
alas, isn't so principled.

</aside>

Object-oriented languages ensure that brand new objects are properly set up
through constructors, which both produce a new instance and initialize its
state. In Lox, the runtime allocates new raw instances, and a class may declare
an initializer to set up any fields. Initializers work mostly like normal
methods, with a few tweaks:

1.  The runtime automatically invokes the initializer method whenever an
    instance of a class is created.

2.  The caller that constructs an instance always gets the instance <span
    name="return">back</span> after the initializer finishes, regardless of what
    the initializer function itself returns. The initializer method doesn't need
    to explicitly return `this`.

3.  In fact, an initializer is *prohibited* from returning any value at all
    since the value would never be seen anyway.

<aside name="return">

It's as if the initializer is implicitly wrapped in a bundle of code like this:

```lox
fun create(klass) {
  var obj = newInstance(klass);
  obj.init();
  return obj;
}
```

Note how the value returned by `init()` is discarded.

</aside>

Now that we support methods, to add initializers, we merely need to implement
those three special rules. We'll go in order.

### Invoking initializers

First, automatically calling `init()` on new instances:

^code call-init (1 before, 1 after)

After the runtime allocates the new instance, we look for an `init()` method on
the class. If we find one, we initiate a call to it. This pushes a new CallFrame
for the initializer's closure. Say we run this program:

```lox
class Brunch {
  init(food, drink) {}
}

Brunch("eggs", "coffee");
```

When the VM executes the call to `Brunch()`, it goes like this:

<img src="image/methods-and-initializers/init-call-frame.png" alt="The aligned stack windows for the Brunch() call and the corresponding init() method it forwards to." />

Any arguments passed to the class when we called it are still sitting on the
stack above the instance. The new CallFrame for the `init()` method shares that
stack window, so those arguments implicitly get forwarded to the initializer.

Lox doesn't require a class to define an initializer. If omitted, the runtime
simply returns the new uninitialized instance. However, if there is no `init()`
method, then it doesn't make any sense to pass arguments to the class when
creating the instance. We make that an error.

^code no-init-arity-error (1 before, 1 after)

When the class *does* provide an initializer, we also need to ensure that the
number of arguments passed matches the initializer's arity. Fortunately, the
`call()` helper does that for us already.

To call the initializer, the runtime looks up the `init()` method by name. We
want that to be fast since it happens every time an instance is constructed.
That means it would be good to take advantage of the string interning we've
already implemented. To do that, the VM creates an ObjString for "init" and
reuses it. The string lives right in the VM struct.

^code vm-init-string (1 before, 1 after)

We create and intern the string when the VM boots up.

^code init-init-string (1 before, 2 after)

We want it to stick around, so the GC considers it a root.

^code mark-init-string (1 before, 1 after)

Look carefully. See any bug waiting to happen? No? It's a subtle one. The
garbage collector now reads `vm.initString`. That field is initialized from the
result of calling `copyString()`. But copying a string allocates memory, which
can trigger a GC. If the collector ran at just the wrong time, it would read
`vm.initString` before it had been initialized. So, first we zero the field out.

^code null-init-string (2 before, 2 after)

We clear the pointer when the VM shuts down since the next line will free it.

^code clear-init-string (1 before, 1 after)

OK, that lets us call initializers.

### Initializer return values

The next step is ensuring that constructing an instance of a class with an
initializer always returns the new instance, and not `nil` or whatever the body
of the initializer returns. Right now, if a class defines an initializer, then
when an instance is constructed, the VM pushes a call to that initializer onto
the CallFrame stack. Then it just keeps on trucking.

The user's invocation on the class to create the instance will complete whenever
that initializer method returns, and will leave on the stack whatever value the
initializer puts there. That means that unless the user takes care to put
`return this;` at the end of the initializer, no instance will come out. Not
very helpful.

To fix this, whenever the front end compiles an initializer method, it will emit
different bytecode at the end of the body to return `this` from the method
instead of the usual implicit `nil` most functions return. In order to do
*that*, the compiler needs to actually know when it is compiling an initializer.
We detect that by checking to see if the name of the method we're compiling is
"init".

^code initializer-name (1 before, 1 after)

We define a new function type to distinguish initializers from other methods.

^code initializer-type-enum (1 before, 1 after)

Whenever the compiler emits the implicit return at the end of a body, we check
the type to decide whether to insert the initializer-specific behavior.

^code return-this (1 before, 1 after)

In an initializer, instead of pushing `nil` onto the stack before returning,
we load slot zero, which contains the instance. This `emitReturn()` function is
also called when compiling a `return` statement without a value, so this also
correctly handles cases where the user does an early return inside the
initializer.

### Incorrect returns in initializers

The last step, the last item in our list of special features of initializers, is
making it an error to try to return anything *else* from an initializer. Now
that the compiler tracks the method type, this is straightforward.

^code return-from-init (3 before, 1 after)

We report an error if a `return` statement in an initializer has a value. We
still go ahead and compile the value afterwards so that the compiler doesn't get
confused by the trailing expression and report a bunch of cascaded errors.

Aside from inheritance, which we'll get to [soon][super], we now have a
fairly full-featured class system working in clox.

```lox
class CoffeeMaker {
  init(coffee) {
    this.coffee = coffee;
  }

  brew() {
    print "Enjoy your cup of " + this.coffee;

    // No reusing the grounds!
    this.coffee = nil;
  }
}

var maker = CoffeeMaker("coffee and chicory");
maker.brew();
```

Pretty fancy for a C program that would fit on an old <span
name="floppy">floppy</span> disk.

<aside name="floppy">

I acknowledge that "floppy disk" may no longer be a useful size reference for
current generations of programmers. Maybe I should have said "a few tweets" or
something.

</aside>

## Optimized Invocations

Our VM correctly implements the language's semantics for method calls and
initializers. We could stop here. But the main reason we are building an entire
second implementation of Lox from scratch is to execute faster than our old Java
interpreter. Right now, method calls even in clox are slow.

Lox's semantics define a method invocation as two operations -- accessing the
method and then calling the result. Our VM must support those as separate
operations because the user *can* separate them. You can access a method without
calling it and then invoke the bound method later. Nothing we've implemented so
far is unnecessary.

But *always* executing those as separate operations has a significant cost.
Every single time a Lox program accesses and invokes a method, the runtime
heap allocates a new ObjBoundMethod, initializes its fields, then pulls them
right back out. Later, the GC has to spend time freeing all of those ephemeral
bound methods.

Most of the time, a Lox program accesses a method and then immediately calls it.
The bound method is created by one bytecode instruction and then consumed by the
very next one. In fact, it's so immediate that the compiler can even textually
*see* that it's happening -- a dotted property access followed by an opening
parenthesis is most likely a method call.

Since we can recognize this pair of operations at compile time, we have the
opportunity to emit a <span name="super">new, special</span> instruction that
performs an optimized method call.

We start in the function that compiles dotted property expressions.

<aside name="super" class="bottom">

If you spend enough time watching your bytecode VM run, you'll notice it often
executes the same series of bytecode instructions one after the other. A classic
optimization technique is to define a new single instruction called a
**superinstruction** that fuses those into a single instruction with the same
behavior as the entire sequence.

One of the largest performance drains in a bytecode interpreter is the overhead
of decoding and dispatching each instruction. Fusing several instructions into
one eliminates some of that.

The challenge is determining *which* instruction sequences are common enough to
benefit from this optimization. Every new superinstruction claims an opcode for
its own use and there are only so many of those to go around. Add too many, and
you'll need a larger encoding for opcodes, which then increases code size and
makes decoding *all* instructions slower.

</aside>

^code parse-call (3 before, 1 after)

After the compiler has parsed the property name, we look for a left parenthesis.
If we match one, we switch to a new code path. There, we compile the argument
list exactly like we do when compiling a call expression. Then we emit a single
new `OP_INVOKE` instruction. It takes two operands:

1.  The index of the property name in the constant table.

2.  The number of arguments passed to the method.

In other words, this single instruction combines the operands of the
`OP_GET_PROPERTY` and `OP_CALL` instructions it replaces, in that order. It
really is a fusion of those two instructions. Let's define it.

^code invoke-op (1 before, 1 after)

And add it to the disassembler:

^code disassemble-invoke (2 before, 1 after)

This is a new, special instruction format, so it needs a little custom
disassembly logic.

^code invoke-instruction

We read the two operands and then print out both the method name and the
argument count. Over in the interpreter's bytecode dispatch loop is where the
real action begins.

^code interpret-invoke (1 before, 1 after)

Most of the work happens in `invoke()`, which we'll get to. Here, we look up the
method name from the first operand and then read the argument count operand.
Then we hand off to `invoke()` to do the heavy lifting. That function returns
`true` if the invocation succeeds. As usual, a `false` return means a runtime
error occurred. We check for that here and abort the interpreter if disaster has
struck.

Finally, assuming the invocation succeeded, then there is a new CallFrame on the
stack, so we refresh our cached copy of the current frame in `frame`.

The interesting work happens here:

^code invoke

First we grab the receiver off the stack. The arguments passed to the method are
above it on the stack, so we peek that many slots down. Then it's a simple
matter to cast the object to an instance and invoke the method on it.

That does assume the object *is* an instance. As with `OP_GET_PROPERTY`
instructions, we also need to handle the case where a user incorrectly tries to
call a method on a value of the wrong type.

^code invoke-check-type (1 before, 1 after)

<span name="helper">That's</span> a runtime error, so we report that and bail
out. Otherwise, we get the instance's class and jump over to this other new
utility function:

<aside name="helper">

As you can guess by now, we split this code into a separate function because
we're going to reuse it later -- in this case for `super` calls.

</aside>

^code invoke-from-class

This function combines the logic of how the VM implements `OP_GET_PROPERTY` and
`OP_CALL` instructions, in that order. First we look up the method by name in
the class's method table. If we don't find one, we report that runtime error and
exit.

Otherwise, we take the method's closure and push a call to it onto the CallFrame
stack. We don't need to heap allocate and initialize an ObjBoundMethod. In fact,
we don't even need to <span name="juggle">juggle</span> anything on the stack.
The receiver and method arguments are already right where they need to be.

<aside name="juggle">

This is a key reason *why* we use stack slot zero to store the receiver -- it's
how the caller already organizes the stack for a method call. An efficient
calling convention is an important part of a bytecode VM's performance story.

</aside>

If you fire up the VM and run a little program that calls methods now, you
should see the exact same behavior as before. But, if we did our job right, the
*performance* should be much improved. I wrote a little microbenchmark that
does a batch of 10,000 method calls. Then it tests how many of these batches it
can execute in 10 seconds. On my computer, without the new `OP_INVOKE`
instruction, it got through 1,089 batches. With this new optimization, it
finished 8,324 batches in the same time. That's *7.6 times faster*, which is a
huge improvement when it comes to programming language optimization.

<span name="pat"></span>

<aside name="pat">

We shouldn't pat ourselves on the back *too* firmly. This performance
improvement is relative to our own unoptimized method call implementation which
was quite slow. Doing a heap allocation for every single method call isn't going
to win any races.

</aside>

<img src="image/methods-and-initializers/benchmark.png" alt="Bar chart comparing the two benchmark results." />

### Invoking fields

The fundamental creed of optimization is: "Thou shalt not break correctness."
<span name="monte">Users</span> like it when a language implementation gives
them an answer faster, but only if it's the *right* answer. Alas, our
implementation of faster method invocations fails to uphold that principle:

```lox
class Oops {
  init() {
    fun f() {
      print "not a method";
    }

    this.field = f;
  }
}

var oops = Oops();
oops.field();
```

The last line looks like a method call. The compiler thinks that it is and
dutifully emits an `OP_INVOKE` instruction for it. However, it's not. What is
actually happening is a *field* access that returns a function which then gets
called. Right now, instead of executing that correctly, our VM reports a runtime
error when it can't find a method named "field".

<aside name="monte">

There are cases where users may be satisfied when a program sometimes returns
the wrong answer in return for running significantly faster or with a better
bound on the performance. This is the domain of [**Monte Carlo
algorithms**][monte]. For some use cases, this is a good trade-off.

[monte]: https://en.wikipedia.org/wiki/Monte_Carlo_algorithm

The important part, though, is that the user is *choosing* to apply one of these
algorithms. We language implementers can't unilaterally decide to sacrifice
their program's correctness.

</aside>

Earlier, when we implemented `OP_GET_PROPERTY`, we handled both field and method
accesses. To squash this new bug, we need to do the same thing for `OP_INVOKE`.

^code invoke-field (1 before, 1 after)

Pretty simple fix. Before looking up a method on the instance's class, we look
for a field with the same name. If we find a field, then we store it on the
stack in place of the receiver, *under* the argument list. This is how
`OP_GET_PROPERTY` behaves since the latter instruction executes before a
subsequent parenthesized list of arguments has been evaluated.

Then we try to call that field's value like the callable that it hopefully is.
The `callValue()` helper will check the value's type and call it as appropriate
or report a runtime error if the field's value isn't a callable type like a
closure.

That's all it takes to make our optimization fully safe. We do sacrifice a
little performance, unfortunately. But that's the price you have to pay
sometimes. You occasionally get frustrated by optimizations you *could* do if
only the language wouldn't allow some annoying corner case. But, as language
<span name="designer">implementers</span>, we have to play the game we're given.

<aside name="designer">

As language *designers*, our role is very different. If we do control the
language itself, we may sometimes choose to restrict or change the language in
ways that enable optimizations. Users want expressive languages, but they also
want fast implementations. Sometimes it is good language design to sacrifice a
little power if you can give them perf in return.

</aside>

The code we wrote here follows a typical pattern in optimization:

1.  Recognize a common operation or sequence of operations that is performance
    critical. In this case, it is a method access followed by a call.

2.  Add an optimized implementation of that pattern. That's our `OP_INVOKE`
    instruction.

3.  Guard the optimized code with some conditional logic that validates that the
    pattern actually applies. If it does, stay on the fast path. Otherwise, fall
    back to a slower but more robust unoptimized behavior. Here, that means
    checking that we are actually calling a method and not accessing a field.

As your language work moves from getting the implementation working *at all* to
getting it to work *faster*, you will find yourself spending more and more
time looking for patterns like this and adding guarded optimizations for them.
Full-time VM engineers spend much of their careers in this loop.

But we can stop here for now. With this, clox now supports most of the features
of an object-oriented programming language, and with respectable performance.

<div class="challenges">

## Challenges

1.  The hash table lookup to find a class's `init()` method is constant time,
    but still fairly slow. Implement something faster. Write a benchmark and
    measure the performance difference.

1.  In a dynamically typed language like Lox, a single callsite may invoke a
    variety of methods on a number of classes throughout a program's execution.
    Even so, in practice, most of the time a callsite ends up calling the exact
    same method on the exact same class for the duration of the run. Most calls
    are actually not polymorphic even if the language says they can be.

    How do advanced language implementations optimize based on that observation?

1.  When interpreting an `OP_INVOKE` instruction, the VM has to do two hash
    table lookups. First, it looks for a field that could shadow a method, and
    only if that fails does it look for a method. The former check is rarely
    useful -- most fields do not contain functions. But it is *necessary*
    because the language says fields and methods are accessed using the same
    syntax, and fields shadow methods.

    That is a language *choice* that affects the performance of our
    implementation. Was it the right choice? If Lox were your language, what
    would you do?

</div>

<div class="design-note">

## Design Note: Novelty Budget

I still remember the first time I wrote a tiny BASIC program on a TRS-80 and
made a computer do something it hadn't done before. It felt like a superpower.
The first time I cobbled together just enough of a parser and interpreter to let
me write a tiny program in *my own language* that made a computer do a thing was
like some sort of higher-order meta-superpower. It was and remains a wonderful
feeling.

I realized I could design a language that looked and behaved however I chose. It
was like I'd been going to a private school that required uniforms my whole life
and then one day transferred to a public school where I could wear whatever I
wanted. I don't need to use curly braces for blocks? I can use something other
than an equals sign for assignment? I can do objects without classes? Multiple
inheritance *and* multimethods? A dynamic language that overloads statically, by
arity?

Naturally, I took that freedom and ran with it. I made the weirdest, most
arbitrary language design decisions. Apostrophes for generics. No commas between
arguments. Overload resolution that can fail at runtime. I did things
differently just for difference's sake.

This is a very fun experience that I highly recommend. We need more weird,
avant-garde programming languages. I want to see more art languages. I still
make oddball toy languages for fun sometimes.

*However*, if your goal is success where "success" is defined as a large number
of users, then your priorities must be different. In that case, your primary
goal is to have your language loaded into the brains of as many people as
possible. That's *really hard*. It takes a lot of human effort to move a
language's syntax and semantics from a computer into trillions of neurons.

Programmers are naturally conservative with their time and cautious about what
languages are worth uploading into their wetware. They don't want to waste their
time on a language that ends up not being useful to them. As a language
designer, your goal is thus to give them as much language power as you can with
as little required learning as possible.

One natural approach is *simplicity*. The fewer concepts and features your
language has, the less total volume of stuff there is to learn. This is one of
the reasons minimal <span name="dynamic">scripting</span> languages often find
success even though they aren't as powerful as the big industrial languages --
they are easier to get started with, and once they are in someone's brain, the
user wants to keep using them.

<aside name="dynamic">

In particular, this is a big advantage of dynamically typed languages. A static
language requires you to learn *two* languages -- the runtime semantics and the
static type system -- before you can get to the point where you are making the
computer do stuff. Dynamic languages require you to learn only the former.

Eventually, programs get big enough that the value of static analysis pays for
the effort to learn that second static language, but the value proposition isn't
as obvious at the outset.

</aside>

The problem with simplicity is that simply cutting features often sacrifices
power and expressiveness. There is an art to finding features that punch above
their weight, but often minimal languages simply do less.

There is another path that avoids much of that problem. The trick is to realize
that a user doesn't have to load your entire language into their head, *just the
part they don't already have in there*. As I mentioned in an [earlier design
note][note], learning is about transferring the *delta* between what they
already know and what they need to know.

[note]: parsing-expressions.html#design-note

Many potential users of your language already know some other programming
language. Any features your language shares with that language are essentially
"free" when it comes to learning. It's already in their head, they just have to
recognize that your language does the same thing.

In other words, *familiarity* is another key tool to lower the adoption cost of
your language. Of course, if you fully maximize that attribute, the end result
is a language that is completely identical to some existing one. That's not a
recipe for success, because at that point there's no incentive for users to
switch to your language at all.

So you do need to provide some compelling differences. Some things your language
can do that other languages can't, or at least can't do as well. I believe this
is one of the fundamental balancing acts of language design: similarity to other
languages lowers learning cost, while divergence raises the compelling
advantages.

I think of this balancing act in terms of a <span name="idiosyncracy">**novelty
budget**</span>, or as Steve Klabnik calls it, a "[strangeness budget][]". Users
have a low threshold for the total amount of new stuff they are willing to
accept to learn a new language. Exceed that, and they won't show up.

[strangeness budget]: https://words.steveklabnik.com/the-language-strangeness-budget

<aside name="idiosyncracy">

A related concept in psychology is [**idiosyncrasy credit**][idiosyncracy], the
idea that other people in society grant you a finite number of deviations from
social norms. You earn credit by fitting in and doing in-group things, which you
can then spend on oddball activities that might otherwise raise eyebrows. In
other words, demonstrating that you are "one of the good ones" gives you license
to raise your freak flag, but only so far.

[idiosyncracy]: https://en.wikipedia.org/wiki/Idiosyncrasy_credit

</aside>

Anytime you add something new to your language that other languages don't have,
or anytime your language does something other languages do in a different way,
you spend some of that budget. That's OK -- you *need* to spend it to make your
language compelling. But your goal is to spend it *wisely*. For each feature or
difference, ask yourself how much compelling power it adds to your language and
then evaluate critically whether it pays its way. Is the change so valuable that
it is worth blowing some of your novelty budget?

In practice, I find this means that you end up being pretty conservative with
syntax and more adventurous with semantics. As fun as it is to put on a new
change of clothes, swapping out curly braces with some other block delimiter is
very unlikely to add much real power to the language, but it does spend some
novelty. It's hard for syntax differences to carry their weight.

On the other hand, new semantics can significantly increase the power of the
language. Multimethods, mixins, traits, reflection, dependent types, runtime
metaprogramming, etc. can radically level up what a user can do with the
language.

Alas, being conservative like this is not as fun as just changing everything.
But it's up to you to decide whether you want to chase mainstream success or not
in the first place. We don't all need to be radio-friendly pop bands. If you
want your language to be like free jazz or drone metal and are happy with the
proportionally smaller (but likely more devoted) audience size, go for it.

</div>


================================================
FILE: book/optimization.md
================================================
> The evening's the best part of the day. You've done your day's work. Now you
> can put your feet up and enjoy it.
>
> <cite>Kazuo Ishiguro, <em>The Remains of the Day</em></cite>

If I still lived in New Orleans, I'd call this chapter a *lagniappe*, a little
something extra given for free to a customer. You've got a whole book and a
complete virtual machine already, but I want you to have some more fun hacking
on clox. This time, we're going for pure performance. We'll apply two very
different optimizations to our virtual machine. In the process, you'll get a
feel for measuring and improving the performance of a language implementation --
or any program, really.

## Measuring Performance

**Optimization** means taking a working application and improving its
performance. An optimized program does the same thing, it just takes fewer
resources to do so. The resource we usually think of when optimizing is runtime
speed, but it can also be important to reduce memory usage, startup time,
persistent storage size, or network bandwidth. All physical resources have some
cost -- even if the cost is mostly in wasted human time -- so optimization work
often pays off.

There was a time in the early days of computing when a skilled programmer could
hold the entire hardware architecture and compiler pipeline in their head and
understand a program's performance just by thinking real hard. Those days are
long gone, separated from the present by microcode, cache lines, branch
prediction, deep compiler pipelines, and mammoth instruction sets. We like to
pretend C is a "low-level" language, but the stack of technology between

```c
printf("Hello, world!");
```

and a greeting appearing on screen is now perilously tall.

Optimization today is an empirical science. Our program is a border collie
sprinting through the hardware's obstacle course. If we want her to reach the
end faster, we can't just sit and ruminate on canine physiology until
enlightenment strikes. Instead, we need to *observe* her performance, see where
she stumbles, and then find faster paths for her to take.

Much like agility training is particular to one dog and one obstacle course, we
can't assume that our virtual machine optimizations will make *all* Lox programs
run faster on *all* hardware. Different Lox programs stress different areas of
the VM, and different architectures have their own strengths and weaknesses.

### Benchmarks

When we add new functionality, we validate correctness by writing tests -- Lox
programs that use a feature and validate the VM's behavior. Tests pin down
semantics and ensure we don't break existing features when we add new ones. We
have similar needs when it comes to performance:

1.  How do we validate that an optimization *does* improve performance, and by
    how much?

2.  How do we ensure that other unrelated changes don't *regress* performance?

The Lox programs we write to accomplish those goals are **benchmarks**. These
are carefully crafted programs that stress some part of the language
implementation. They measure not *what* the program does, but how <span
name="much">*long*</span> it takes to do it.

<aside name="much">

Most benchmarks measure running time. But, of course, you'll eventually find
yourself needing to write benchmarks that measure memory allocation, how much
time is spent in the garbage collector, startup time, etc.

</aside>

By measuring the performance of a benchmark before and after a change, you can
see what your change does. When you land an optimization, all of the tests
should behave exactly the same as they did before, but hopefully the benchmarks
run faster.

Once you have an entire <span name="js">*suite*</span> of benchmarks, you can
measure not just *that* an optimization changes performance, but on which
*kinds* of code. Often you'll find that some benchmarks get faster while others
get slower. Then you have to make hard decisions about what kinds of code your
language implementation optimizes for.

The suite of benchmarks you choose to write is a key part of that decision. In
the same way that your tests encode your choices around what correct behavior
looks like, your benchmarks are the embodiment of your priorities when it comes
to performance. They will guide which optimizations you implement, so choose
your benchmarks carefully, and don't forget to periodically reflect on whether
they are helping you reach your larger goals.

<aside name="js">

In the early proliferation of JavaScript VMs, the first widely used benchmark
suite was SunSpider from WebKit. During the browser wars, marketing folks used
SunSpider results to claim their browser was fastest. That highly incentivized
VM hackers to optimize to those benchmarks.

Unfortunately, SunSpider programs often didn't match real-world JavaScript. They
were mostly microbenchmarks -- tiny toy programs that completed quickly. Those
benchmarks penalize complex just-in-time compilers that start off slower but get
*much* faster once the JIT has had enough time to optimize and re-compile hot
code paths. This put VM hackers in the unfortunate position of having to choose
between making the SunSpider numbers get better, or actually optimizing the
kinds of programs real users ran.

Google's V8 team responded by sharing their Octane benchmark suite, which was
closer to real-world code at the time. Years later, as JavaScript use patterns
continued to evolve, even Octane outlived its usefulness. Expect that your
benchmarks will evolve as your language's ecosystem does.

Remember, the ultimate goal is to make *user programs* faster, and benchmarks
are only a proxy for that.

</aside>

Benchmarking is a subtle art. As with tests, you must balance not overfitting to
your implementation while ensuring that the benchmark does actually tickle the
code paths that you care about. When you measure performance, you need to
compensate for variance caused by CPU throttling, caching, and other weird
hardware and operating system quirks. I won't give you a whole sermon here,
but treat benchmarking as its own skill that improves with practice.

### Profiling

OK, so you've got a few benchmarks now. You want to make them go faster. Now
what? First of all, let's assume you've done all the obvious, easy work. You are
using the right algorithms and data structures -- or, at least, you aren't using
ones that are aggressively wrong. I don't consider using a hash table instead of
a linear search through a huge unsorted array "optimization" so much as "good
software engineering".

Since the hardware is too complex to reason about our program's performance from
first principles, we have to go out into the field. That means *profiling*. A
**profiler**, if you've never used one, is a tool that runs your <span
name="program">program</span> and tracks hardware resource use as the code
executes. Simple ones show you how much time was spent in each function in your
program. Sophisticated ones log data cache misses, instruction cache misses,
branch mispredictions, memory allocations, and all sorts of other metrics.

<aside name="program">

"Your program" here means the Lox VM itself running some *other* Lox program. We
are trying to optimize clox, not the user's Lox script. Of course, the choice of
which Lox program to load into our VM will highly affect which parts of clox get
stressed, which is why benchmarks are so important.

A profiler *won't* show us how much time is spent in each *Lox* function in the
script being run. We'd have to write our own "Lox profiler" to do that, which is
slightly out of scope for this book.

</aside>

There are many profilers out there for various operating systems and languages.
On whatever platform you program, it's worth getting familiar with a decent
profiler. You don't need to be a master. I have learned things within minutes of
throwing a program at a profiler that would have taken me *days* to discover on
my own through trial and error. Profilers are wonderful, magical tools.

## Faster Hash Table Probing

Enough pontificating, let's get some performance charts going up and to the
right. The first optimization we'll do, it turns out, is about the *tiniest*
possible change we could make to our VM.

When I first got the bytecode virtual machine that clox is descended from
working, I did what any self-respecting VM hacker would do. I cobbled together a
couple of benchmarks, fired up a profiler, and ran those scripts through my
interpreter. In a dynamically typed language like Lox, a large fraction of user
code is field accesses and method calls, so one of my benchmarks looked
something like this:

```lox
class Zoo {
  init() {
    this.aardvark = 1;
    this.baboon   = 1;
    this.cat      = 1;
    this.donkey   = 1;
    this.elephant = 1;
    this.fox      = 1;
  }
  ant()    { return this.aardvark; }
  banana() { return this.baboon; }
  tuna()   { return this.cat; }
  hay()    { return this.donkey; }
  grass()  { return this.elephant; }
  mouse()  { return this.fox; }
}

var zoo = Zoo();
var sum = 0;
var start = clock();
while (sum < 100000000) {
  sum = sum + zoo.ant()
            + zoo.banana()
            + zoo.tuna()
            + zoo.hay()
            + zoo.grass()
            + zoo.mouse();
}

print clock() - start;
print sum;
```

<aside name="sum" class="bottom">

Another thing this benchmark is careful to do is *use* the result of the code it
executes. By calculating a rolling sum and printing the result, we ensure the VM
*must* execute all that Lox code. This is an important habit. Unlike our simple
Lox VM, many compilers do aggressive dead code elimination and are smart enough
to discard a computation whose result is never used.

Many a programming language hacker has been impressed by the blazing performance
of a VM on some benchmark, only to realize that it's because the compiler
optimized the entire benchmark program away to nothing.

</aside>

If you've never seen a benchmark before, this might seem ludicrous. *What* is
going on here? The program itself doesn't intend to <span name="sum">do</span>
anything useful. What it does do is call a bunch of methods and access a bunch
of fields since those are the parts of the language we're interested in. Fields
and methods live in hash tables, so it takes care to populate at least a <span
name="more">*few*</span> interesting keys in those tables. That is all wrapped
in a big loop to ensure our profiler has enough execution time to dig in and see
where the cycles are going.

<aside name="more">

If you really want to benchmark hash table performance, you should use many
tables of different sizes. The six keys we add to each table here aren't even
enough to get over our hash table's eight-element minimum threshold. But I
didn't want to throw an enormous benchmark script at you. Feel free to add more
critters and treats if you like.

</aside>

Before I tell you what my profiler showed me, spend a minute taking a few
guesses. Where in clox's codebase do you think the VM spent most of its time? Is
there any code we've written in previous chapters that you suspect is
particularly slow?

Here's what I found: Naturally, the function with the greatest inclusive time is
`run()`. (**Inclusive time** means the total time spent in some function and all
other functions it calls -- the total time between when you enter the function
and when it returns.) Since `run()` is the main bytecode execution loop, it
drives everything.

Inside `run()`, there are small chunks of time sprinkled in various cases in the
bytecode switch for common instructions like `OP_POP`, `OP_RETURN`, and
`OP_ADD`. The big heavy instructions are `OP_GET_GLOBAL` with 17% of the
execution time, `OP_GET_PROPERTY` at 12%, and `OP_INVOKE` which takes a whopping
42% of the total running time.

So we've got three hotspots to optimize? Actually, no. Because it turns out
those three instructions spend almost all of their time inside calls to the same
function: `tableGet()`. That function claims a whole 72% of the execution time
(again, inclusive). Now, in a dynamically typed language, we expect to spend a
fair bit of time looking stuff up in hash tables -- it's sort of the price of
dynamism. But, still, *wow.*

### Slow key wrapping

If you take a look at `tableGet()`, you'll see it's mostly a wrapper around a
call to `findEntry()` where the actual hash table lookup happens. To refresh
your memory, here it is in full:

```c
static Entry* findEntry(Entry* entries, int capacity,
                        ObjString* key) {
  uint32_t index = key->hash % capacity;
  Entry* tombstone = NULL;

  for (;;) {
    Entry* entry = &entries[index];
    if (entry->key == NULL) {
      if (IS_NIL(entry->value)) {
        // Empty entry.
        return tombstone != NULL ? tombstone : entry;
      } else {
        // We found a tombstone.
        if (tombstone == NULL) tombstone = entry;
      }
    } else if (entry->key == key) {
      // We found the key.
      return entry;
    }

    index = (index + 1) % capacity;
  }
}
```

When running that previous benchmark -- on my machine, at least -- the VM spends
70% of the total execution time on *one line* in this function. Any guesses as
to which one? No? It's this:

```c
  uint32_t index = key->hash % capacity;
```

That pointer dereference isn't the problem. It's the little `%`. It turns out
the modulo operator is *really* slow. Much slower than other <span
name="division">arithmetic</span> operators. Can we do something better?

<aside name="division">

Pipelining makes it hard to talk about the performance of an individual CPU
instruction, but to give you a feel for things, division and modulo are about
30-50 *times* slower than addition and subtraction on x86.

</aside>

In the general case, it's really hard to re-implement a fundamental arithmetic
operator in user code in a way that's faster than what the CPU itself can do.
After all, our C code ultimately compiles down to the CPU's own arithmetic
operations. If there were tricks we could use to go faster, the chip would
already be using them.

However, we can take advantage of the fact that we know more about our problem
than the CPU does. We use modulo here to take a key string's hash code and
wrap it to fit within the bounds of the table's entry array. That array starts
out at eight elements and grows by a factor of two each time. We know -- and the
CPU and C compiler do not -- that our table's size is always a power of two.

Because we're clever bit twiddlers, we know a faster way to calculate the
remainder of a number modulo a power of two: **bit masking**. Let's say we want
to calculate 229 modulo 64. The answer is 37, which is not particularly apparent
in decimal, but is clearer when you view those numbers in binary:

<img src="image/optimization/mask.png" alt="The bit patterns resulting from 229 % 64 = 37 and 229 &amp; 63 = 37." />

On the left side of the illustration, notice how the result (37) is simply the
dividend (229) with the highest two bits shaved off? Those two highest bits are
the bits at or to the left of the divisor's single 1 bit.

On the right side, we get the same result by taking 229 and bitwise <span
class="small-caps">AND</span>-ing it with 63, which is one less than our
original power of two divisor. Subtracting one from a power of two gives you a
series of 1 bits. That is exactly the mask we need in order to strip out those
two leftmost bits.

In other words, you can calculate a number modulo any power of two by simply
<span class="small-caps">AND</span>-ing it with that power of two minus one. I'm
not enough of a mathematician to *prove* to you that this works, but if you
think it through, it should make sense. We can replace that slow modulo operator
with a very fast decrement and bitwise <span class="small-caps">AND</span>. We
simply change the offending line of code to this:

^code initial-index (2 before, 1 after)

CPUs love bitwise operators, so it's hard to <span name="sub">improve</span> on that.

<aside name="sub">

Another potential improvement is to eliminate the decrement by storing the bit
mask directly instead of the capacity. In my tests, that didn't make a
difference. Instruction pipelining makes some operations essentially free if the
CPU is bottlenecked elsewhere.

</aside>

Our linear probing search may need to wrap around the end of the array, so there
is another modulo in `findEntry()` to update.

^code next-index (4 before, 1 after)

This line didn't show up in the profiler since most searches don't wrap.

The `findEntry()` function has a sister function, `tableFindString()`, that does
a hash table lookup for interning strings. We may as well apply the same
optimizations there too. This function is called only when interning strings,
which wasn't heavily stressed by our benchmark. But a Lox program that created
lots of strings might noticeably benefit from this change.

^code find-string-index (2 before, 2 after)

And also when the linear probing wraps around.

^code find-string-next (3 before, 1 after)

Let's see if our fixes were worth it. I tweaked that zoological benchmark to
count how many <span name="batch">batches</span> of 10,000 calls it can run in
ten seconds. More batches equals faster performance. On my machine using the
unoptimized code, the benchmark gets through 3,192 batches. After this
optimization, that jumps to 6,249.

<img src="image/optimization/hash-chart.png" alt="Bar chart comparing the performance before and after the optimization." />

That's almost exactly twice as much work in the same amount of time. We made the
VM twice as fast (usual caveat: on this benchmark). That is a massive win when
it comes to optimization. Usually you feel good if you can claw a few percentage
points here or there. Since methods, fields, and global variables are so
prevalent in Lox programs, this tiny optimization improves performance across
the board. Almost every Lox program benefits.

<aside name="batch">

Our original benchmark fixed the amount of *work* and then measured the *time*.
Changing the script to count how many batches of calls it can do in ten seconds
fixes the time and measures the work. For performance comparisons, I like the
latter measure because the reported number represents *speed*. You can directly
compare the numbers before and after an optimization. When measuring execution
time, you have to do a little arithmetic to get to a good relative measure of
performance.

</aside>

Now, the point of this section is *not* that the modulo operator is profoundly
evil and you should stamp it out of every program you ever write. Nor is it that
micro-optimization is a vital engineering skill. It's rare that a performance
problem has such a narrow, effective solution. We got lucky.

The point is that we didn't *know* that the modulo operator was a performance
drain until our profiler told us so. If we had wandered around our VM's codebase
blindly guessing at hotspots, we likely wouldn't have noticed it. What I want
you to take away from this is how important it is to have a profiler in your
toolbox.

To reinforce that point, let's go ahead and run the original benchmark in our
now-optimized VM and see what the profiler shows us. On my machine, `tableGet()`
is still a fairly large chunk of execution time. That's to be expected for a
dynamically typed language. But it has dropped from 72% of the total execution
time down to 35%. That's much more in line with what we'd like to see and shows
that our optimization didn't just make the program faster, but made it faster
*in the way we expected*. Profilers are as useful for verifying solutions as
they are for discovering problems.

## NaN Boxing

This next optimization has a very different feel. Thankfully, despite the odd
name, it does not involve punching your grandmother. It's different, but not,
like, *that* different. With our previous optimization, the profiler told us
where the problem was, and we merely had to use some ingenuity to come up with a
solution.

This optimization is more subtle, and its performance effects more scattered
across the virtual machine. The profiler won't help us come up with this.
Instead, it was invented by <span name="someone">someone</span> thinking deeply
about the lowest levels of machine architecture.

<aside name="someone">

I'm not sure who first came up with this trick. The earliest source I can find
is David Gudeman's 1993 paper "Representing Type Information in Dynamically
Typed Languages". Everyone else cites that. But Gudeman himself says the paper
isn't novel work and instead "gathers together a body of folklore".

Maybe the inventor has been lost to the mists of time, or maybe it's been
reinvented a number of times. Anyone who ruminates on IEEE 754 long enough
probably starts thinking about trying to stuff something useful into all those
unused NaN bits.

</aside>

Like the heading says, this optimization is called **NaN boxing** or sometimes
**NaN tagging**. Personally I like the latter name because "boxing" tends to imply
some kind of heap-allocated representation, but the former seems to be the more
widely used term. This technique changes how we represent values in the VM.

On a 64-bit machine, our Value type takes up 16 bytes. The struct has two
fields, a type tag and a union for the payload. The largest fields in the union
are an Obj pointer and a double, which are both 8 bytes. To keep the union field
aligned to an 8-byte boundary, the compiler adds padding after the tag too:

<img src="image/optimization/union.png" alt="Byte layout of the 16-byte tagged union Value." />

That's pretty big. If we could cut that down, then the VM could pack more values
into the same amount of memory. Most computers have plenty of RAM these days, so
the direct memory savings aren't a huge deal. But a smaller representation means
more Values fit in a cache line. That means fewer cache misses, which affects
*speed*.

If Values need to be aligned to their largest payload size, and a Lox number or
Obj pointer needs a full 8 bytes, how can we get any smaller? In a dynamically
typed language like Lox, each value needs to carry not just its payload, but
enough additional information to determine the value's type at runtime. If a Lox
number is already using the full 8 bytes, where could we squirrel away a couple
of extra bits to tell the runtime "this is a number"?

This is one of the perennial problems for dynamic language hackers. It
particularly bugs them because statically typed languages don't generally have
this problem. The type of each value is known at compile time, so no extra
memory is needed at runtime to track it. When your C compiler compiles a 32-bit
int, the resulting variable gets *exactly* 32 bits of storage.

Dynamic language folks hate losing ground to the static camp, so they've come up
with a number of very clever ways to pack type information and a payload into a
small number of bits. NaN boxing is one of those. It's a particularly good fit
for languages like JavaScript and Lua, where all numbers are double-precision
floating point. Lox is in that same boat.

### What is (and is not) a number?

Before we start optimizing, we need to really understand how our friend the CPU
represents floating-point numbers. Almost all machines today use the same
scheme, encoded in the venerable scroll [IEEE 754][754], known to mortals as the
"IEEE Standard for Floating-Point Arithmetic".

[754]: https://en.wikipedia.org/wiki/IEEE_754

In the eyes of your computer, a <span name="hyphen">64-bit</span>,
double-precision, IEEE floating-point number looks like this:

<aside name="hyphen">

That's a lot of hyphens for one sentence.

</aside>

<img src="image/optimization/double.png" alt="Bit representation of an IEEE 754 double." />

*   Starting from the right, the first 52 bits are the **fraction**,
    **mantissa**, or **significand** bits. They represent the significant digits
    of the number, as a binary integer.

*   Next to that are 11 **exponent** bits. These tell you how far the mantissa
    is shifted away from the decimal (well, binary) point.

*   The highest bit is the <span name="sign">**sign bit**</span>, which
    indicates whether the number is positive or negative.

I know that's a little vague, but this chapter isn't a deep dive on
floating-point representation. If you want to know how the exponent and mantissa
play together, there are already better explanations out there than I could
write.

<aside name="sign">

Since the sign bit is always present, even if the number is zero, that implies
that "positive zero" and "negative zero" have different bit representations, and
indeed, IEEE 754 does distinguish those.

</aside>

The important part for our purposes is that the spec carves out a special case
exponent. When all of the exponent bits are set, then instead of just
representing a really big number, the value has a different meaning. Most of
these values are "Not a Number" (hence, **NaN**) values. They represent concepts
like the result of dividing zero by zero.

*Any* double whose exponent bits are all set and whose mantissa bits are not all
zero is a NaN. (The all-zero mantissa is reserved for positive and negative
infinity.) That means there are lots and lots of *different* NaN bit patterns.
IEEE 754 divides those into two categories. Values where the highest mantissa
bit is 0 are called **signalling NaNs**, and the others are **quiet NaNs**.
Signalling NaNs are intended to be the result of erroneous computations, like
division by zero. A chip <span name="abort">may</span> detect when one of these
values is produced and abort a program completely. They may self-destruct if you
try to read one.

<aside name="abort">

I don't know if any CPUs actually *do* trap signalling NaNs and abort. The spec
just says they *could*.

</aside>

Quiet NaNs are supposed to be safer to use. They don't represent useful numeric
values, but they should at least not set your hand on fire if you touch them.

Every double with all of its exponent bits set and its highest mantissa bit set
is a quiet NaN. That leaves 52 bits unaccounted for. We'll avoid one of those so
that we don't step on Intel's "QNaN Floating-Point Indefinite" value, leaving us
51 bits. Those remaining bits can be anything. We're talking
2,251,799,813,685,248 unique quiet NaN bit patterns.

<img src="image/optimization/nan.png" alt="The bits in a double that make it a quiet NaN." />

This means a 64-bit double has enough room to store all of the various different
numeric floating-point values and *also* has room for another 51 bits of data
that we can use however we want. That's plenty of room to set aside a couple of
bit patterns to represent Lox's `nil`, `true`, and `false` values. But what
about Obj pointers? Don't pointers need a full 64 bits too?

Fortunately, we have another trick up our other sleeve. Yes, technically
pointers on a 64-bit architecture are 64 bits. But, no architecture I know of
actually uses that entire address space. Instead, most widely used chips today
only ever use the low <span name="48">48</span> bits. The remaining 16 bits are
either unspecified or always zero.

<aside name="48">

48 bits is enough to address 262,144 gigabytes of memory. Modern operating
systems also give each process its own address space, so that should be plenty.

</aside>

If we've got 51 bits, we can stuff a 48-bit pointer in there with three bits to
spare. Those three bits are just enough to store tiny type tags to distinguish
between `nil`, Booleans, and Obj pointers.

That's NaN boxing. Within a single 64-bit double, you can store all of the
different floating-point numeric values, a pointer, or any of a couple of other
special sentinel values. Half the memory usage of our current Value struct,
while retaining all of the fidelity.

What's particularly nice about this representation is that there is no need to
*convert* a numeric double value into a "boxed" form. Lox numbers *are* just
normal, 64-bit doubles. We still need to *check* their type before we use them,
since Lox is dynamically typed, but we don't need to do any bit shifting or
pointer indirection to go from "value" to "number".

For the other value types, there is a conversion step, of course. But,
fortunately, our VM hides all of the mechanism to go from values to raw types
behind a handful of macros. Rewrite those to implement NaN boxing, and the rest
of the VM should just work.

### Conditional support

I know the details of this new representation aren't clear in your head yet.
Don't worry, they will crystallize as we work through the implementation. Before
we get to that, we're going to put some compile-time scaffolding in place.

For our previous optimization, we rewrote the old slow code and called it
done. This one is a little different. NaN boxing relies on some very low-level
details of how a chip represents floating-point numbers and pointers. It
*probably* works on most CPUs you're likely to encounter, but you can never be
totally sure.

It would suck if our VM completely lost support for an architecture just because
of its value representation. To avoid that, we'll maintain support for *both*
the old tagged union implementation of Value and the new NaN-boxed form. We
select which representation we want at compile time using this flag:

^code define-nan-boxing (2 before, 1 after)

If that's defined, the VM uses the new form. Otherwise, it reverts to the old
style. The few pieces of code that care about the details of the value
representation -- mainly the handful of macros for wrapping and unwrapping
Values -- vary based on whether this flag is set. The rest of the VM can
continue along its merry way.

Most of the work happens in the "value" module where we add a section for the
new type.

^code nan-boxing (2 before, 1 after)

When NaN boxing is enabled, the actual type of a Value is a flat, unsigned
64-bit integer. We could use double instead, which would make the macros for
dealing with Lox numbers a little simpler. But all of the other macros need to
do bitwise operations and uint64_t is a much friendlier type for that. Outside
of this module, the rest of the VM doesn't really care one way or the other.

Before we start re-implementing those macros, we close the `#else` branch of the
`#ifdef` at the end of the definitions for the old representation.

^code end-if-nan-boxing (1 before, 2 after)

Our remaining task is simply to fill in that first `#ifdef` section with new
implementations of all the stuff already in the `#else` side. We'll work through
it one value type at a time, from easiest to hardest.

### Numbers

We'll start with numbers since they have the most direct representation under
NaN boxing. To "convert" a C double to a NaN-boxed clox Value, we don't need to
touch a single bit -- the representation is exactly the same. But we do need to
convince our C compiler of that fact, which we made harder by defining Value to
be uint64_t.

We need to get the compiler to take a set of bits that it thinks are a double
and use those same bits as a uint64_t, or vice versa. This is called **type
punning**. C and C++ programmers have been doing this since the days of bell
bottoms and 8-tracks, but the language specifications have <span
name="hesitate">hesitated</span> to say which of the many ways to do this is
officially sanctioned.

<aside name="hesitate" class="bottom">

Spec authors don't like type punning because it makes optimization harder. A key
optimization technique is reordering instructions to fill the CPU's execution
pipelines. A compiler can reorder code only when doing so doesn't have a
user-visible effect, obviously.

Pointers make that harder. If two pointers point to the same value, then a write
through one and a read through the other cannot be reordered. But what about two
pointers of *different* types? If those could point to the same object, then
basically *any* two pointers could be aliases to the same value. That
drastically limits the amount of code the compiler is free to rearrange.

To avoid that, compilers want to assume **strict aliasing** -- pointers of
incompatible types cannot point to the same value. Type punning, by nature,
breaks that assumption.

</aside>

I know one way to convert a `double` to `Value` and back that I believe is
supported by both the C and C++ specs. Unfortunately, it doesn't fit in a single
expression, so the conversion macros have to call out to helper functions.
Here's the first macro:

^code number-val (1 before, 2 after)

That macro passes the double here:

^code num-to-value (1 before, 2 after)

I know, weird, right? The way to treat a series of bytes as having a different
type without changing their value at all is `memcpy()`? This looks horrendously
slow: Create a local variable. Pass its address to the operating system through
a syscall to copy a few bytes. Then return the result, which is the exact same
bytes as the input. Thankfully, because this *is* the supported idiom for type
punning, most compilers recognize the pattern and optimize away the `memcpy()`
entirely.

"Unwrapping" a Lox number is the mirror image.

^code as-number (1 before, 2 after)

That macro calls this function:

^code value-to-num (1 before, 2 after)

It works exactly the same except we swap the types. Again, the compiler will
eliminate all of it. Even though those calls to
`memcpy()` will disappear, we still need to show the compiler *which* `memcpy()`
we're calling so we also need an <span name="union">include</span>.

<aside name="union" class="bottom">

If you find yourself with a compiler that does not optimize the `memcpy()` away,
try this instead:

```c
double valueToNum(Value value) {
  union {
    uint64_t bits;
    double num;
  } data;
  data.bits = value;
  return data.num;
}
```

</aside>

^code include-string (1 before, 2 after)

That was a lot of code to ultimately do nothing but silence the C type checker.
Doing a runtime type *test* on a Lox number is a little more interesting. If all
we have are exactly the bits for a double, how do we tell that it *is* a double?
It's time to get bit twiddling.

^code is-number (1 before, 2 after)

We know that every Value that is *not* a number will use a special quiet NaN
representation. And we presume we have correctly avoided any of the meaningful
NaN representations that may actually be produced by doing arithmetic on
numbers.

If the double has all of its NaN bits set, and the quiet NaN bit set, and one
more for good measure, we can be <span name="certain">pretty certain</span> it
is one of the bit patterns we ourselves have set aside for other types. To check
that, we mask out all of the bits except for our set of quiet NaN bits. If *all*
of those bits are set, it must be a NaN-boxed value of some other Lox type.
Otherwise, it is actually a number.

<aside name="certain">

Pretty certain, but not strictly guaranteed. As far as I know, there is nothing
preventing a CPU from producing a NaN value as the result of some operation
whose bit representation collides with ones we have claimed. But in my tests
across a number of architectures, I haven't seen it happen.

</aside>

The set of quiet NaN bits is declared like this:

^code qnan (1 before, 2 after)

It would be nice if C supported binary literals. But if you do the conversion,
you'll see that value is the same as this:

<img src="image/optimization/qnan.png" alt="The quiet NaN bits." />

This is exactly all of the exponent bits, plus the quiet NaN bit, plus one extra
to dodge that Intel value.

### Nil, true, and false

The next type to handle is `nil`. That's pretty simple since there's only one
`nil` value and thus we need only a single bit pattern to represent it. There
are two other singleton values, the two Booleans, `true` and `false`. This calls
for three total unique bit patterns.

Two bits give us four different combinations, which is plenty. We claim the two
lowest bits of our unused mantissa space as a "type tag" to determine which of
these three singleton values we're looking at. The three type tags are defined
like so:

^code tags (1 before, 2 after)

Our representation of `nil` is thus all of the bits required to define our
quiet NaN representation along with the `nil` type tag bits:

<img src="image/optimization/nil.png" alt="The bit representation of the nil value." />

In code, we define that value like so:

^code nil-val (2 before, 1 after)

We simply bitwise <span class="small-caps">OR</span> the quiet NaN bits and the
type tag, and then do a little cast dance to teach the C compiler what we want
those bits to mean.

Since `nil` has only a single bit representation, we can use equality on
uint64_t to see if a Value is `nil`.

<span name="equal"></span>

^code is-nil (2 before, 1 after)

You can guess how we define the `true` and `false` values.

^code false-true-vals (2 before, 1 after)

The bits look like this:

<img src="image/optimization/bools.png" alt="The bit representation of the true and false values." />

To convert a C bool into a Lox Boolean, we rely on these two singleton values
and the good old conditional operator.

^code bool-val (2 before, 1 after)

There's probably a cleverer bitwise way to do this, but my hunch is that the
compiler can figure one out faster than I can. Going the other direction is
simpler.

^code as-bool (2 before, 1 after)

Since we know there are exactly two Boolean bit representations in Lox -- unlike
in C where any non-zero value can be considered "true" -- if it ain't `true`, it
must be `false`. This macro does assume you call it only on a Value that you
know *is* a Lox Boolean. To check that, there's one more macro.

^code is-bool (2 before, 1 after)

That looks a little strange. A more obvious macro would look like this:

```c
#define IS_BOOL(v) ((v) == TRUE_VAL || (v) == FALSE_VAL)
```

Unfortunately, that's not safe. The expansion mentions `v` twice, which means if
that expression has any side effects, they will be executed twice. We could have
the macro call out to a separate function, but, ugh, what a chore.

Instead, we bitwise <span class="small-caps">OR</span> a 1 onto the value to
merge the only two valid Boolean bit patterns. That leaves three potential
states the value can be in:

1. It was `FALSE_VAL` and has now been converted to `TRUE_VAL`.

2. It was `TRUE_VAL` and the `| 1` did nothing and it's still `TRUE_VAL`.

3. It's some other, non-Boolean value.

At that point, we can simply compare the result to `TRUE_VAL` to see if we're
in the first two states or the third.

### Objects

The last value type is the hardest. Unlike the singleton values, there are
billions of different pointer values we need to box inside a NaN. This means we
need both some kind of tag to indicate that these particular NaNs *are* Obj
pointers, and room for the addresses themselves.

The tag bits we used for the singleton values are in the region where I decided
to store the pointer itself, so we can't easily use a different <span
name="ptr">bit</span> there to indicate that the value is an object reference.
However, there is another bit we aren't using. Since all our NaN values are not
numbers -- it's right there in the name -- the sign bit isn't used for anything.
We'll go ahead and use that as the type tag for objects. If one of our quiet
NaNs has its sign bit set, then it's an Obj pointer. Otherwise, it must be one
of the previous singleton values.

<aside name="ptr">

We actually *could* use the lowest bits to store the type tag even when the
value is an Obj pointer. That's because Obj pointers are always aligned to an
8-byte boundary since Obj contains a 64-bit field. That, in turn, implies that
the three lowest bits of an Obj pointer will always be zero. We could store
whatever we wanted in there and just mask it off before dereferencing the
pointer.

This is another value representation optimization called **pointer tagging**.

</aside>

If the sign bit is set, then the remaining low bits store the pointer to the
Obj:

<img src="image/optimization/obj.png" alt="Bit representation of an Obj* stored in a Value." />

To convert a raw Obj pointer to a Value, we take the pointer and set all of the
quiet NaN bits and the sign bit.

^code obj-val (1 before, 2 after)

The pointer itself is a full 64 bits, and in <span name="safe">principle</span>,
it could thus overlap with some of those quiet NaN and sign bits. But in
practice, at least on the architectures I've tested, everything above the 48th
bit in a pointer is always zero. There's a lot of casting going on here, which
I've found is necessary to satisfy some of the pickiest C compilers, but the
end result is just jamming some bits together.

<aside name="safe">

I try to follow the letter of the law when it comes to the code in this book, so
this paragraph is dubious. There comes a point when optimizing where you push
the boundary of not just what the *spec says* you can do, but what a real
compiler and chip let you get away with.

There are risks when stepping outside of the spec, but there are rewards in that
lawless territory too. It's up to you to decide if the gains are worth it.

</aside>

We define the sign bit like so:

^code sign-bit (2 before, 2 after)

To get the Obj pointer back out, we simply mask off all of those extra bits.

^code as-obj (1 before, 2 after)

The tilde (`~`), if you haven't done enough bit manipulation to encounter it
before, is bitwise <span class="small-caps">NOT</span>. It toggles all ones and
zeroes in its operand. By masking the value with the bitwise negation of the
quiet NaN and sign bits, we *clear* those bits and let the pointer bits remain.

One last macro:

^code is-obj (1 before, 2 after)

A Value storing an Obj pointer has its sign bit set, but so does any negative
number. To tell if a Value is an Obj pointer, we need to check that both the
sign bit and all of the quiet NaN bits are set. This is similar to how we detect
the type of the singleton values, except this time we use the sign bit as the
tag.

### Value functions

The rest of the VM usually goes through the macros when working with Values, so
we are almost done. However, there are a couple of functions in the "value"
module that peek inside the otherwise black box of Value and work with its
encoding directly. We need to fix those too.

The first is `printValue()`. It has separate code for each value type. We no
longer have an explicit type enum we can switch on, so instead we use a series
of type tests to handle each kind of value.

^code print-value (1 before, 1 after)

This is technically a tiny bit slower than a switch, but compared to the
overhead of actually writing to a stream, it's negligible.

We still support the original tagged union representation, so we keep the old
code and enclose it in the `#else` conditional section.

^code end-print-value (1 before, 1 after)

The other operation is testing two values for equality.

^code values-equal (1 before, 1 after)

It doesn't get much simpler than that! If the two bit representations are
identical, the values are equal. That does the right thing for the singleton
values since each has a unique bit representation and they are only equal to
themselves. It also does the right thing for Obj pointers, since objects use
identity for equality -- two Obj references are equal only if they point to the
exact same object.

It's *mostly* correct for numbers too. Most floating-point numbers with
different bit representations are distinct numeric values. Alas, IEEE 754
contains a pothole to trip us up. For reasons that aren't entirely clear to me,
the spec mandates that NaN values are *not* equal to *themselves*. This isn't a
problem for the special quiet NaNs that we are using for our own purposes. But
it's possible to produce a "real" arithmetic NaN in Lox, and if we want to
correctly implement IEEE 754 numbers, then the resulting value is not supposed
to be equal to itself. More concretely:

```lox
var nan = 0/0;
print nan == nan;
```

IEEE 754 says this program is supposed to print "false". It does the right thing
with our old tagged union representation because the `VAL_NUMBER` case applies
`==` to two values that the C compiler knows are doubles. Thus the compiler
generates the right CPU instruction to perform an IEEE floating-point equality.

Our new representation breaks that by defining Value to be a uint64_t. If we
want to be *fully* compliant with IEEE 754, we need to handle this case.

^code nan-equality (1 before, 1 after)

I know, it's weird. And there is a performance cost to doing this type test
every time we check two Lox values for equality. If we are willing to sacrifice
a little <span name="java">compatibility</span> -- who *really* cares if NaN is
not equal to itself? -- we could leave this off. I'll leave it up to you to
decide how pedantic you want to be.

<aside name="java">

In fact, jlox gets NaN equality wrong. Java does the right thing when you
compare primitive doubles using `==`, but not if you box those to Double or
Object and compare them using `equals()`, which is how jlox implements equality.

</aside>

Finally, we close the conditional compilation section around the old
implementation.

^code end-values-equal (1 before, 1 after)

And that's it. This optimization is complete, as is our clox virtual machine.
That was the last line of new code in the book.

### Evaluating performance

The code is done, but we still need to figure out if we actually made anything
better with these changes. Evaluating an optimization like this is very
different from the previous one. There, we had a clear hotspot visible in the
profiler. We fixed that part of the code and could instantly see the hotspot
get faster.

The effects of changing the value representation are more diffused. The macros
are expanded in place wherever they are used, so the performance changes are
spread across the codebase in a way that's hard for many profilers to track
well, especially in an <span name="opt">optimized</span> build.

<aside name="opt">

When doing profiling work, you almost always want to profile an optimized
"release" build of your program since that reflects the performance story your
end users experience. Compiler optimizations, like inlining, can dramatically
affect which parts of the code are performance hotspots. Hand-optimizing a debug
build risks sending you off "fixing" problems that the optimizing compiler will
already solve for you.

Make sure you don't accidentally benchmark and optimize your debug build. I seem
to make that mistake at least once a year.

</aside>

We also can't easily *reason* about the effects of our change. We've made values
smaller, which reduces cache misses all across the VM. But the actual real-world
performance effect of that change is highly dependent on the memory use of the
Lox program being run. A tiny Lox microbenchmark may not have enough values
scattered around in memory for the effect to be noticeable, and even things like
the addresses handed out to us by the C memory allocator can impact the results.

If we did our job right, basically everything gets a little faster, especially
on larger, more complex Lox programs. But it is possible that the extra bitwise
operations we do when NaN-boxing values nullify the gains from the better
memory use. Doing performance work like this is unnerving because you can't
easily *prove* that you've made the VM better. You can't point to a single
surgically targeted microbenchmark and say, "There, see?"

Instead, what we really need is a *suite* of larger benchmarks. Ideally, they
would be distilled from real-world applications -- not that such a thing exists
for a toy language like Lox. Then we can measure the aggregate performance
changes across all of those. I did my best to cobble together a handful of
larger Lox programs. On my machine, the new value representation seems to make
everything roughly 10% faster across the board.

That's not a huge improvement, especially compared to the profound effect of
making hash table lookups faster. I added this optimization in large part
because it's a good example of a certain *kind* of performance work you may
experience, and honestly, because I think it's technically really cool. It might
not be the first thing I would reach for if I were seriously trying to make clox
faster. There is probably other, lower-hanging fruit.

But, if you find yourself working on a program where all of the easy wins have
been taken, then at some point you may want to think about tuning your value
representation. I hope this chapter has shined a light on some of the options
you have in that area.

## Where to Next

We'll stop here with the Lox language and our two interpreters. We could tinker
on it forever, adding new language features and clever speed improvements. But,
for this book, I think we've reached a natural place to call our work complete.
I won't rehash everything we've learned in the past many pages. You were there
with me and you remember. Instead, I'd like to take a minute to talk about where
you might go from here. What is the next step in your programming language
journey?

Most of you probably won't spend a significant part of your career working in
compilers or interpreters. It's a pretty small slice of the computer science
academia pie, and an even smaller segment of software engineering in industry.
That's OK. Even if you never work on a compiler again in your life, you will
certainly *use* one, and I hope this book has equipped you with a better
understanding of how the programming languages you use are designed and
implemented.

You have also learned a handful of important, fundamental data structures and
gotten some practice doing low-level profiling and optimization work. That kind
of expertise is helpful no matter what domain you program in.

I also hope I gave you a new way of <span name="domain">looking</span> at and
solving problems. Even if you never work on a language again, you may be
surprised to discover how many programming problems can be seen as
language-*like*. Maybe that report generator you need to write can be modeled as
a series of stack-based "instructions" that the generator "executes". That user
interface you need to render looks an awful lot like traversing an AST.

<aside name="domain">

This goes for other domains too. I don't think there's a single topic I've
learned in programming -- or even outside of programming -- that I haven't ended
up finding useful in other areas. One of my favorite aspects of software
engineering is how much it rewards those with eclectic interests.

</aside>

If you do want to go further down the programming language rabbit hole, here
are some suggestions for which branches in the tunnel to explore:

*   Our simple, single-pass bytecode compiler pushed us towards mostly runtime
    optimization. In a mature language implementation, compile-time optimization
    is generally more important, and the field of compiler optimizations is
    incredibly rich. Grab a classic <span name="cooper">compilers</span> book,
    and rebuild the front end of clox or jlox to be a sophisticated compilation
    pipeline with some interesting intermediate representations and optimization
    passes.

    Dynamic typing will place some restrictions on how far you can go, but there
    is still a lot you can do. Or maybe you want to take a big leap and add
    static types and a type checker to Lox. That will certainly give your front
    end a lot more to chew on.

    <aside name="cooper">

    I like Cooper and Torczon's *Engineering a Compiler* for this. Appel's
    *Modern Compiler Implementation* books are also well regarded.

    </aside>

*   In this book, I aim to be correct, but not particularly rigorous. My goal is
    mostly to give you an *intuition* and a feel for doing language work. If you
    like more precision, then the whole world of programming language academia
    is waiting for you. Languages and compilers have been studied formally since
    before we even had computers, so there is no shortage of books and papers on
    parser theory, type systems, semantics, and formal logic. Going down this
    path will also teach you how to read CS papers, which is a valuable skill in
    its own right.

*   Or, if you just really enjoy hacking on and making languages, you can take
    Lox and turn it into your own <span name="license">plaything</span>. Change
    the syntax to something that delights your eye. Add missing features or
    remove ones you don't like. Jam new optimizations in there.

    <aside name="license">

    The *text* of this book is copyrighted to me, but the *code* and the
    implementations of jlox and clox use the very permissive [MIT license][].
    You are more than welcome to [take either of those interpreters][source] and
    do whatever you want with them. Go to town.

    If you make significant changes to the language, it would be good to also
    change the name, mostly to avoid confusing people about what the name "Lox"
    represents.

    </aside>

    Eventually you may get to a point where you have something you think others
    could use as well. That gets you into the very distinct world of programming
    language *popularity*. Expect to spend a ton of time writing documentation,
    example programs, tools, and useful libraries. The field is crowded with
    languages vying for users. To thrive in that space you'll have to put on
    your marketing hat and *sell*. Not everyone enjoys that kind of
    public-facing work, but if you do, it can be incredibly gratifying to see
    people use your language to express themselves.

Or maybe this book has satisfied your craving and you'll stop here. Whichever
way you go, or don't go, there is one lesson I hope to lodge in your heart. Like
I was, you may have initially been intimidated by programming languages. But in
these chapters, you've seen that even really challenging material can be tackled
by us mortals if we get our hands dirty and take it a step at a time. If you can
handle compilers and interpreters, you can do anything you put your mind to.

[mit license]: https://en.wikipedia.org/wiki/MIT_License
[source]: https://github.com/munificent/craftinginterpreters

<div class="challenges">

## Challenges

Assigning homework on the last day of school seems cruel, but if you really want
something to do during your summer vacation:

1.  Fire up your profiler, run a couple of benchmarks, and look for other
    hotspots in the VM. Do you see anything in the runtime that you can improve?

2.  Many strings in real-world user programs are small, often only a character
    or two. This is less of a concern in clox because we intern strings, but
    most VMs don't. For those that don't, heap allocating a tiny character array
    for each of those little strings and then representing the value as a
    pointer to that array is wasteful. Often, the pointer is larger than the
    string's characters. A classic trick is to have a separate value
    representation for small strings that stores the characters inline in the
    value.

    Starting from clox's original tagged union representation, implement that
    optimization. Write a couple of relevant benchmarks and see if it helps.

3.  Reflect back on your experience with this book. What parts of it worked well
    for you? What didn't? Was it easier for you to learn bottom-up or top-down?
    Did the illustrations help or distract? Did the analogies clarify or
    confuse?

    The more you understand your personal learning style, the more effectively
    you can upload knowledge into your head. You can specifically target
    material that teaches you the way you learn best.

</div>


================================================
FILE: book/parsing-expressions.md
================================================
> Grammar, which knows how to control even kings.
> <cite>Molière</cite>

<span name="parse">This</span> chapter marks the first major milestone of the
book. Many of us have cobbled together a mishmash of regular expressions and
substring operations to extract some sense out of a pile of text. The code was
probably riddled with bugs and a beast to maintain. Writing a *real* parser --
one with decent error handling, a coherent internal structure, and the ability
to robustly chew through a sophisticated syntax -- is considered a rare,
impressive skill. In this chapter, you will <span name="attain">attain</span>
it.

<aside name="parse">

"Parse" comes to English from the Old French "pars" for "part of speech". It
means to take a text and map each word to the grammar of the language. We use it
here in the same sense, except that our language is a little more modern than
Old French.

</aside>

<aside name="attain">

Like many rites of passage, you'll probably find it looks a little smaller, a
little less daunting when it's behind you than when it loomed ahead.

</aside>

It's easier than you think, partially because we front-loaded a lot of the hard
work in the [last chapter][]. You already know your way around a formal grammar.
You're familiar with syntax trees, and we have some Java classes to represent
them. The only remaining piece is parsing -- transmogrifying a sequence of
tokens into one of those syntax trees.

[last chapter]: representing-code.html

Some CS textbooks make a big deal out of parsers. In the '60s, computer
scientists -- understandably tired of programming in assembly language --
started designing more sophisticated, <span name="human">human</span>-friendly
languages like Fortran and ALGOL. Alas, they weren't very *machine*-friendly
for the primitive computers of the time.

<aside name="human">

Imagine how harrowing assembly programming on those old machines must have been
that they considered *Fortran* to be an improvement.

</aside>

These pioneers designed languages that they honestly weren't even sure how to
write compilers for, and then did groundbreaking work inventing parsing and
compiling techniques that could handle these new, big languages on those old, tiny
machines.

Classic compiler books read like fawning hagiographies of these heroes and their
tools. The cover of *Compilers: Principles, Techniques, and Tools* literally has
a dragon labeled "complexity of compiler design" being slain by a knight bearing
a sword and shield branded "LALR parser generator" and "syntax directed
translation". They laid it on thick.

A little self-congratulation is well-deserved, but the truth is you don't need
to know most of that stuff to bang out a high-quality parser for a modern
machine. As always, I encourage you to broaden your education and take it in
later, but this book omits the trophy case.

## Ambiguity and the Parsing Game

In the last chapter, I said you can "play" a context-free grammar like a game in
order to *generate* strings. Parsers play that game in reverse. Given a string
-- a series of tokens -- we map those tokens to terminals in the grammar to
figure out which rules could have generated that string.

The "could have" part is interesting. It's entirely possible to create a grammar
that is *ambiguous*, where different choices of productions can lead to the same
string. When you're using the grammar to *generate* strings, that doesn't matter
much. Once you have the string, who cares how you got to it?

When parsing, ambiguity means the parser may misunderstand the user's code. As
we parse, we aren't just determining if the string is valid Lox code, we're
also tracking which rules match which parts of it so that we know what part of
the language each token belongs to. Here's the Lox expression grammar we put
together in the last chapter:

```ebnf
expression     → literal
               | unary
               | binary
               | grouping ;

literal        → NUMBER | STRING | "true" | "false" | "nil" ;
grouping       → "(" expression ")" ;
unary          → ( "-" | "!" ) expression ;
binary         → expression operator expression ;
operator       → "==" | "!=" | "<" | "<=" | ">" | ">="
               | "+"  | "-"  | "*" | "/" ;
```

This is a valid string in that grammar:

<img src="image/parsing-expressions/tokens.png" alt="6 / 3 - 1" />

But there are two ways we could have generated it. One way is:

1. Starting at `expression`, pick `binary`.
2. For the left-hand `expression`, pick `NUMBER`, and use `6`.
3. For the operator, pick `"/"`.
4. For the right-hand `expression`, pick `binary` again.
5. In that nested `binary` expression, pick `3 - 1`.

Another is:

1. Starting at `expression`, pick `binary`.
2. For the left-hand `expression`, pick `binary` again.
3. In that nested `binary` expression, pick `6 / 3`.
4. Back at the outer `binary`, for the operator, pick `"-"`.
5. For the right-hand `expression`, pick `NUMBER`, and use `1`.

Those produce the same *strings*, but not the same *syntax trees*:

<img src="image/parsing-expressions/syntax-trees.png" alt="Two valid syntax trees: (6 / 3) - 1 and 6 / (3 - 1)" />

In other words, the grammar allows seeing the expression as `(6 / 3) - 1` or `6
/ (3 - 1)`. The `binary` rule lets operands nest any which way you want. That in
turn affects the result of evaluating the parsed tree. The way mathematicians
have addressed this ambiguity since blackboards were first invented is by
defining rules for precedence and associativity.

*   <span name="nonassociative">**Precedence**</span> determines which operator
    is evaluated first in an expression containing a mixture of different
    operators. Precedence rules tell us that we evaluate the `/` before the `-`
    in the above example. Operators with higher precedence are evaluated
    before operators with lower precedence. Equivalently, higher precedence
    operators are said to "bind tighter".

*   **Associativity** determines which operator is evaluated first in a series
    of the *same* operator. When an operator is **left-associative** (think
    "left-to-right"), operators on the left evaluate before those on the right.
    Since `-` is left-associative, this expression:

    ```lox
    5 - 3 - 1
    ```

    is equivalent to:

    ```lox
    (5 - 3) - 1
    ```

    Assignment, on the other hand, is **right-associative**. This:

    ```lox
    a = b = c
    ```

    is equivalent to:

    ```lox
    a = (b = c)
    ```

<aside name="nonassociative">

While not common these days, some languages specify that certain pairs of
operators have *no* relative precedence. That makes it a syntax error to mix
those operators in an expression without using explicit grouping.

Likewise, some operators are **non-associative**. That means it's an error to
use that operator more than once in a sequence. For example, Perl's range
operator isn't associative, so `a .. b` is OK, but `a .. b .. c` is an error.

</aside>

Without well-defined precedence and associativity, an expression that uses
multiple operators is ambiguous -- it can be parsed into different syntax trees,
which could in turn evaluate to different results. We'll fix that in Lox by
applying the same precedence rules as C, going from lowest to highest.

<table>
<thead>
<tr>
  <td>Name</td>
  <td>Operators</td>
  <td>Associates</td>
</tr>
</thead>
<tbody>
<tr>
  <td>Equality</td>
  <td><code>==</code> <code>!=</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Comparison</td>
  <td><code>&gt;</code> <code>&gt;=</code>
      <code>&lt;</code> <code>&lt;=</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Term</td>
  <td><code>-</code> <code>+</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Factor</td>
  <td><code>/</code> <code>*</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Unary</td>
  <td><code>!</code> <code>-</code></td>
  <td>Right</td>
</tr>
</tbody>
</table>

Right now, the grammar stuffs all expression types into a single `expression`
rule. That same rule is used as the non-terminal for operands, which lets the
grammar accept any kind of expression as a subexpression, regardless of whether
the precedence rules allow it.

We fix that by <span name="massage">stratifying</span> the grammar. We define a
separate rule for each precedence level.

```ebnf
expression     → ...
equality       → ...
comparison     → ...
term           → ...
factor         → ...
unary          → ...
primary        → ...
```

<aside name="massage">

Instead of baking precedence right into the grammar rules, some parser
generators let you keep the same ambiguous-but-simple grammar and then add in a
little explicit operator precedence metadata on the side in order to
disambiguate.

</aside>

Each rule here only matches expressions at its precedence level or higher. For
example, `unary` matches a unary expression like `!negated` or a primary
expression like `1234`. And `term` can match `1 + 2` but also `3 * 4 / 5`. The
final `primary` rule covers the highest-precedence forms -- literals and
parenthesized expressions.

We just need to fill in the productions for each of those rules. We'll do the
easy ones first. The top `expression` rule matches any expression at any
precedence level. Since <span name="equality">`equality`</span> has the lowest
precedence, if we match that, then it covers everything.

<aside name="equality">

We could eliminate `expression` and simply use `equality` in the other rules
that contain expressions, but using `expression` makes those other rules read a
little better.

Also, in later chapters when we expand the grammar to include assignment and
logical operators, we'll only need to change the production for `expression`
instead of touching every rule that contains an expression.

</aside>

```ebnf
expression     → equality ;
```

Over at the other end of the precedence table, a primary expression contains
all the literals and grouping expressions.

```ebnf
primary        → NUMBER | STRING | "true" | "false" | "nil"
               | "(" expression ")" ;
```

A unary expression starts with a unary operator followed by the operand. Since
unary operators can nest -- `!!true` is a valid if weird expression -- the
operand can itself be a unary operator. A recursive rule handles that nicely.

```ebnf
unary          → ( "!" | "-" ) unary ;
```

But this rule has a problem. It never terminates.

Remember, each rule needs to match expressions at that precedence level *or
higher*, so we also need to let this match a primary expression.

```ebnf
unary          → ( "!" | "-" ) unary
               | primary ;
```

That works.

The remaining rules are all binary operators. We'll start with the rule for
multiplication and division. Here's a first try:

```ebnf
factor         → factor ( "/" | "*" ) unary
               | unary ;
```

The rule recurses to match the left operand. That enables the rule to match a
series of multiplication and division expressions like `1 * 2 / 3`. Putting the
recursive production on the left side and `unary` on the right makes the rule
<span name="mult">left-associative</span> and unambiguous.

<aside name="mult">

In principle, it doesn't matter whether you treat multiplication as left- or
right-associative -- you get the same result either way. Alas, in the real world
with limited precision, roundoff and overflow mean that associativity can affect
the result of a sequence of multiplications. Consider:

```lox
print 0.1 * (0.2 * 0.3);
print (0.1 * 0.2) * 0.3;
```

In languages like Lox that use [IEEE 754][754] double-precision floating-point
numbers, the first evaluates to `0.006`, while the second yields
`0.006000000000000001`. Sometimes that tiny difference matters.
[This][float] is a good place to learn more.

[754]: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
[float]: https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html

</aside>

All of this is correct, but the fact that the first symbol in the body of the
rule is the same as the head of the rule means this production is
**left-recursive**. Some parsing techniques, including the one we're going to
use, have trouble with left recursion. (Recursion elsewhere, like we have in
`unary` and the indirect recursion for grouping in `primary`, is not a problem.)

There are many grammars you can define that match the same language. The choice
for how to model a particular language is partially a matter of taste and
partially a pragmatic one. This rule is correct, but not optimal for how we
intend to parse it. Instead of a left-recursive rule, we'll use a different one.

```ebnf
factor         → unary ( ( "/" | "*" ) unary )* ;
```

We define a factor expression as a flat *sequence* of multiplications
and divisions. This matches the same syntax as the previous rule, but better
mirrors the code we'll write to parse Lox. We use the same structure for all of
the other binary operator precedence levels, giving us this complete expression
grammar:

```ebnf
expression     → equality ;
equality       → comparison ( ( "!=" | "==" ) comparison )* ;
comparison     → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
term           → factor ( ( "-" | "+" ) factor )* ;
factor         → unary ( ( "/" | "*" ) unary )* ;
unary          → ( "!" | "-" ) unary
               | primary ;
primary        → NUMBER | STRING | "true" | "false" | "nil"
               | "(" expression ")" ;
```

This grammar is more complex than the one we had before, but in return we have
eliminated the previous one's ambiguity. It's just what we need to make a
parser.

## Recursive Descent Parsing

There is a whole pack of parsing techniques whose names are mostly combinations
of "L" and "R" -- [LL(k)][], [LR(1)][lr], [LALR][] -- along with more exotic
beasts like [parser combinators][], [Earley parsers][], [the shunting yard
algorithm][yard], and [packrat parsing][]. For our first interpreter, one
technique is more than sufficient: **recursive descent**.

[ll(k)]: https://en.wikipedia.org/wiki/LL_parser
[lr]: https://en.wikipedia.org/wiki/LR_parser
[lalr]: https://en.wikipedia.org/wiki/LALR_parser
[parser combinators]: https://en.wikipedia.org/wiki/Parser_combinator
[earley parsers]: https://en.wikipedia.org/wiki/Earley_parser
[yard]: https://en.wikipedia.org/wiki/Shunting-yard_algorithm
[packrat parsing]: https://en.wikipedia.org/wiki/Parsing_expression_grammar

Recursive descent is the simplest way to build a parser, and doesn't require
using complex parser generator tools like Yacc, Bison or ANTLR. All you need is
straightforward handwritten code. Don't be fooled by its simplicity, though.
Recursive descent parsers are fast, robust, and can support sophisticated
error handling. In fact, GCC, V8 (the JavaScript VM in Chrome), Roslyn (the C#
compiler written in C#) and many other heavyweight production language
implementations use recursive descent. It rocks.

Recursive descent is considered a **top-down parser** because it starts from the
top or outermost grammar rule (here `expression`) and works its way <span
name="descent">down</span> into the nested subexpressions before finally
reaching the leaves of the syntax tree. This is in contrast with bottom-up
parsers like LR that start with primary expressions and compose them into larger
and larger chunks of syntax.

<aside name="descent">

It's called "recursive *descent*" because it walks *down* the grammar.
Confusingly, we also use direction metaphorically when talking about "high" and
"low" precedence, but the orientation is reversed. In a top-down parser, you
reach the lowest-precedence expressions first because they may in turn contain
subexpressions of higher precedence.

<img src="image/parsing-expressions/direction.png" alt="Top-down grammar rules in order of increasing precedence." />

CS people really need to get together and straighten out their metaphors. Don't
even get me started on which direction a stack grows or why trees have their
roots on top.

</aside>

A recursive descent parser is a literal translation of the grammar's rules
straight into imperative code. Each rule becomes a function. The body of the
rule translates to code roughly like:

<table>
<thead>
<tr>
  <td>Grammar notation</td>
  <td>Code representation</td>
</tr>
</thead>
<tbody>
  <tr><td>Terminal</td><td>Code to match and consume a token</td></tr>
  <tr><td>Nonterminal</td><td>Call to that rule&rsquo;s function</td></tr>
  <tr><td><code>|</code></td><td><code>if</code> or <code>switch</code> statement</td></tr>
  <tr><td><code>*</code> or <code>+</code></td><td><code>while</code> or <code>for</code> loop</td></tr>
  <tr><td><code>?</code></td><td><code>if</code> statement</td></tr>
</tbody>
</table>

The descent is described as "recursive" because when a grammar rule refers to
itself -- directly or indirectly -- that translates to a recursive function
call.

### The parser class

Each grammar rule becomes a method inside this new class:

^code parser

Like the scanner, the parser consumes a flat input sequence, only now we're
reading tokens instead of characters. We store the list of tokens and use
`current` to point to the next token eagerly waiting to be parsed.

We're going to run straight through the expression grammar now and translate
each rule to Java code. The first rule, `expression`, simply expands to the
`equality` rule, so that's straightforward.

^code expression

Each method for parsing a grammar rule produces a syntax tree for that rule and
returns it to the caller. When the body of the rule contains a nonterminal -- a
reference to another rule -- we <span name="left">call</span> that other rule's
method.

<aside name="left">

This is why left recursion is problematic for recursive descent. The function
for a left-recursive rule immediately calls itself, which calls itself again,
and so on, until the parser hits a stack overflow and dies.

</aside>

The rule for equality is a little more complex.

```ebnf
equality       → comparison ( ( "!=" | "==" ) comparison )* ;
```

In Java, that becomes:

^code equality

Let's step through it. The first `comparison` nonterminal in the body translates
to the first call to `comparison()` in the method. We take that result and store
it in a local variable.

Then, the `( ... )*` loop in the rule maps to a `while` loop. We need to know
when to exit that loop. We can see that inside the rule, we must first find
either a `!=` or `==` token. So, if we *don't* see one of those, we must be done
with the sequence of equality operators. We express that check using a handy
`match()` method.

^code match

This checks to see if the current token has any of the given types. If so, it
consumes the token and returns `true`. Otherwise, it returns `false` and leaves
the current token alone. The `match()` method is defined in terms of two more
fundamental operations.

The `check()` method returns `true` if the current token is of the given type.
Unlike `match()`, it never consumes the token, it only looks at it.

^code check

The `advance()` method consumes the current token and returns it, similar to how
our scanner's corresponding method crawled through characters.

^code advance

These methods bottom out on the last handful of primitive operations.

^code utils

`isAtEnd()` checks if we've run out of tokens to parse. `peek()` returns the
current token we have yet to consume, and `previous()` returns the most recently
consumed token. The latter makes it easier to use `match()` and then access the
just-matched token.

That's most of the parsing infrastructure we need. Where were we? Right, so if
we are inside the `while` loop in `equality()`, then we know we have found a
`!=` or `==` operator and must be parsing an equality expression.

We grab the matched operator token so we can track which kind of equality
expression we have. Then we call `comparison()` again to parse the right-hand
operand. We combine the operator and its two operands into a new `Expr.Binary`
syntax tree node, and then loop around. For each iteration, we store the
resulting expression back in the same `expr` local variable. As we zip through a
sequence of equality expressions, that creates a left-associative nested tree of
binary operator nodes.

<span name="sequence"></span>

<img src="image/parsing-expressions/sequence.png" alt="The syntax tree created by parsing 'a == b == c == d == e'" />

<aside name="sequence">

Parsing `a == b == c == d == e`. For each iteration, we create a new binary
expression using the previous one as the left operand.

</aside>

The parser falls out of the loop once it hits a token that's not an equality
operator. Finally, it returns the expression. Note that if the parser never
encounters an equality operator, then it never enters the loop. In that case,
the `equality()` method effectively calls and returns `comparison()`. In that
way, this method matches an equality operator *or anything of higher
precedence*.

Moving on to the next rule...

```ebnf
comparison     → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
```

Translated to Java:

^code comparison

The grammar rule is virtually <span name="handle">identical</span> to `equality`
and so is the corresponding code. The only differences are the token types for
the operators we match, and the method we call for the operands -- now
`term()` instead of `comparison()`. The remaining two binary operator rules
follow the same pattern.

In order of precedence, first addition and subtraction:

<aside name="handle">

If you wanted to do some clever Java 8, you could create a helper method for
parsing a left-associative series of binary operators given a list of token
types, and an operand method handle to simplify this redundant code.

</aside>

^code term

And finally, multiplication and division:

^code factor

That's all of the binary operators, parsed with the correct precedence and
associativity. We're crawling up the precedence hierarchy and now we've reached
the unary operators.

```ebnf
unary          → ( "!" | "-" ) unary
               | primary ;
```

The code for this is a little different.

^code unary

Again, we look at the <span name="current">current</span> token to see how to
parse. If it's a `!` or `-`, we must have a unary expression. In that case, we
grab the token and then recursively call `unary()` again to parse the operand.
Wrap that all up in a unary expression syntax tree and we're done.

<aside name="current">

The fact that the parser looks ahead at upcoming tokens to decide how to parse
puts recursive descent into the category of **predictive parsers**.

</aside>

Otherwise, we must have reached the highest level of precedence, primary
expressions.

```ebnf
primary        → NUMBER | STRING | "true" | "false" | "nil"
               | "(" expression ")" ;
```

Most of the cases for the rule are single terminals, so parsing is
straightforward.

^code primary

The interesting branch is the one for handling parentheses. After we match an
opening `(` and parse the expression inside it, we *must* find a `)` token. If
we don't, that's an error.

## Syntax Errors

A parser really has two jobs:

1.  Given a valid sequence of tokens, produce a corresponding syntax tree.

2.  Given an *invalid* sequence of tokens, detect any errors and tell the
    user about their mistakes.

Don't underestimate how important the second job is! In modern IDEs and editors,
the parser is constantly reparsing code -- often while the user is still editing
it -- in order to syntax highlight and support things like auto-complete. That
means it will encounter code in incomplete, half-wrong states *all the time.*

When the user doesn't realize the syntax is wrong, it is up to the parser to
help guide them back onto the right path. The way it reports errors is a large
part of your language's user interface. Good syntax error handling is hard. By
definition, the code isn't in a well-defined state, so there's no infallible way
to know what the user *meant* to write. The parser can't read your <span
name="telepathy">mind</span>.

<aside name="telepathy">

Not yet at least. With the way things are going in machine learning these days,
who knows what the future will bring?

</aside>

There are a couple of hard requirements for when the parser runs into a syntax
error. A parser must:

*   **Detect and report the error.** If it doesn't detect the <span
    name="error">error</span> and passes the resulting malformed syntax tree on
    to the interpreter, all manner of horrors may be summoned.

    <aside name="error">

    Philosophically speaking, if an error isn't detected and the interpreter
    runs the code, is it *really* an error?

    </aside>

*   **Avoid crashing or hanging.** Syntax errors are a fact of life, and
    language tools have to be robust in the face of them. Segfaulting or getting
    stuck in an infinite loop isn't allowed. While the source may not be valid
    *code*, it's still a valid *input to the parser* because users use the
    parser to learn what syntax is allowed.

Those are the table stakes if you want to get in the parser game at all, but you
really want to raise the ante beyond that. A decent parser should:

*   **Be fast.** Computers are thousands of times faster than they were when
    parser technology was first invented. The days of needing to optimize your
    parser so that it could get through an entire source file during a coffee
    break are over. But programmer expectations have risen as quickly, if not
    faster. They expect their editors to reparse files in milliseconds after
    every keystroke.

*   **Report as many distinct errors as there are.** Aborting after the first
    error is easy to implement, but it's annoying for users if every time they
    fix what they think is the one error in a file, a new one appears. They
    want to see them all.

*   **Minimize *cascaded* errors.** Once a single error is found, the parser no
    longer really knows what's going on. It tries to get itself back on track
    and keep going, but if it gets confused, it may report a slew of ghost
    errors that don't indicate other real problems in the code. When the first
    error is fixed, those phantoms disappear, because they reflect only the
    parser's own confusion. Cascaded errors are annoying because they can scare
    the user into thinking their code is in a worse state than it is.

The last two points are in tension. We want to report as many separate errors as
we can, but we don't want to report ones that are merely side effects of an
earlier one.

The way a parser responds to an error and keeps going to look for later errors
is called **error recovery**. This was a hot research topic in the '60s. Back
then, you'd hand a stack of punch cards to the secretary and come back the next
day to see if the compiler succeeded. With an iteration loop that slow, you
*really* wanted to find every single error in your code in one pass.

Today, when parsers complete before you've even finished typing, it's less of an
issue. Simple, fast error recovery is fine.

### Panic mode error recovery

<aside name="panic">

You know you want to push it.

<img src="image/parsing-expressions/panic.png" alt="A big shiny 'PANIC' button." />

</aside>

Of all the recovery techniques devised in yesteryear, the one that best stood
the test of time is called -- somewhat alarmingly -- <span name="panic">**panic
mode**</span>. As soon as the parser detects an error, it enters panic mode. It
knows at least one token doesn't make sense given its current state in the
middle of some stack of grammar productions.

Before it can get back to parsing, it needs to get its state and the sequence of
forthcoming tokens aligned such that the next token does match the rule being
parsed. This process is called **synchronization**.

To do that, we select some rule in the grammar that will mark the
synchronization point. The parser fixes its parsing state by jumping out of any
nested productions until it gets back to that rule. Then it synchronizes the
token stream by discarding tokens until it reaches one that can appear at that
point in the rule.

Any additional real syntax errors hiding in those discarded tokens aren't
reported, but it also means that any mistaken cascaded errors that are side
effects of the initial error aren't *falsely* reported either, which is a decent
trade-off.

The traditional place in the grammar to synchronize is between statements. We
don't have those yet, so we won't actually synchronize in this chapter, but
we'll get the machinery in place for later.

### Entering panic mode

Back before we went on this side trip around error recovery, we were writing the
code to parse a parenthesized expression. After parsing the expression, the
parser looks for the closing `)` by calling `consume()`. Here, finally, is that
method:

^code consume

It's similar to `match()` in that it checks to see if the next token is of the
expected type. If so, it consumes the token and everything is groovy. If some
other token is there, then we've hit an error. We report it by calling this:

^code error

First, that shows the error to the user by calling:

^code token-error

This reports an error at a given token. It shows the token's location and the
token itself. This will come in handy later since we use tokens throughout the
interpreter to track locations in code.

After we report the error, the user knows about their mistake, but what does the
*parser* do next? Back in `error()`, we create and return a ParseError, an
instance of this new class:

^code parse-error (1 before, 1 after)

This is a simple sentinel class we use to unwind the parser. The `error()`
method *returns* the error instead of *throwing* it because we want to let the
calling method inside the parser decide whether to unwind or not. Some parse
errors occur in places where the parser isn't likely to get into a weird state
and we don't need to <span name="production">synchronize</span>. In those
places, we simply report the error and keep on truckin'.

For example, Lox limits the number of arguments you can pass to a function. If
you pass too many, the parser needs to report that error, but it can and should
simply keep on parsing the extra arguments instead of freaking out and going
into panic mode.

<aside name="production">

Another way to handle common syntax errors is with **error productions**. You
augment the grammar with a rule that *successfully* matches the *erroneous*
syntax. The parser safely parses it but then reports it as an error instead of
producing a syntax tree.

For example, some languages have a unary `+` operator, like `+123`, but Lox does
not. Instead of getting confused when the parser stumbles onto a `+` at the
beginning of an expression, we could extend the unary rule to allow it.

```ebnf
unary → ( "!" | "-" | "+" ) unary
      | primary ;
```

This lets the parser consume `+` without going into panic mode or leaving the
parser in a weird state.

Error productions work well because you, the parser author, know *how* the code
is wrong and what the user was likely trying to do. That means you can give a
more helpful message to get the user back on track, like, "Unary '+' expressions
are not supported." Mature parsers tend to accumulate error productions like
barnacles since they help users fix common mistakes.

</aside>

In our case, though, the syntax error is nasty enough that we want to panic and
synchronize. Discarding tokens is pretty easy, but how do we synchronize the
parser's own state?

### Synchronizing a recursive descent parser

With recursive descent, the parser's state -- which rules it is in the middle of
recognizing -- is not stored explicitly in fields. Instead, we use Java's
own call stack to track what the parser is doing. Each rule in the middle of
being parsed is a call frame on the stack. In order to reset that state, we need
to clear out those call frames.

The natural way to do that in Java is exceptions. When we want to synchronize,
we *throw* that ParseError object. Higher up in the method for the grammar rule
we are synchronizing to, we'll catch it. Since we synchronize on statement
boundaries, we'll catch the exception there. After the exception is caught, the
parser is in the right state. All that's left is to synchronize the tokens.

We want to discard tokens until we're right at the beginning of the next
statement. That boundary is pretty easy to spot -- it's one of the main reasons
we picked it. *After* a semicolon, we're <span name="semicolon">probably</span>
finished with a statement. Most statements start with a keyword -- `for`, `if`,
`return`, `var`, etc. When the *next* token is any of those, we're probably
about to start a statement.

<aside name="semicolon">

I say "probably" because we could hit a semicolon separating clauses in a `for`
loop. Our synchronization isn't perfect, but that's OK. We've already reported
the first error precisely, so everything after that is kind of "best effort".

</aside>

This method encapsulates that logic:

^code synchronize

It discards tokens until it thinks it has found a statement boundary. After
catching a ParseError, we'll call this and then we are hopefully back in sync.
When it works well, we have discarded tokens that would have likely caused
cascaded errors anyway, and now we can parse the rest of the file starting at
the next statement.

Alas, we don't get to see this method in action, since we don't have statements
yet. We'll get to that [in a couple of chapters][statements]. For now, if an
error occurs, we'll panic and unwind all the way to the top and stop parsing.
Since we can parse only a single expression anyway, that's no big loss.

[statements]: statements-and-state.html

## Wiring up the Parser

We are mostly done parsing expressions now. There is one other place where we
need to add a little error handling. As the parser descends through the parsing
methods for each grammar rule, it eventually hits `primary()`. If none of the
cases in there match, it means we are sitting on a token that can't start an
expression. We need to handle that error too.

^code primary-error (5 before, 1 after)

With that, all that remains in the parser is to define an initial method to kick
it off. That method is called, naturally enough, `parse()`.

^code parse

We'll revisit this method later when we add statements to the language. For now,
it parses a single expression and returns it. We also have some temporary code
to exit out of panic mode. Syntax error recovery is the parser's job, so we
don't want the ParseError exception to escape into the rest of the interpreter.

When a syntax error does occur, this method returns `null`. That's OK. The
parser promises not to crash or hang on invalid syntax, but it doesn't promise
to return a *usable syntax tree* if an error is found. As soon as the parser
reports an error, `hadError` gets set, and subsequent phases are skipped.

Finally, we can hook up our brand new parser to the main Lox class and try it
out. We still don't have an interpreter, so for now, we'll parse to a syntax
tree and then use the AstPrinter class from the [last chapter][ast-printer] to
display it.

[ast-printer]: representing-code.html#a-not-very-pretty-printer

Delete the old code to print the scanned tokens and replace it with this:

^code print-ast (1 before, 1 after)

Congratulations, you have crossed the <span name="harder">threshold</span>! That
really is all there is to handwriting a parser. We'll extend the grammar in
later chapters with assignment, statements, and other stuff, but none of that is
any more complex than the binary operators we tackled here.

<aside name="harder">

It is possible to define a more complex grammar than Lox's that's difficult to
parse using recursive descent. Predictive parsing gets tricky when you may need
to look ahead a large number of tokens to figure out what you're sitting on.

In practice, most languages are designed to avoid that. Even in cases where they
aren't, you can usually hack around it without too much pain. If you can parse
C++ using recursive descent -- which many C++ compilers do -- you can parse
anything.

</aside>

Fire up the interpreter and type in some expressions. See how it handles
precedence and associativity correctly? Not bad for fewer than 200 lines of code.

<div class="challenges">

## Challenges

1.  In C, a block is a statement form that allows you to pack a series of
    statements where a single one is expected. The [comma operator][] is an
    analogous syntax for expressions. A comma-separated series of expressions
    can be given where a single expression is expected (except inside a function
    call's argument list). At runtime, the comma operator evaluates the left
    operand and discards the result. Then it evaluates and returns the right
    operand.

    Add support for comma expressions. Give them the same precedence and
    associativity as in C. Write the grammar, and then implement the necessary
    parsing code.

2.  Likewise, add support for the C-style conditional or "ternary" operator
    `?:`. What precedence level is allowed between the `?` and `:`? Is the whole
    operator left-associative or right-associative?

3.  Add error productions to handle each binary operator appearing without a
    left-hand operand. In other words, detect a binary operator appearing at the
    beginning of an expression. Report that as an error, but also parse and
    discard a right-hand operand with the appropriate precedence.

[comma operator]: https://en.wikipedia.org/wiki/Comma_operator

</div>

<div class="design-note">

## Design Note: Logic Versus History

Let's say we decide to add bitwise `&` and `|` operators to Lox. Where should we
put them in the precedence hierarchy? C -- and most languages that follow in C's
footsteps -- place them below `==`. This is widely considered a mistake because
it means common operations like testing a flag require parentheses.

```c
if (flags & FLAG_MASK == SOME_FLAG) { ... } // Wrong.
if ((flags & FLAG_MASK) == SOME_FLAG) { ... } // Right.
```

Should we fix this for Lox and put bitwise operators higher up the precedence
table than C does? There are two strategies we can take.

You almost never want to use the result of an `==` expression as the operand to
a bitwise operator. By making bitwise bind tighter, users don't need to
parenthesize as often. So if we do that, and users assume the precedence is
chosen logically to minimize parentheses, they're likely to infer it correctly.

This kind of internal consistency makes the language easier to learn because
there are fewer edge cases and exceptions users have to stumble into and then
correct. That's good, because before users can use our language, they have to
load all of that syntax and semantics into their heads. A simpler, more rational
language *makes sense*.

But, for many users there is an even faster shortcut to getting our language's
ideas into their wetware -- *use concepts they already know*. Many newcomers to
our language will be coming from some other language or languages. If our
language uses some of the same syntax or semantics as those, there is much less
for the user to learn (and *unlearn*).

This is particularly helpful with syntax. You may not remember it well today,
but way back when you learned your very first programming language, code
probably looked alien and unapproachable. Only through painstaking effort did
you learn to read and accept it. If you design a novel syntax for your new
language, you force users to start that process all over again.

Taking advantage of what users already know is one of the most powerful tools
you can use to ease adoption of your language. It's almost impossible to
overestimate how valuable this is. But it faces you with a nasty problem: What
happens when the thing the users all know *kind of sucks*? C's bitwise operator
precedence is a mistake that doesn't make sense. But it's a *familiar* mistake
that millions have already gotten used to and learned to live with.

Do you stay true to your language's own internal logic and ignore history? Do
you start from a blank slate and first principles? Or do you weave your language
into the rich tapestry of programming history and give your users a leg up by
starting from something they already know?

There is no perfect answer here, only trade-offs. You and I are obviously biased
towards liking novel languages, so our natural inclination is to burn the
history books and start our own story.

In practice, it's often better to make the most of what users already know.
Getting them to come to your language requires a big leap. The smaller you can
make that chasm, the more people will be willing to cross it. But you can't
*always* stick to history, or your language won't have anything new and
compelling to give people a *reason* to jump over.

</div>


================================================
FILE: book/representing-code.md
================================================
> To dwellers in a wood, almost every species of tree has its voice as well as
> its feature.
> <cite>Thomas Hardy, <em>Under the Greenwood Tree</em></cite>

In the [last chapter][scanning], we took the raw source code as a string and
transformed it into a slightly higher-level representation: a series of tokens.
The parser we'll write in the [next chapter][parsing] takes those tokens and
transforms them yet again, into an even richer, more complex representation.

[scanning]: scanning.html
[parsing]: parsing-expressions.html

Before we can produce that representation, we need to define it. That's the
subject of this chapter. Along the way, we'll <span name="boring">cover</span>
some theory around formal grammars, feel the difference between functional and
object-oriented programming, go over a couple of design patterns, and do some
metaprogramming.

<aside name="boring">

I was so worried about this being one of the most boring chapters in the book
that I kept stuffing more fun ideas into it until I ran out of room.

</aside>

Before we do all that, let's focus on the main goal -- a representation for
code. It should be simple for the parser to produce and easy for the
interpreter to consume. If you haven't written a parser or interpreter yet,
those requirements aren't exactly illuminating. Maybe your intuition can help.
What is your brain doing when you play the part of a *human* interpreter? How do
you mentally evaluate an arithmetic expression like this:

```lox
1 + 2 * 3 - 4
```

Because you understand the order of operations -- the old "[Please Excuse My
Dear Aunt Sally][sally]" stuff -- you know that the multiplication is evaluated
before the addition or subtraction. One way to visualize that precedence is
using a tree. Leaf nodes are numbers, and interior nodes are operators with
branches for each of their operands.

[sally]: https://en.wikipedia.org/wiki/Order_of_operations#Mnemonics

In order to evaluate an arithmetic node, you need to know the numeric values of
its subtrees, so you have to evaluate those first. That means working your way
from the leaves up to the root -- a *post-order* traversal:

<span name="tree-steps"></span>

<img src="image/representing-code/tree-evaluate.png" alt="Evaluating the tree from the bottom up." />

<aside name="tree-steps">

A. Starting with the full tree, evaluate the bottom-most operation, `2 * 3`.

B. Now we can evaluate the `+`.

C. Next, the `-`.

D. The final answer.

</aside>

If I gave you an arithmetic expression, you could draw one of these trees pretty
easily. Given a tree, you can evaluate it without breaking a sweat. So it
intuitively seems like a workable representation of our code is a <span
name="only">tree</span> that matches the grammatical structure -- the operator
nesting -- of the language.

<aside name="only">

That's not to say a tree is the *only* possible representation of our code. In
[Part III][], we'll generate bytecode, another representation that isn't as
human friendly but is closer to the machine.

[part iii]: a-bytecode-virtual-machine.html

</aside>

We need to get more precise about what that grammar is then. Like lexical
grammars in the last chapter, there is a long ton of theory around syntactic
grammars. We're going into that theory a little more than we did when scanning
because it turns out to be a useful tool throughout much of the interpreter.
We start by moving one level up the [Chomsky hierarchy][]...

[chomsky hierarchy]: https://en.wikipedia.org/wiki/Chomsky_hierarchy

## Context-Free Grammars

In the last chapter, the formalism we used for defining the lexical grammar --
the rules for how characters get grouped into tokens -- was called a *regular
language*. That was fine for our scanner, which emits a flat sequence of tokens.
But regular languages aren't powerful enough to handle expressions which can
nest arbitrarily deeply.

We need a bigger hammer, and that hammer is a **context-free grammar**
(**CFG**). It's the next heaviest tool in the toolbox of
**[formal grammars][]**. A formal grammar takes a set of atomic pieces it calls
its "alphabet". Then it defines a (usually infinite) set of "strings" that are
"in" the grammar. Each string is a sequence of "letters" in the alphabet.

[formal grammars]: https://en.wikipedia.org/wiki/Formal_grammar

I'm using all those quotes because the terms get a little confusing as you move
from lexical to syntactic grammars. In our scanner's grammar, the alphabet
consists of individual characters and the strings are the valid lexemes --
roughly "words". In the syntactic grammar we're talking about now, we're at a
different level of granularity. Now each "letter" in the alphabet is an entire
token and a "string" is a sequence of *tokens* -- an entire expression.

Oof. Maybe a table will help:

<table>
<thead>
<tr>
  <td>Terminology</td>
  <td></td>
  <td>Lexical grammar</td>
  <td>Syntactic grammar</td>
</tr>
</thead>
<tbody>
<tr>
  <td>The &ldquo;alphabet&rdquo; is<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.</span></td>
  <td>&rarr;&ensp;</td>
  <td>Characters</td>
  <td>Tokens</td>
</tr>
<tr>
  <td>A &ldquo;string&rdquo; is<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.</span></td>
  <td>&rarr;&ensp;</td>
  <td>Lexeme or token</td>
  <td>Expression</td>
</tr>
<tr>
  <td>It&rsquo;s implemented by the<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.</span></td>
  <td>&rarr;&ensp;</td>
  <td>Scanner</td>
  <td>Parser</td>
</tr>
</tbody>
</table>

A formal grammar's job is to specify which strings are valid and which aren't.
If we were defining a grammar for English sentences, "eggs are tasty for
breakfast" would be in the grammar, but "tasty breakfast for are eggs" would
probably not.

### Rules for grammars

How do we write down a grammar that contains an infinite number of valid
strings? We obviously can't list them all out. Instead, we create a finite set
of rules. You can think of them as a game that you can "play" in one of two
directions.

If you start with the rules, you can use them to *generate* strings that are in
the grammar. Strings created this way are called **derivations** because each is
*derived* from the rules of the grammar. In each step of the game, you pick a
rule and follow what it tells you to do. Most of the lingo around formal
grammars comes from playing them in this direction. Rules are called
**productions** because they *produce* strings in the grammar.

Each production in a context-free grammar has a **head** -- its <span
name="name">name</span> -- and a **body**, which describes what it generates. In
its pure form, the body is simply a list of symbols. Symbols come in two
delectable flavors:

<aside name="name">

Restricting heads to a single symbol is a defining feature of context-free
grammars. More powerful formalisms like **[unrestricted grammars][]** allow a
sequence of symbols in the head as well as in the body.

[unrestricted grammars]: https://en.wikipedia.org/wiki/Unrestricted_grammar

</aside>

*   A **terminal** is a letter from the grammar's alphabet. You can think of it
    like a literal value. In the syntactic grammar we're defining, the terminals
    are individual lexemes -- tokens coming from the scanner like `if` or
    `1234`.

    These are called "terminals", in the sense of an "end point" because they
    don't lead to any further "moves" in the game. You simply produce that one
    symbol.

*   A **nonterminal** is a named reference to another rule in the grammar. It
    means "play that rule and insert whatever it produces here". In this way,
    the grammar composes.

There is one last refinement: you may have multiple rules with the same name.
When you reach a nonterminal with that name, you are allowed to pick any of the
rules for it, whichever floats your boat.

To make this concrete, we need a <span name="turtles">way</span> to write down
these production rules. People have been trying to crystallize grammar all the
way back to Pāṇini's *Ashtadhyayi*, which codified Sanskrit grammar a mere
couple thousand years ago. Not much progress happened until John Backus and
company needed a notation for specifying ALGOL 58 and came up with
[**Backus-Naur form**][bnf] (**BNF**). Since then, nearly everyone uses some
flavor of BNF, tweaked to their own tastes.

[bnf]: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form

I tried to come up with something clean. Each rule is a name, followed by an
arrow (`→`), followed by a sequence of symbols, and finally ending with a
semicolon (`;`). Terminals are quoted strings, and nonterminals are lowercase
words.

<aside name="turtles">

Yes, we need to define a syntax to use for the rules that define our syntax.
Should we specify that *metasyntax* too? What notation do we use for *it?* It's
languages all the way down!

</aside>

Using that, here's a grammar for <span name="breakfast">breakfast</span> menus:

<aside name="breakfast">

Yes, I really am going to be using breakfast examples throughout this entire
book. Sorry.

</aside>

```ebnf
breakfast  → protein "with" breakfast "on the side" ;
breakfast  → protein ;
breakfast  → bread ;

protein    → crispiness "crispy" "bacon" ;
protein    → "sausage" ;
protein    → cooked "eggs" ;

crispiness → "really" ;
crispiness → "really" crispiness ;

cooked     → "scrambled" ;
cooked     → "poached" ;
cooked     → "fried" ;

bread      → "toast" ;
bread      → "biscuits" ;
bread      → "English muffin" ;
```

We can use this grammar to generate random breakfasts. Let's play a round and
see how it works. By age-old convention, the game starts with the first rule in
the grammar, here `breakfast`. There are three productions for that, and we
randomly pick the first one. Our resulting string looks like:

```text
protein "with" breakfast "on the side"
```

We need to expand that first nonterminal, `protein`, so we pick a production for
that. Let's pick:

```ebnf
protein → cooked "eggs" ;
```

Next, we need a production for `cooked`, and so we pick `"poached"`. That's a
terminal, so we add that. Now our string looks like:

```text
"poached" "eggs" "with" breakfast "on the side"
```

The next nonterminal is `breakfast` again. The first `breakfast` production we
chose recursively refers back to the `breakfast` rule. Recursion in the grammar
is a good sign that the language being defined is context-free instead of
regular. In particular, recursion where the recursive nonterminal has
productions on <span name="nest">both</span> sides implies that the language is
not regular.

<aside name="nest">

Imagine that we've recursively expanded the `breakfast` rule here several times,
like "bacon with bacon with bacon with..." In order to complete the string
correctly, we need to add an *equal* number of "on the side" bits to the end.
Tracking the number of required trailing parts is beyond the capabilities of a
regular grammar. Regular grammars can express *repetition*, but they can't *keep
count* of how many repetitions there are, which is necessary to ensure that the
string has the same number of `with` and `on the side` parts.

</aside>

We could keep picking the first production for `breakfast` over and over again
yielding all manner of breakfasts like "bacon with sausage with scrambled eggs
with bacon..." We won't though. This time we'll pick `bread`. There are three
rules for that, each of which contains only a terminal. We'll pick "English
muffin".

With that, every nonterminal in the string has been expanded until it finally
contains only terminals and we're left with:

<img src="image/representing-code/breakfast.png" alt='"Playing" the grammar to generate a string.' />

Throw in some ham and Hollandaise, and you've got eggs Benedict.

Any time we hit a rule that had multiple productions, we just picked one
arbitrarily. It is this flexibility that allows a small number of grammar rules
to encode a combinatorially larger set of strings. The fact that a rule can
refer to itself -- directly or indirectly -- kicks it up even more, letting us
pack an infinite number of strings into a finite grammar.

### Enhancing our notation

Stuffing an infinite set of strings in a handful of rules is pretty fantastic,
but let's take it further. Our notation works, but it's tedious. So, like any
good language designer, we'll sprinkle a little syntactic sugar on top -- some
extra convenience notation. In addition to terminals and nonterminals, we'll
allow a few other kinds of expressions in the body of a rule:

*   Instead of repeating the rule name each time we want to add another
    production for it, we'll allow a series of productions separated by a pipe
    (`|`).

    ```ebnf
    bread → "toast" | "biscuits" | "English muffin" ;
    ```

*   Further, we'll allow parentheses for grouping and then allow `|` within that
    to select one from a series of options within the middle of a production.

    ```ebnf
    protein → ( "scrambled" | "poached" | "fried" ) "eggs" ;
    ```

*   Using recursion to support repeated sequences of symbols has a certain
    appealing <span name="purity">purity</span>, but it's kind of a chore to
    make a separate named sub-rule each time we want to loop. So, we also use a
    postfix `*` to allow the previous symbol or group to be repeated zero or
    more times.

    ```ebnf
    crispiness → "really" "really"* ;
    ```

<aside name="purity">

This is how the Scheme programming language works. It has no built-in looping
functionality at all. Instead, *all* repetition is expressed in terms of
recursion.

</aside>

*   A postfix `+` is similar, but requires the preceding production to appear
    at least once.

    ```ebnf
    crispiness → "really"+ ;
    ```

*   A postfix `?` is for an optional production. The thing before it can appear
    zero or one time, but not more.

    ```ebnf
    breakfast → protein ( "with" breakfast "on the side" )? ;
    ```

With all of those syntactic niceties, our breakfast grammar condenses down to:

```ebnf
breakfast → protein ( "with" breakfast "on the side" )?
          | bread ;

protein   → "really"+ "crispy" "bacon"
          | "sausage"
          | ( "scrambled" | "poached" | "fried" ) "eggs" ;

bread     → "toast" | "biscuits" | "English muffin" ;
```

Not too bad, I hope. If you're used to grep or using [regular
expressions][regex] in your text editor, most of the punctuation should be
familiar. The main difference is that symbols here represent entire tokens, not
single characters.

[regex]: https://en.wikipedia.org/wiki/Regular_expression#Standards

We'll use this notation throughout the rest of the book to precisely describe
Lox's grammar. As you work on programming languages, you'll find that
context-free grammars (using this or [EBNF][] or some other notation) help you
crystallize your informal syntax design ideas. They are also a handy medium for
communicating with other language hackers about syntax.

[ebnf]: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form

The rules and productions we define for Lox are also our guide to the tree data
structure we're going to implement to represent code in memory. Before we can do
that, we need an actual grammar for Lox, or at least enough of one for us to get
started.

### A grammar for Lox expressions

In the previous chapter, we did Lox's entire lexical grammar in one fell swoop.
Every keyword and bit of punctuation is there. The syntactic grammar is larger,
and it would be a real bore to grind through the entire thing before we actually
get our interpreter up and running.

Instead, we'll crank through a subset of the language in the next couple of
chapters. Once we have that mini-language represented, parsed, and interpreted,
then later chapters will progressively add new features to it, including the new
syntax. For now, we are going to worry about only a handful of expressions:

*   **Literals.** Numbers, strings, Booleans, and `nil`.

*   **Unary expressions.** A prefix `!` to perform a logical not, and `-` to
    negate a number.

*   **Binary expressions.** The infix arithmetic (`+`, `-`, `*`, `/`) and logic
    operators (`==`, `!=`, `<`, `<=`, `>`, `>=`) we know and love.

*   **Parentheses.** A pair of `(` and `)` wrapped around an expression.

That gives us enough syntax for expressions like:

```lox
1 - (2 * 3) < 4 == false
```

Using our handy dandy new notation, here's a grammar for those:

```ebnf
expression     → literal
               | unary
               | binary
               | grouping ;

literal        → NUMBER | STRING | "true" | "false" | "nil" ;
grouping       → "(" expression ")" ;
unary          → ( "-" | "!" ) expression ;
binary         → expression operator expression ;
operator       → "==" | "!=" | "<" | "<=" | ">" | ">="
               | "+"  | "-"  | "*" | "/" ;
```

There's one bit of extra <span name="play">metasyntax</span> here. In addition
to quoted strings for terminals that match exact lexemes, we `CAPITALIZE`
terminals that are a single lexeme whose text representation may vary. `NUMBER`
is any number literal, and `STRING` is any string literal. Later, we'll do the
same for `IDENTIFIER`.

This grammar is actually ambiguous, which we'll see when we get to parsing it.
But it's good enough for now.

<aside name="play">

If you're so inclined, try using this grammar to generate a few expressions like
we did with the breakfast grammar before. Do the resulting expressions look
right to you? Can you make it generate anything wrong like `1 + / 3`?

</aside>

## Implementing Syntax Trees

Finally, we get to write some code. That little expression grammar is our
skeleton. Since the grammar is recursive -- note how `grouping`, `unary`, and
`binary` all refer back to `expression` -- our data structure will form a tree.
Since this structure represents the syntax of our language, it's called a <span
name="ast">**syntax tree**</span>.

<aside name="ast">

In particular, we're defining an **abstract syntax tree** (**AST**). In a
**parse tree**, every single grammar production becomes a node in the tree. An
AST elides productions that aren't needed by later phases.

</aside>

Our scanner used a single Token class to represent all kinds of lexemes. To
distinguish the different kinds -- think the number `123` versus the string
`"123"` -- we included a simple TokenType enum. Syntax trees are not so <span
name="token-data">homogeneous</span>. Unary expressions have a single operand,
binary expressions have two, and literals have none.

We *could* mush that all together into a single Expression class with an
arbitrary list of children. Some compilers do. But I like getting the most out
of Java's type system. So we'll define a base class for expressions. Then, for
each kind of expression -- each production under `expression` -- we create a
subclass that has fields for the nonterminals specific to that rule. This way,
we get a compile error if we, say, try to access the second operand of a unary
expression.

<aside name="token-data">

Tokens aren't entirely homogeneous either. Tokens for literals store the value,
but other kinds of lexemes don't need that state. I have seen scanners that use
different classes for literals and other kinds of lexemes, but I figured I'd
keep things simpler.

</aside>

Something like this:

```java
package com.craftinginterpreters.lox;

abstract class Expr { // [expr]
  static class Binary extends Expr {
    Binary(Expr left, Token operator, Expr right) {
      this.left = left;
      this.operator = operator;
      this.right = right;
    }

    final Expr left;
    final Token operator;
    final Expr right;
  }

  // Other expressions...
}
```

<aside name="expr">

I avoid abbreviations in my code because they trip up a reader who doesn't know
what they stand for. But in compilers I've looked at, "Expr" and "Stmt" are so
ubiquitous that I may as well start getting you used to them now.

</aside>

Expr is the base class that all expression classes inherit from. As you can see
from `Binary`, the subclasses are nested inside of it. There's no technical need
for this, but it lets us cram all of the classes into a single Java file.

### Disoriented objects

You'll note that, much like the Token class, there aren't any methods here. It's
a dumb structure. Nicely typed, but merely a bag of data. This feels strange in
an object-oriented language like Java. Shouldn't the class *do stuff*?

The problem is that these tree classes aren't owned by any single domain. Should
they have methods for parsing since that's where the trees are created? Or
interpreting since that's where they are consumed? Trees span the border between
those territories, which means they are really owned by *neither*.

In fact, these types exist to enable the parser and interpreter to
*communicate*. That lends itself to types that are simply data with no
associated behavior. This style is very natural in functional languages like
Lisp and ML where *all* data is separate from behavior, but it feels odd in
Java.

Functional programming aficionados right now are jumping up to exclaim "See!
Object-oriented languages are a bad fit for an interpreter!" I won't go that
far. You'll recall that the scanner itself was admirably suited to
object-orientation. It had all of the mutable state to keep track of where it
was in the source code, a well-defined set of public methods, and a handful of
private helpers.

My feeling is that each phase or part of the interpreter works fine in an
object-oriented style. It is the data structures that flow between them that are
stripped of behavior.

### Metaprogramming the trees

Java can express behavior-less classes, but I wouldn't say that it's
particularly great at it. Eleven lines of code to stuff three fields in an
object is pretty tedious, and when we're all done, we're going to have 21 of
these classes.

I don't want to waste your time or my ink writing all that down. Really, what is
the essence of each subclass? A name, and a list of typed fields. That's it.
We're smart language hackers, right? Let's <span
name="automate">automate</span>.

<aside name="automate">

Picture me doing an awkward robot dance when you read that. "AU-TO-MATE."

</aside>

Instead of tediously handwriting each class definition, field declaration,
constructor, and initializer, we'll hack together a <span
name="python">script</span> that does it for us. It has a description of each
tree type -- its name and fields -- and it prints out the Java code needed to
define a class with that name and state.

This script is a tiny Java command-line app that generates a file named
"Expr.java":

<aside name="python">

I got the idea of scripting the syntax tree classes from Jim Hugunin, creator of
Jython and IronPython.

An actual scripting language would be a better fit for this than Java, but I'm
trying not to throw too many languages at you.

</aside>

^code generate-ast

Note that this file is in a different package, `.tool` instead of `.lox`. This
script isn't part of the interpreter itself. It's a tool *we*, the people
hacking on the interpreter, run ourselves to generate the syntax tree classes.
When it's done, we treat "Expr.java" like any other file in the implementation.
We are merely automating how that file gets authored.

To generate the classes, it needs to have some description of each type and its
fields.

^code call-define-ast (1 before, 1 after)

For brevity's sake, I jammed the descriptions of the expression types into
strings. Each is the name of the class followed by `:` and the list of fields,
separated by commas. Each field has a type and a name.

The first thing `defineAst()` needs to do is output the base Expr class.

^code define-ast

When we call this, `baseName` is "Expr", which is both the name of the class and
the name of the file it outputs. We pass this as an argument instead of
hardcoding the name because we'll add a separate family of classes later for
statements.

Inside the base class, we define each subclass.

^code nested-classes (2 before, 1 after)

<aside name="robust">

This isn't the world's most elegant string manipulation code, but that's fine.
It only runs on the exact set of class definitions we give it. Robustness ain't
a priority.

</aside>

That code, in turn, calls:

^code define-type

There we go. All of that glorious Java boilerplate is done. It declares each
field in the class body. It defines a constructor for the class with parameters
for each field and initializes them in the body.

Compile and run this Java program now and it <span name="longer">blasts</span>
out a new &ldquo;.java" file containing a few dozen lines of code. That file's
about to get even longer.

<aside name="longer">

[Appendix II][] contains the code generated by this script once we've finished
implementing jlox and defined all of its syntax tree nodes.

[appendix ii]: appendix-ii.html

</aside>

## Working with Trees

Put on your imagination hat for a moment. Even though we aren't there yet,
consider what the interpreter will do with the syntax trees. Each kind of
expression in Lox behaves differently at runtime. That means the interpreter
needs to select a different chunk of code to handle each expression type. With
tokens, we can simply switch on the TokenType. But we don't have a "type" enum
for the syntax trees, just a separate Java class for each one.

We could write a long chain of type tests:

```java
if (expr instanceof Expr.Binary) {
  // ...
} else if (expr instanceof Expr.Grouping) {
  // ...
} else // ...
```

But all of those sequential type tests are slow. Expression types whose names
are alphabetically later would take longer to execute because they'd fall
through more `if` cases before finding the right type. That's not my idea of an
elegant solution.

We have a family of classes and we need to associate a chunk of behavior with
each one. The natural solution in an object-oriented language like Java is to
put those behaviors into methods on the classes themselves. We could add an
abstract <span name="interpreter-pattern">`interpret()`</span> method on Expr
which each subclass would then implement to interpret itself.

<aside name="interpreter-pattern">

This exact thing is literally called the ["Interpreter pattern"][interp] in
*Design Patterns: Elements of Reusable Object-Oriented Software*, by Erich
Gamma, et al.

[interp]: https://en.wikipedia.org/wiki/Interpreter_pattern

</aside>

This works alright for tiny projects, but it scales poorly. Like I noted before,
these tree classes span a few domains. At the very least, both the parser and
interpreter will mess with them. As [you'll see later][resolution], we need to
do name resolution on them. If our language were statically typed, we'd have a
type checking pass.

[resolution]: resolving-and-binding.html

If we added instance methods to the expression classes for every one of those
operations, that would smush a bunch of different domains together. That
violates [separation of concerns][] and leads to hard-to-maintain code.

[separation of concerns]: https://en.wikipedia.org/wiki/Separation_of_concerns

### The expression problem

This problem is more fundamental than it may seem at first. We have a handful of
types, and a handful of high-level operations like "interpret". For each pair of
type and operation, we need a specific implementation. Picture a table:

<img src="image/representing-code/table.png" alt="A table where rows are labeled with expression classes, and columns are function names." />

Rows are types, and columns are operations. Each cell represents the unique
piece of code to implement that operation on that type.

An object-oriented language like Java assumes that all of the code in one row
naturally hangs together. It figures all the things you do with a type are
likely related to each other, and the language makes it easy to define them
together as methods inside the same class.

<img src="image/representing-code/rows.png" alt="The table split into rows for each class." />

This makes it easy to extend the table by adding new rows. Simply define a new
class. No existing code has to be touched. But imagine if you want to add a new
*operation* -- a new column. In Java, that means cracking open each of those
existing classes and adding a method to it.

Functional paradigm languages in the <span name="ml">ML</span> family flip that
around. There, you don't have classes with methods. Types and functions are
totally distinct. To implement an operation for a number of different types, you
define a single function. In the body of that function, you use *pattern
matching* -- sort of a type-based switch on steroids -- to implement the
operation for each type all in one place.

<aside name="ml">

ML, short for "metalanguage", was created by Robin Milner and friends and forms
one of the main branches in the great programming language family tree. Its
children include SML, Caml, OCaml, Haskell, and F#. Even Scala, Rust, and Swift
bear a strong resemblance.

Much like Lisp, it is one of those languages that is so full of good ideas that
language designers today are still rediscovering them over forty years later.

</aside>

This makes it trivial to add new operations -- simply define another function
that pattern matches on all of the types.

<img src="image/representing-code/columns.png" alt="The table split into columns for each function." />

But, conversely, adding a new type is hard. You have to go back and add a new
case to all of the pattern matches in all of the existing functions.

Each style has a certain "grain" to it. That's what the paradigm name literally
says -- an object-oriented language wants you to *orient* your code along the
rows of types. A functional language instead encourages you to lump each
column's worth of code together into a *function*.

A bunch of smart language nerds noticed that neither style made it easy to add
*both* rows and columns to the <span name="multi">table</span>. They called this
difficulty the "expression problem" because -- like we are now -- they first ran
into it when they were trying to figure out the best way to model expression
syntax tree nodes in a compiler.

<aside name="multi">

Languages with *multimethods*, like Common Lisp's CLOS, Dylan, and Julia, do
support adding both new types and operations easily. What they typically
sacrifice is either static type checking, or separate compilation.

</aside>

People have thrown all sorts of language features, design patterns, and
programming tricks at that problem to try to knock it down, but no perfect
language has finished it off yet. In the meantime, the best we can do is try to
pick a language whose orientation matches the natural architectural seams in the
program we're writing.

Object-orientation works fine for many parts of our interpreter, but these tree
classes rub against the grain of Java. Fortunately, there's a design pattern we
can bring to bear on it.

### The Visitor pattern

The **Visitor pattern** is the most widely misunderstood pattern in all of
*Design Patterns*, which is really saying something when you look at the
software architecture excesses of the past couple of decades.

The trouble starts with terminology. The pattern isn't about "visiting", and the
"accept" method in it doesn't conjure up any helpful imagery either. Many think
the pattern has to do with traversing trees, which isn't the case at all. We
*are* going to use it on a set of classes that are tree-like, but that's a
coincidence. As you'll see, the pattern works as well on a single object.

The Visitor pattern is really about approximating the functional style within an
OOP language. It lets us add new columns to that table easily. We can define all
of the behavior for a new operation on a set of types in one place, without
having to touch the types themselves. It does this the same way we solve almost
every problem in computer science: by adding a layer of indirection.

Before we apply it to our auto-generated Expr classes, let's walk through a
simpler example. Say we have two kinds of pastries: <span
name="beignet">beignets</span> and crullers.

<aside name="beignet">

A beignet (pronounced "ben-yay", with equal emphasis on both syllables) is a
deep-fried pastry in the same family as doughnuts. When the French colonized
North America in the 1700s, they brought beignets with them. Today, in the US,
they are most strongly associated with the cuisine of New Orleans.

My preferred way to consume them is fresh out of the fryer at Café du Monde,
piled high in powdered sugar, and washed down with a cup of café au lait while I
watch tourists staggering around trying to shake off their hangover from the
previous night's revelry.

</aside>

^code pastries (no location)

We want to be able to define new pastry operations -- cooking them, eating them,
decorating them, etc. -- without having to add a new method to each class every
time. Here's how we do it. First, we define a separate interface.

^code pastry-visitor (no location)

<aside name="overload">

In *Design Patterns*, both of these methods are confusingly named `visit()`, and
they rely on overloading to distinguish them. This leads some readers to think
that the correct visit method is chosen *at runtime* based on its parameter
type. That isn't the case. Unlike over*riding*, over*loading* is statically
dispatched at compile time.

Using distinct names for each method makes the dispatch more obvious, and also
shows you how to apply this pattern in languages that don't support overloading.

</aside>

Each operation that can be performed on pastries is a new class that implements
that interface. It has a concrete method for each type of pastry. That keeps the
code for the operation on both types all nestled snugly together in one class.

Given some pastry, how do we route it to the correct method on the visitor based
on its type? Polymorphism to the rescue! We add this method to Pastry:

^code pastry-accept (1 before, 1 after, no location)

Each subclass implements it.

^code beignet-accept (1 before, 1 after, no location)

And:

^code cruller-accept (1 before, 1 after, no location)

To perform an operation on a pastry, we call its `accept()` method and pass in
the visitor for the operation we want to execute. The pastry -- the specific
subclass's overriding implementation of `accept()` -- turns around and calls the
appropriate visit method on the visitor and passes *itself* to it.

That's the heart of the trick right there. It lets us use polymorphic dispatch
on the *pastry* classes to select the appropriate method on the *visitor* class.
In the table, each pastry class is a row, but if you look at all of the methods
for a single visitor, they form a *column*.

<img src="image/representing-code/visitor.png" alt="Now all of the cells for one operation are part of the same class, the visitor." />

We added one `accept()` method to each class, and we can use it for as many
visitors as we want without ever having to touch the pastry classes again. It's
a clever pattern.

### Visitors for expressions

OK, let's weave it into our expression classes. We'll also <span
name="context">refine</span> the pattern a little. In the pastry example, the
visit and `accept()` methods don't return anything. In practice, visitors often
want to define operations that produce values. But what return type should
`accept()` have? We can't assume every visitor class wants to produce the same
type, so we'll use generics to let each implementation fill in a return type.

<aside name="context">

Another common refinement is an additional "context" parameter that is passed to
`accept()` and then forwarded along to the visit methods. That lets operations
take an additional parameter. The visitors we'll define in the book don't need
that, so I omitted it.

</aside>

First, we define the visitor interface. Again, we nest it inside the base class
so that we can keep everything in one file.

^code call-define-visitor (2 before, 1 after)

That function generates the visitor interface.

^code define-visitor

Here, we iterate through all of the subclasses and declare a visit method for
each one. When we define new expression types later, this will automatically
include them.

Inside the base class, we define the abstract `accept()` method.

^code base-accept-method (2 before, 1 after)

Finally, each subclass implements that and calls the right visit method for its
own type.

^code accept-method (1 before, 2 after)

There we go. Now we can define operations on expressions without having to muck
with the classes or our generator script. Compile and run this generator script
to output an updated "Expr.java" file. It contains a generated Visitor
interface and a set of expression node classes that support the Visitor pattern
using it.

Before we end this rambling chapter, let's implement that Visitor interface and
see the pattern in action.

## A (Not Very) Pretty Printer

When we debug our parser and interpreter, it's often useful to look at a parsed
syntax tree and make sure it has the structure we expect. We could inspect it in
the debugger, but that can be a chore.

Instead, we'd like some code that, given a syntax tree, produces an unambiguous
string representation of it. Converting a tree to a string is sort of the
opposite of a parser, and is often called "pretty printing" when the goal is to
produce a string of text that is valid syntax in the source language.

That's not our goal here. We want the string to very explicitly show the nesting
structure of the tree. A printer that returned `1 + 2 * 3` isn't super helpful
if what we're trying to debug is whether operator precedence is handled
correctly. We want to know if the `+` or `*` is at the top of the tree.

To that end, the string representation we produce isn't going to be Lox syntax.
Instead, it will look a lot like, well, Lisp. Each expression is explicitly
parenthesized, and all of its subexpressions and tokens are contained in that.

Given a syntax tree like:

<img src="image/representing-code/expression.png" alt="An example syntax tree." />

It produces:

```text
(* (- 123) (group 45.67))
```

Not exactly "pretty", but it does show the nesting and grouping explicitly. To
implement this, we define a new class.

^code ast-printer

As you can see, it implements the visitor interface. That means we need visit
methods for each of the expression types we have so far.

^code visit-methods (2 before, 1 after)

Literal expressions are easy -- they convert the value to a string with a little
check to handle Java's `null` standing in for Lox's `nil`. The other expressions
have subexpressions, so they use this `parenthesize()` helper method:

^code print-utilities

It takes a name and a list of subexpressions and wraps them all up in
parentheses, yielding a string like:

```text
(+ 1 2)
```

Note that it calls `accept()` on each subexpression and passes in itself. This
is the <span name="tree">recursive</span> step that lets us print an entire
tree.

<aside name="tree">

This recursion is also why people think the Visitor pattern itself has to do
with trees.

</aside>

We don't have a parser yet, so it's hard to see this in action. For now, we'll
hack together a little `main()` method that manually instantiates a tree and
prints it.

^code printer-main

If we did everything right, it prints:

```text
(* (- 123) (group 45.67))
```

You can go ahead and delete this method. We won't need it. Also, as we add new
syntax tree types, I won't bother showing the necessary visit methods for them
in AstPrinter. If you want to (and you want the Java compiler to not yell at
you), go ahead and add them yourself. The printer will come in handy in the next
chapter when we start parsing Lox code into syntax trees. Or, if you don't care
to maintain AstPrinter, feel free to delete it. We won't need it again.

<div class="challenges">

## Challenges

1.  Earlier, I said that the `|`, `*`, and `+` forms we added to our grammar
    metasyntax were just syntactic sugar. Take this grammar:

    ```ebnf
    expr → expr ( "(" ( expr ( "," expr )* )? ")" | "." IDENTIFIER )+
         | IDENTIFIER
         | NUMBER
    ```

    Produce a grammar that matches the same language but does not use any of
    that notational sugar.

    *Bonus:* What kind of expression does this bit of grammar encode?

1.  The Visitor pattern lets you emulate the functional style in an
    object-oriented language. Devise a complementary pattern for a functional
    language. It should let you bundle all of the operations on one type
    together and let you define new types easily.

    (SML or Haskell would be ideal for this exercise, but Scheme or another Lisp
    works as well.)

1.  In [reverse Polish notation][rpn] (RPN), the operands to an arithmetic
    operator are both placed before the operator, so `1 + 2` becomes `1 2 +`.
    Evaluation proceeds from left to right. Numbers are pushed onto an implicit
    stack. An arithmetic operator pops the top two numbers, performs the
    operation, and pushes the result. Thus, this:

    ```lox
    (1 + 2) * (4 - 3)
    ```

    in RPN becomes:

    ```lox
    1 2 + 4 3 - *
    ```

    Define a visitor class for our syntax tree classes that takes an expression,
    converts it to RPN, and returns the resulting string.

[rpn]: https://en.wikipedia.org/wiki/Reverse_Polish_notation

</div>


================================================
FILE: book/resolving-and-binding.md
================================================
> Once in a while you find yourself in an odd situation. You get into it by
> degrees and in the most natural way but, when you are right in the midst of
> it, you are suddenly astonished and ask yourself how in the world it all came
> about.
>
> <cite>Thor Heyerdahl, <em>Kon-Tiki</em></cite>

Oh, no! Our language implementation is taking on water! Way back when we [added
variables and blocks][statements], we had scoping nice and tight. But when we
[later added closures][functions], a hole opened in our formerly waterproof
interpreter. Most real programs are unlikely to slip through this hole, but as
language implementers, we take a sacred vow to care about correctness even in
the deepest, dampest corners of the semantics.

[statements]: statements-and-state.html
[functions]: functions.html

We will spend this entire chapter exploring that leak, and then carefully
patching it up. In the process, we will gain a more rigorous understanding of
lexical scoping as used by Lox and other languages in the C tradition. We'll
also get a chance to learn about *semantic analysis* -- a powerful technique for
extracting meaning from the user's source code without having to run it.

## Static Scope

A quick refresher: Lox, like most modern languages, uses *lexical* scoping. This
means that you can figure out which declaration a variable name refers to just
by reading the text of the program. For example:

```lox
var a = "outer";
{
  var a = "inner";
  print a;
}
```

Here, we know that the `a` being printed is the variable declared on the
previous line, and not the global one. Running the program doesn't -- *can't* --
affect this. The scope rules are part of the *static* semantics of the language,
which is why they're also called *static scope*.

I haven't spelled out those scope rules, but now is the time for <span
name="precise">precision</span>:

<aside name="precise">

This is still nowhere near as precise as a real language specification. Those
docs must be so explicit that even a Martian or an outright malicious programmer
would be forced to implement the correct semantics provided they followed the
letter of the spec.

That exactitude is important when a language may be implemented by competing
companies who want their product to be incompatible with the others to lock
customers onto their platform. For this book, we can thankfully ignore those
kinds of shady shenanigans.

</aside>

**A variable usage refers to the preceding declaration with the same name in the
innermost scope that encloses the expression where the variable is used.**

There's a lot to unpack in that:

*   I say "variable usage" instead of "variable expression" to cover both
    variable expressions and assignments. Likewise with "expression where the
    variable is used".

*   "Preceding" means appearing before *in the program text*.

    ```lox
    var a = "outer";
    {
      print a;
      var a = "inner";
    }
    ```

    Here, the `a` being printed is the outer one since it appears <span
    name="hoisting">before</span> the `print` statement that uses it. In most
    cases, in straight line code, the declaration preceding in *text* will also
    precede the usage in *time*. But that's not always true. As we'll see,
    functions may defer a chunk of code such that its *dynamic temporal*
    execution no longer mirrors the *static textual* ordering.

    <aside name="hoisting">

    In JavaScript, variables declared using `var` are implicitly "hoisted" to
    the beginning of the block. Any use of that name in the block will refer to
    that variable, even if the use appears before the declaration. When you
    write this in JavaScript:

    ```js
    {
      console.log(a);
      var a = "value";
    }
    ```

    It behaves like:

    ```js
    {
      var a; // Hoist.
      console.log(a);
      a = "value";
    }
    ```

    That means that in some cases you can read a variable before its initializer
    has run -- an annoying source of bugs. The alternate `let` syntax for
    declaring variables was added later to address this problem.

    </aside>

*   "Innermost" is there because of our good friend shadowing. There may be more
    than one variable with the given name in enclosing scopes, as in:

    ```lox
    var a = "outer";
    {
      var a = "inner";
      print a;
    }
    ```

    Our rule disambiguates this case by saying the innermost scope wins.

Since this rule makes no mention of any runtime behavior, it implies that a
variable expression always refers to the same declaration through the entire
execution of the program. Our interpreter so far *mostly* implements the rule
correctly. But when we added closures, an error snuck in.

```lox
var a = "global";
{
  fun showA() {
    print a;
  }

  showA();
  var a = "block";
  showA();
}
```

<span name="tricky">Before</span> you type this in and run it, decide what you
think it *should* print.

<aside name="tricky">

I know, it's a totally pathological, contrived program. It's just *weird*. No
reasonable person would ever write code like this. Alas, more of your life than
you'd expect will be spent dealing with bizarro snippets of code like this if
you stay in the programming language game for long.

</aside>

OK... got it? If you're familiar with closures in other languages, you'll expect
it to print "global" twice. The first call to `showA()` should definitely print
"global" since we haven't even reached the declaration of the inner `a` yet. And
by our rule that a variable expression always resolves to the same variable,
that implies the second call to `showA()` should print the same thing.

Alas, it prints:

```text
global
block
```

Let me stress that this program never reassigns any variable and contains only a
single `print` statement. Yet, somehow, that `print` statement for a
never-assigned variable prints two different values at different points in time.
We definitely broke something somewhere.

### Scopes and mutable environments

In our interpreter, environments are the dynamic manifestation of static scopes.
The two mostly stay in sync with each other -- we create a new environment when
we enter a new scope, and discard it when we leave the scope. There is one other
operation we perform on environments: binding a variable in one. This is where
our bug lies.

Let's walk through that problematic example and see what the environments look
like at each step. First, we declare `a` in the global scope.

<img src="image/resolving-and-binding/environment-1.png" alt="The global environment with 'a' defined in it." />

That gives us a single environment with a single variable in it. Then we enter
the block and execute the declaration of `showA()`.

<img src="image/resolving-and-binding/environment-2.png" alt="A block environment linking to the global one." />

We get a new environment for the block. In that, we declare one name, `showA`,
which is bound to the LoxFunction object we create to represent the function.
That object has a `closure` field that captures the environment where the
function was declared, so it has a reference back to the environment for the
block.

Now we call `showA()`.

<img src="image/resolving-and-binding/environment-3.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the global environment." />

The interpreter dynamically creates a new environment for the function body of
`showA()`. It's empty since that function doesn't declare any variables. The
parent of that environment is the function's closure -- the outer block
environment.

Inside the body of `showA()`, we print the value of `a`. The interpreter looks
up this value by walking the chain of environments. It gets all the way
to the global environment before finding it there and printing `"global"`.
Great.

Next, we declare the second `a`, this time inside the block.

<img src="image/resolving-and-binding/environment-4.png" alt="The block environment has both 'a' and 'showA' now." />

It's in the same block -- the same scope -- as `showA()`, so it goes into the
same environment, which is also the same environment `showA()`'s closure refers
to. This is where it gets interesting. We call `showA()` again.

<img src="image/resolving-and-binding/environment-5.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the block environment." />

We create a new empty environment for the body of `showA()` again, wire it up to
that closure, and run the body. When the interpreter walks the chain of
environments to find `a`, it now discovers the *new* `a` in the block
environment. Boo.

I chose to implement environments in a way that I hoped would agree with your
informal intuition around scopes. We tend to consider all of the code within a
block as being within the same scope, so our interpreter uses a single
environment to represent that. Each environment is a mutable hash table. When a
new local variable is declared, it gets added to the existing environment for
that scope.

That intuition, like many in life, isn't quite right. A block is not necessarily
all the same scope. Consider:

```lox
{
  var a;
  // 1.
  var b;
  // 2.
}
```

At the first marked line, only `a` is in scope. At the second line, both `a` and
`b` are. If you define a "scope" to be a set of declarations, then those are
clearly not the same scope -- they don't contain the same declarations. It's
like each `var` statement <span name="split">splits</span> the block into two
separate scopes, the scope before the variable is declared and the one after,
which includes the new variable.

<aside name="split">

Some languages make this split explicit. In Scheme and ML, when you declare a
local variable using `let`, you also delineate the subsequent code where the new
variable is in scope. There is no implicit "rest of the block".

</aside>

But in our implementation, environments do act like the entire block is one
scope, just a scope that changes over time. Closures do not like that. When a
function is declared, it captures a reference to the current environment. The
function *should* capture a frozen snapshot of the environment *as it existed at
the moment the function was declared*. But instead, in the Java code, it has a
reference to the actual mutable environment object. When a variable is later
declared in the scope that environment corresponds to, the closure sees the new
variable, even though the declaration does *not* precede the function.

### Persistent environments

There is a style of programming that uses what are called **persistent data
structures**. Unlike the squishy data structures you're familiar with in
imperative programming, a persistent data structure can never be directly
modified. Instead, any "modification" to an existing structure produces a <span
name="copy">brand</span> new object that contains all of the original data and
the new modification. The original is left unchanged.

<aside name="copy">

This sounds like it might waste tons of memory and time copying the structure
for each operation. In practice, persistent data structures share most of their
data between the different "copies".

</aside>

If we were to apply that technique to Environment, then every time you declared
a variable it would return a *new* environment that contained all of the
previously declared variables along with the one new name. Declaring a variable
would do the implicit "split" where you have an environment before the variable
is declared and one after:

<img src="image/resolving-and-binding/split.png" alt="Separate environments before and after the variable is declared." />

A closure retains a reference to the Environment instance in play when the
function was declared. Since any later declarations in that block would produce
new Environment objects, the closure wouldn't see the new variables and our bug
would be fixed.

This is a legit way to solve the problem, and it's the classic way to implement
environments in Scheme interpreters. We could do that for Lox, but it would mean
going back and changing a pile of existing code.

I won't drag you through that. We'll keep the way we represent environments the
same. Instead of making the data more statically structured, we'll bake the
static resolution into the access *operation* itself.

## Semantic Analysis

Our interpreter **resolves** a variable -- tracks down which declaration it
refers to -- each and every time the variable expression is evaluated. If that
variable is swaddled inside a loop that runs a thousand times, that variable
gets re-resolved a thousand times.

We know static scope means that a variable usage always resolves to the same
declaration, which can be determined just by looking at the text. Given that,
why are we doing it dynamically every time? Doing so doesn't just open the hole
that leads to our annoying bug, it's also needlessly slow.

A better solution is to resolve each variable use *once*. Write a chunk of code
that inspects the user's program, finds every variable mentioned, and figures
out which declaration each refers to. This process is an example of a **semantic
analysis**. Where a parser tells only if a program is grammatically correct (a
*syntactic* analysis), semantic analysis goes farther and starts to figure out
what pieces of the program actually mean. In this case, our analysis will
resolve variable bindings. We'll know not just that an expression *is* a
variable, but *which* variable it is.

There are a lot of ways we could store the binding between a variable and its
declaration. When we get to the C interpreter for Lox, we'll have a *much* more
efficient way of storing and accessing local variables. But for jlox, I want to
minimize the collateral damage we inflict on our existing codebase. I'd hate to
throw out a bunch of mostly fine code.

Instead, we'll store the resolution in a way that makes the most out of our
existing Environment class. Recall how the accesses of `a` are interpreted in
the problematic example.

<img src="image/resolving-and-binding/environment-3.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the global environment." />

In the first (correct) evaluation, we look at three environments in the chain
before finding the global declaration of `a`. Then, when the inner `a` is later
declared in a block scope, it shadows the global one.

<img src="image/resolving-and-binding/environment-5.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the block environment." />

The next lookup walks the chain, finds `a` in the *second* environment and
stops there. Each environment corresponds to a single lexical scope where
variables are declared. If we could ensure a variable lookup always walked the
*same* number of links in the environment chain, that would ensure that it
found the same variable in the same scope every time.

To "resolve" a variable usage, we only need to calculate how many "hops" away
the declared variable will be in the environment chain. The interesting question
is *when* to do this calculation -- or, put differently, where in our
interpreter's implementation do we stuff the code for it?

Since we're calculating a static property based on the structure of the source
code, the obvious answer is in the parser. That is the traditional home, and is
where we'll put it later in clox. It would work here too, but I want an excuse to
show you another technique. We'll write our resolver as a separate pass.

### A variable resolution pass

After the parser produces the syntax tree, but before the interpreter starts
executing it, we'll do a single walk over the tree to resolve all of the
variables it contains. Additional passes between parsing and execution are
common. If Lox had static types, we could slide a type checker in there.
Optimizations are often implemented in separate passes like this too. Basically,
any work that doesn't rely on state that's only available at runtime can be done
in this way.

Our variable resolution pass works like a sort of mini-interpreter. It walks the
tree, visiting each node, but a static analysis is different from a dynamic
execution:

*   **There are no side effects.** When the static analysis visits a print
    statement, it doesn't actually print anything. Calls to native functions or
    other operations that reach out to the outside world are stubbed out and
    have no effect.

*   **There is no control flow.** Loops are visited only <span
    name="fix">once</span>. Both branches are visited in `if` statements. Logic
    operators are not short-circuited.

<aside name="fix">

Variable resolution touches each node once, so its performance is *O(n)* where
*n* is the number of syntax tree nodes. More sophisticated analyses may have
greater complexity, but most are carefully designed to be linear or not far from
it. It's an embarrassing faux pas if your compiler gets exponentially slower as
the user's program grows.

</aside>

## A Resolver Class

Like everything in Java, our variable resolution pass is embodied in a class.

^code resolver

Since the resolver needs to visit every node in the syntax tree, it implements
the visitor abstraction we already have in place. Only a few kinds of nodes are
interesting when it comes to resolving variables:

*   A block statement introduces a new scope for the statements it contains.

*   A function declaration introduces a new scope for its body and binds its
    parameters in that scope.

*   A variable declaration adds a new variable to the current scope.

*   Variable and assignment expressions need to have their variables resolved.

The rest of the nodes don't do anything special, but we still need to implement
visit methods for them that traverse into their subtrees. Even though a `+`
expression doesn't *itself* have any variables to resolve, either of its
operands might.

### Resolving blocks

We start with blocks since they create the local scopes where all the magic
happens.

^code visit-block-stmt

This begins a new scope, traverses into the statements inside the block, and
then discards the scope. The fun stuff lives in those helper methods. We start
with the simple one.

^code resolve-statements

This walks a list of statements and resolves each one. It in turn calls:

^code resolve-stmt

While we're at it, let's add another overload that we'll need later for
resolving an expression.

^code resolve-expr

These methods are similar to the `evaluate()` and `execute()` methods in
Interpreter -- they turn around and apply the Visitor pattern to the given
syntax tree node.

The real interesting behavior is around scopes. A new block scope is created
like so:

^code begin-scope

Lexical scopes nest in both the interpreter and the resolver. They behave like a
stack. The interpreter implements that stack using a linked list -- the chain of
Environment objects. In the resolver, we use an actual Java Stack.

^code scopes-field (1 before, 2 after)

This field keeps track of the stack of scopes currently, uh, in scope. Each
element in the stack is a Map representing a single block scope. Keys, as in
Environment, are variable names. The values are Booleans, for a reason I'll
explain soon.

The scope stack is only used for local block scopes. Variables declared at the
top level in the global scope are not tracked by the resolver since they are
more dynamic in Lox. When resolving a variable, if we can't find it in the stack
of local scopes, we assume it must be global.

Since scopes are stored in an explicit stack, exiting one is straightforward.

^code end-scope

Now we can push and pop a stack of empty scopes. Let's put some things in them.

### Resolving variable declarations

Resolving a variable declaration adds a new entry to the current innermost
scope's map. That seems simple, but there's a little dance we need to do.

^code visit-var-stmt

We split binding into two steps, declaring then defining, in order to handle
funny edge cases like this:

```lox
var a = "outer";
{
  var a = a;
}
```

What happens when the initializer for a local variable refers to a variable with
the same name as the variable being declared? We have a few options:

1.  **Run the initializer, then put the new variable in scope.** Here, the new
    local `a` would be initialized with "outer", the value of the *global* one.
    In other words, the previous declaration would desugar to:

    ```lox
    var temp = a; // Run the initializer.
    var a;        // Declare the variable.
    a = temp;     // Initialize it.
    ```

2.  **Put the new variable in scope, then run the initializer.** This means you
    could observe a variable before it's initialized, so we would need to figure
    out what value it would have then. Probably `nil`. That means the new local
    `a` would be re-initialized to its own implicitly initialized value, `nil`.
    Now the desugaring would look like:

    ```lox
    var a; // Define the variable.
    a = a; // Run the initializer.
    ```

3.  **Make it an error to reference a variable in its initializer.** Have the
    interpreter fail either at compile time or runtime if an initializer
    mentions the variable being initialized.

Does either of those first two options look like something a user actually
*wants*? Shadowing is rare and often an error, so initializing a shadowing
variable based on the value of the shadowed one seems unlikely to be deliberate.

The second option is even less useful. The new variable will *always* have the
value `nil`. There is never any point in mentioning it by name. You could use an
explicit `nil` instead.

Since the first two options are likely to mask user errors, we'll take the
third. Further, we'll make it a compile error instead of a runtime one. That
way, the user is alerted to the problem before any code is run.

In order to do that, as we visit expressions, we need to know if we're inside
the initializer for some variable. We do that by splitting binding into two
steps. The first is **declaring** it.

^code declare

Declaration adds the variable to the innermost scope so that it shadows any
outer one and so that we know the variable exists. We mark it as "not ready yet"
by binding its name to `false` in the scope map. The value associated with a key
in the scope map represents whether or not we have finished resolving that
variable's initializer.

After declaring the variable, we resolve its initializer expression in that same
scope where the new variable now exists but is unavailable. Once the initializer
expression is done, the variable is ready for prime time. We do that by
**defining** it.

^code define

We set the variable's value in the scope map to `true` to mark it as fully
initialized and available for use. It's alive! 

### Resolving variable expressions

Variable declarations -- and function declarations, which we'll get to -- write
to the scope maps. Those maps are read when we resolve variable expressions.

^code visit-variable-expr

First, we check to see if the variable is being accessed inside its own
initializer. This is where the values in the scope map come into play. If the
variable exists in the current scope but its value is `false`, that means we
have declared it but not yet defined it. We report that error.

After that check, we actually resolve the variable itself using this helper:

^code resolve-local

This looks, for good reason, a lot like the code in Environment for evaluating a
variable. We start at the innermost scope and work outwards, looking in each map
for a matching name. If we find the variable, we resolve it, passing in the
number of scopes between the current innermost scope and the scope where the
variable was found. So, if the variable was found in the current scope, we
pass in 0. If it's in the immediately enclosing scope, 1. You get the idea.

If we walk through all of the block scopes and never find the variable, we leave
it unresolved and assume it's global. We'll get to the implementation of that
`resolve()` method a little later. For now, let's keep on cranking through the
other syntax nodes.

### Resolving assignment expressions

The other expression that references a variable is assignment. Resolving one
looks like this:

^code visit-assign-expr

First, we resolve the expression for the assigned value in case it also contains
references to other variables. Then we use our existing `resolveLocal()` method
to resolve the variable that's being assigned to.

### Resolving function declarations

Finally, functions. Functions both bind names and introduce a scope. The name of
the function itself is bound in the surrounding scope where the function is
declared. When we step into the function's body, we also bind its parameters
into that inner function scope.

^code visit-function-stmt

Similar to `visitVarStmt()`, we declare and define the name of the function
in the current scope. Unlike variables, though, we define the name eagerly,
before resolving the function's body. This lets a function recursively refer to
itself inside its own body.

Then we resolve the function's body using this:

^code resolve-function

It's a separate method since we will also use it for resolving Lox methods when
we add classes later. It creates a new scope for the body and then binds
variables for each of the function's parameters.

Once that's ready, it resolves the function body in that scope. This is
different from how the interpreter handles function declarations. At *runtime*,
declaring a function doesn't do anything with the function's body. The body
doesn't get touched until later when the function is called. In a *static*
analysis, we immediately traverse into the body right then and there.

### Resolving the other syntax tree nodes

That covers the interesting corners of the grammar. We handle every place where
a variable is declared, read, or written, and every place where a scope is
created or destroyed. Even though they aren't affected by variable resolution,
we also need visit methods for all of the other syntax tree nodes in order to
recurse into their subtrees. <span name="boring">Sorry</span> this bit is
boring, but bear with me. We'll go kind of "top down" and start with statements.

<aside name="boring">

I did say the book would have every single line of code for these interpreters.
I didn't say they'd all be exciting.

</aside>

An expression statement contains a single expression to traverse.

^code visit-expression-stmt

An if statement has an expression for its condition and one or two statements
for the branches.

^code visit-if-stmt

Here, we see how resolution is different from interpretation. When we resolve an
`if` statement, there is no control flow. We resolve the condition and *both*
branches. Where a dynamic execution steps only into the branch that *is* run, a
static analysis is conservative -- it analyzes any branch that *could* be run.
Since either one could be reached at runtime, we resolve both.

Like expression statements, a `print` statement contains a single subexpression.

^code visit-print-stmt

Same deal for return.

^code visit-return-stmt

As in `if` statements, with a `while` statement, we resolve its condition and
resolve the body exactly once.

^code visit-while-stmt

That covers all the statements. On to expressions...

Our old friend the binary expression. We traverse into and resolve both
operands.

^code visit-binary-expr

Calls are similar -- we walk the argument list and resolve them all. The thing
being called is also an expression (usually a variable expression), so that gets
resolved too.

^code visit-call-expr

Parentheses are easy.

^code visit-grouping-expr

Literals are easiest of all.

^code visit-literal-expr

A literal expression doesn't mention any variables and doesn't contain any
subexpressions so there is no work to do.

Since a static analysis does no control flow or short-circuiting, logical
expressions are exactly the same as other binary operators.

^code visit-logical-expr

And, finally, the last node. We resolve its one operand.

^code visit-unary-expr

With all of these visit methods, the Java compiler should be satisfied that
Resolver fully implements Stmt.Visitor and Expr.Visitor. Now is a good time to
take a break, have a snack, maybe a little nap.

## Interpreting Resolved Variables

Let's see what our resolver is good for. Each time it visits a variable, it
tells the interpreter how many scopes there are between the current scope and
the scope where the variable is defined. At runtime, this corresponds exactly to
the number of *environments* between the current one and the enclosing one where
the interpreter can find the variable's value. The resolver hands that number to
the interpreter by calling this:

^code resolve

We want to store the resolution information somewhere so we can use it when the
variable or assignment expression is later executed, but where? One obvious
place is right in the syntax tree node itself. That's a fine approach, and
that's where many compilers store the results of analyses like this.

We could do that, but it would require mucking around with our syntax tree
generator. Instead, we'll take another common approach and store it off to the
<span name="side">side</span> in a map that associates each syntax tree node
with its resolved data.

<aside name="side">

I *think* I've heard this map called a "side table" since it's a tabular data
structure that stores data separately from the objects it relates to. But
whenever I try to Google for that term, I get pages about furniture.

</aside>

Interactive tools like IDEs often incrementally reparse and re-resolve parts of
the user's program. It may be hard to find all of the bits of state that need
recalculating when they're hiding in the foliage of the syntax tree. A benefit
of storing this data outside of the nodes is that it makes it easy to *discard*
it -- simply clear the map.

^code locals-field (1 before, 2 after)

You might think we'd need some sort of nested tree structure to avoid getting
confused when there are multiple expressions that reference the same variable,
but each expression node is its own Java object with its own unique identity. A
single monolithic map doesn't have any trouble keeping them separated.

As usual, using a collection requires us to import a couple of names.

^code import-hash-map (1 before, 1 after)

And:

^code import-map (1 before, 2 after)

### Accessing a resolved variable

Our interpreter now has access to each variable's resolved location. Finally, we
get to make use of that. We replace the visit method for variable expressions
with this:

^code call-look-up-variable (1 before, 1 after)

That delegates to:

^code look-up-variable

There are a couple of things going on here. First, we look up the resolved
distance in the map. Remember that we resolved only *local* variables. Globals
are treated specially and don't end up in the map (hence the name `locals`). So,
if we don't find a distance in the map, it must be global. In that case, we
look it up, dynamically, directly in the global environment. That throws a
runtime error if the variable isn't defined.

If we *do* get a distance, we have a local variable, and we get to take
advantage of the results of our static analysis. Instead of calling `get()`, we
call this new method on Environment:

^code get-at

The old `get()` method dynamically walks the chain of enclosing environments,
scouring each one to see if the variable might be hiding in there somewhere. But
now we know exactly which environment in the chain will have the variable. We
reach it using this helper method:

^code ancestor

This walks a fixed number of hops up the parent chain and returns the
environment there. Once we have that, `getAt()` simply returns the value of the
variable in that environment's map. It doesn't even have to <span
name="coupled">check</span> to see if the
variable is there -- we know it will be because the resolver already found it
before.

<aside name="coupled">

The way the interpreter assumes the variable is in that map feels like flying
blind. The interpreter code trusts that the resolver did its job and resolved
the variable correctly. This implies a deep coupling between these two classes.
In the resolver, each line of code that touches a scope must have its exact
match in the interpreter for modifying an environment.

I felt that coupling firsthand because as I wrote the code for the book, I
ran into a couple of subtle bugs where the resolver and interpreter code were
slightly out of sync. Tracking those down was difficult. One tool to make that
easier is to have the interpreter explicitly assert -- using Java's assert
statements or some other validation tool -- the contract it expects the resolver
to have already upheld.

</aside>

### Assigning to a resolved variable

We can also use a variable by assigning to it. The changes to visiting an
assignment expression are similar.

^code resolved-assign (2 before, 1 after)

Again, we look up the variable's scope distance. If not found, we assume it's
global and handle it the same way as before. Otherwise, we call this new method:

^code assign-at

As `getAt()` is to `get()`, `assignAt()` is to `assign()`. It walks a fixed
number of environments, and then stuffs the new value in that map.

Those are the only changes to Interpreter. This is why I chose a representation
for our resolved data that was minimally invasive. All of the rest of the nodes
continue working as they did before. Even the code for modifying environments is
unchanged.

### Running the resolver

We do need to actually *run* the resolver, though. We insert the new pass after
the parser does its magic.

^code create-resolver (3 before, 1 after)

We don't run the resolver if there are any parse errors. If the code has a
syntax error, it's never going to run, so there's little value in resolving it.
If the syntax is clean, we tell the resolver to do its thing. The resolver has a
reference to the interpreter and pokes the resolution data directly into it as
it walks over variables. When the interpreter runs next, it has everything it
needs.

At least, that's true if the resolver *succeeds*. But what about errors during
resolution?

## Resolution Errors

Since we are doing a semantic analysis pass, we have an opportunity to make
Lox's semantics more precise, and to help users catch bugs early before running
their code. Take a look at this bad boy:

```lox
fun bad() {
  var a = "first";
  var a = "second";
}
```

We do allow declaring multiple variables with the same name in the *global*
scope, but doing so in a local scope is probably a mistake. If users knew the
variable already existed, they would have assigned to it instead of using `var`.
And if they *didn't* know it existed, they probably didn't intend to overwrite
the previous one.

We can detect this mistake statically while resolving.

^code duplicate-variable (1 before, 1 after)

When we declare a variable in a local scope, we already know the names of every
variable previously declared in that same scope. If we see a collision, we
report an error.

### Invalid return errors

Here's another nasty little script:

```lox
return "at top level";
```

This executes a `return` statement, but it's not even inside a function at all.
It's top-level code. I don't know what the user *thinks* is going to happen, but
I don't think we want Lox to allow this.

We can extend the resolver to detect this statically. Much like we track scopes
as we walk the tree, we can track whether or not the code we are currently
visiting is inside a function declaration.

^code function-type-field (1 before, 2 after)

Instead of a bare Boolean, we use this funny enum:

^code function-type

It seems kind of dumb now, but we'll add a couple more cases to it later and
then it will make more sense. When we resolve a function declaration, we pass
that in.

^code pass-function-type (2 before, 1 after)

Over in `resolveFunction()`, we take that parameter and store it in the field
before resolving the body.

^code set-current-function (1 after)

We stash the previous value of the field in a local variable first. Remember,
Lox has local functions, so you can nest function declarations arbitrarily
deeply. We need to track not just that we're in a function, but *how many* we're
in.

We could use an explicit stack of FunctionType values for that, but instead
we'll piggyback on the JVM. We store the previous value in a local on the Java
stack. When we're done resolving the function body, we restore the field to that
value.

^code restore-current-function (1 before, 1 after)

Now that we can always tell whether or not we're inside a function declaration,
we check that when resolving a `return` statement.

^code return-from-top (1 before, 1 after)

Neat, right?

There's one more piece. Back in the main Lox class that stitches everything
together, we are careful to not run the interpreter if any parse errors are
encountered. That check runs *before* the resolver so that we don't try to
resolve syntactically invalid code.

But we also need to skip the interpreter if there are resolution errors, so we
add *another* check.

^code resolution-error (1 before, 2 after)

You could imagine doing lots of other analysis in here. For example, if we added
`break` statements to Lox, we would probably want to ensure they are only used
inside loops.

We could go farther and report warnings for code that isn't necessarily *wrong*
but probably isn't useful. For example, many IDEs will warn if you have
unreachable code after a `return` statement, or a local variable whose value is
never read. All of that would be pretty easy to add to our static visiting pass,
or as <span name="separate">separate</span> passes.

<aside name="separate">

The choice of how many different analyses to lump into a single pass is
difficult. Many small isolated passes, each with their own responsibility, are
simpler to implement and maintain. However, there is a real runtime cost to
traversing the syntax tree itself, so bundling multiple analyses into a single
pass is usually faster.

</aside>

But, for now, we'll stick with that limited amount of analysis. The important
part is that we fixed that one weird annoying edge case bug, though it might be
surprising that it took this much work to do it.

<div class="challenges">

## Challenges

1.  Why is it safe to eagerly define the variable bound to a function's name
    when other variables must wait until after they are initialized before they
    can be used?

1.  How do other languages you know handle local variables that refer to the
    same name in their initializer, like:

    ```lox
    var a = "outer";
    {
      var a = a;
    }
    ```

    Is it a runtime error? Compile error? Allowed? Do they treat global
    variables differently? Do you agree with their choices? Justify your answer.

1.  Extend the resolver to report an error if a local variable is never used.

1.  Our resolver calculates *which* environment the variable is found in, but
    it's still looked up by name in that map. A more efficient environment
    representation would store local variables in an array and look them up by
    index.

    Extend the resolver to associate a unique index with each local variable
    declared in a scope. When resolving a variable access, look up both the
    scope the variable is in and its index and store that. In the interpreter,
    use that to quickly access a variable by its index instead of using a map.

</div>


================================================
FILE: book/scanning-on-demand.md
================================================
> Literature is idiosyncratic arrangements in horizontal lines in only
> twenty-six phonetic symbols, ten Arabic numbers, and about eight punctuation
> marks.
>
> <cite>Kurt Vonnegut, <em>Like Shaking Hands With God: A Conversation about Writing</em></cite>

Our second interpreter, clox, has three phases -- scanner, compiler, and virtual
machine. A data structure joins each pair of phases. Tokens flow from scanner to
compiler, and chunks of bytecode from compiler to VM. We began our
implementation near the end with [chunks][] and the [VM][]. Now, we're going to
hop back to the beginning and build a scanner that makes tokens. In the
[next chapter][], we'll tie the two ends together with our bytecode compiler.

[chunks]: chunks-of-bytecode.html
[vm]: a-virtual-machine.html
[next chapter]: compiling-expressions.html

<img src="image/scanning-on-demand/pipeline.png" alt="Source code &rarr; scanner &rarr; tokens &rarr; compiler &rarr; bytecode chunk &rarr; VM." />

I'll admit, this is not the most exciting chapter in the book. With two
implementations of the same language, there's bound to be some redundancy. I did
sneak in a few interesting differences compared to jlox's scanner. Read on to
see what they are.

## Spinning Up the Interpreter

Now that we're building the front end, we can get clox running like a real
interpreter. No more hand-authored chunks of bytecode. It's time for a REPL and
script loading. Tear out most of the code in `main()` and replace it with:

^code args (3 before, 2 after)

If you pass <span name="args">no arguments</span> to the executable, you are
dropped into the REPL. A single command line argument is understood to be the
path to a script to run.

<aside name="args">

The code tests for one and two arguments, not zero and one, because the first
argument in `argv` is always the name of the executable being run.

</aside>

We'll need a few system headers, so let's get them all out of the way.

^code main-includes (1 after)

Next, we get the REPL up and REPL-ing.

^code repl (1 before)

A quality REPL handles input that spans multiple lines gracefully and doesn't
have a hardcoded line length limit. This REPL here is a little more, ahem,
austere, but it's fine for our purposes.

The real work happens in `interpret()`. We'll get to that soon, but first let's
take care of loading scripts.

^code run-file

We read the file and execute the resulting string of Lox source code. Then,
based on the result of that, we set the exit code appropriately because we're
scrupulous tool builders and care about little details like that.

We also need to free the source code string because `readFile()` dynamically
allocates it and passes ownership to its caller. That function looks like this:

<aside name="owner">

C asks us not just to manage memory explicitly, but *mentally*. We programmers
have to remember the ownership rules and hand-implement them throughout the
program. Java just does it for us. C++ gives us tools to encode the policy
directly so that the compiler validates it for us.

I like C's simplicity, but we pay a real price for it -- the language requires
us to be more conscientious.

</aside>

^code read-file

Like a lot of C code, it takes more effort than it seems like it should,
especially for a language expressly designed for operating systems. The
difficult part is that we want to allocate a big enough string to read the whole
file, but we don't know how big the file is until we've read it.

The code here is the classic trick to solve that. We open the file, but before
reading it, we seek to the very end using `fseek()`. Then we call `ftell()`
which tells us how many bytes we are from the start of the file. Since we seeked
(sought?) to the end, that's the size. We rewind back to the beginning, allocate
a string of that <span name="one">size</span>, and read the whole file in a
single batch.

<aside name="one">

Well, that size *plus one*. Always gotta remember to make room for the null
byte.

</aside>

So we're done, right? Not quite. These function calls, like most calls in the C
standard library, can fail. If this were Java, the failures would be thrown as
exceptions and automatically unwind the stack so we wouldn't *really* need to
handle them. In C, if we don't check for them, they silently get ignored.

This isn't really a book on good C programming practice, but I hate to encourage
bad style, so let's go ahead and handle the errors. It's good for us, like
eating our vegetables or flossing.

Fortunately, we don't need to do anything particularly clever if a failure
occurs. If we can't correctly read the user's script, all we can really do is
tell the user and exit the interpreter gracefully. First of all, we might fail
to open the file.

^code no-file (1 before, 2 after)

This can happen if the file doesn't exist or the user doesn't have access to it.
It's pretty common -- people mistype paths all the time.

This failure is much rarer:

^code no-buffer (1 before, 1 after)

If we can't even allocate enough memory to read the Lox script, the user's
probably got bigger problems to worry about, but we should do our best to at
least let them know.

Finally, the read itself may fail.

^code no-read (1 before, 1 after)

This is also unlikely. Actually, the <span name="printf">calls</span> to
`fseek()`, `ftell()`, and `rewind()` could theoretically fail too, but let's not
go too far off in the weeds, shall we?

<aside name="printf">

Even good old `printf()` can fail. Yup. How many times have you handled *that*
error?

</aside>

### Opening the compilation pipeline

We've got ourselves a string of Lox source code, so now we're ready to set up a
pipeline to scan, compile, and execute it. It's driven by `interpret()`. Right
now, that function runs our old hardcoded test chunk. Let's change it to
something closer to its final incarnation.

^code vm-interpret-h (1 before, 1 after)

Where before we passed in a Chunk, now we pass in the string of source code.
Here's the new implementation:

^code vm-interpret-c (1 after)

We won't build the actual *compiler* yet in this chapter, but we can start
laying out its structure. It lives in a new module.

^code vm-include-compiler (1 before, 1 after)

For now, the one function in it is declared like so:

^code compiler-h

That signature will change, but it gets us going.

The first phase of compilation is scanning -- the thing we're doing in this
chapter -- so right now all the compiler does is set that up.

^code compiler-c

This will also grow in later chapters, naturally.

### The scanner scans

There are still a few more feet of scaffolding to stand up before we can start
writing useful code. First, a new header:

^code scanner-h

And its corresponding implementation:

^code scanner-c

As our scanner chews through the user's source code, it tracks how far it's
gone. Like we did with the VM, we wrap that state in a struct and then create a
single top-level module variable of that type so we don't have to pass it around
all of the various functions.

There are surprisingly few fields. The `start` pointer marks the beginning of
the current lexeme being scanned, and `current` points to the current character
being looked at.

<span name="fields"></span>

<img src="image/scanning-on-demand/fields.png" alt="The start and current fields pointing at 'print bacon;'. Start points at 'b' and current points at 'o'." />

<aside name="fields">

Here, we are in the middle of scanning the identifier `bacon`. The current
character is `o` and the character we most recently consumed is `c`.

</aside>

We have a `line` field to track what line the current lexeme is on for error
reporting. That's it! We don't even keep a pointer to the beginning of the
source code string. The scanner works its way through the code once and is done
after that.

Since we have some state, we should initialize it.

^code init-scanner

We start at the very first character on the very first line, like a runner
crouched at the starting line.

## A Token at a Time

In jlox, when the starting gun went off, the scanner raced ahead and eagerly
scanned the whole program, returning a list of tokens. This would be a challenge
in clox. We'd need some sort of growable array or list to store the tokens in.
We'd need to manage allocating and freeing the tokens, and the collection
itself. That's a lot of code, and a lot of memory churn.

At any point in time, the compiler needs only one or two tokens -- remember our
grammar requires only a single token of lookahead -- so we don't need to keep
them *all* around at the same time. Instead, the simplest solution is to not
scan a token until the compiler needs one. When the scanner provides one, it
returns the token by value. It doesn't need to dynamically allocate anything --
it can just pass tokens around on the C stack.

Unfortunately, we don't have a compiler yet that can ask the scanner for tokens,
so the scanner will just sit there doing nothing. To kick it into action, we'll
write some temporary code to drive it.

^code dump-tokens (1 before, 1 after)

<aside name="format">

That `%.*s` in the format string is a neat feature. Usually, you set the output
precision -- the number of characters to show -- by placing a number inside the
format string. Using `*` instead lets you pass the precision as an argument. So
that `printf()` call prints the first `token.length` characters of the string at
`token.start`. We need to limit the length like that because the lexeme points
into the original source string and doesn't have a terminator at the end.

</aside>

This loops indefinitely. Each turn through the loop, it scans one token and
prints it. When it reaches a special "end of file" token or an error, it stops.
For example, if we run the interpreter on this program:

```lox
print 1 + 2;
```

It prints out:

```text
   1 31 'print'
   | 21 '1'
   |  7 '+'
   | 21 '2'
   |  8 ';'
   2 39 ''
```

The first column is the line number, the second is the numeric value of the
token <span name="token">type</span>, and then finally the lexeme. That last
empty lexeme on line 2 is the EOF token.

<aside name="token">

Yeah, the raw index of the token type isn't exactly human readable, but it's all
C gives us.

</aside>

The goal for the rest of the chapter is to make that blob of code work by
implementing this key function:

^code scan-token-h (1 before, 2 after)

Each call scans and returns the next token in the source code. A token looks
like this:

^code token-struct (1 before, 2 after)

It's pretty similar to jlox's Token class. We have an enum identifying what type
of token it is -- number, identifier, `+` operator, etc. The enum is virtually
identical to the one in jlox, so let's just hammer out the whole thing.

^code token-type (2 before, 2 after)

Aside from prefixing all the names with `TOKEN_` (since C tosses enum names in
the top-level namespace), the only difference is that extra `TOKEN_ERROR` type.
What's that about?

There are only a couple of errors that get detected during scanning:
unterminated strings and unrecognized characters. In jlox, the scanner reports
those itself. In clox, the scanner produces a synthetic "error" token for that
error and passes it over to the compiler. This way, the compiler knows an error
occurred and can kick off error recovery before reporting it.

The novel part in clox's Token type is how it represents the lexeme. In jlox,
each Token stored the lexeme as its own separate little Java string. If we did
that for clox, we'd have to figure out how to manage the memory for those
strings. That's especially hard since we pass tokens by value
-- multiple tokens could point to the same lexeme string. Ownership gets weird.

Instead, we use the original source string as our character store. We represent
a lexeme by a pointer to its first character and the number of characters it
contains. This means we don't need to worry about managing memory for lexemes at
all and we can freely copy tokens around. As long as the main source code string
<span name="outlive">outlives</span> all of the tokens, everything works fine.

<aside name="outlive">

I don't mean to sound flippant. We really do need to think about and ensure that
the source string, which is created far away over in the "main" module, has a
long enough lifetime. That's why `runFile()` doesn't free the string until
`interpret()` finishes executing the code and returns.

</aside>

### Scanning tokens

We're ready to scan some tokens. We'll work our way up to the complete
implementation, starting with this:

^code scan-token

Since each call to this function scans a complete token, we know we are at the
beginning of a new token when we enter the function. Thus, we set
`scanner.start` to point to the current character so we remember where the
lexeme we're about to scan starts.

Then we check to see if we've reached the end of the source code. If so, we
return an EOF token and stop. This is a sentinel value that signals to the
compiler to stop asking for more tokens.

If we aren't at the end, we do some... stuff... to scan the next token. But we
haven't written that code yet. We'll get to that soon. If that code doesn't
successfully scan and return a token, then we reach the end of the function.
That must mean we're at a character that the scanner can't recognize, so we
return an error token for that.

This function relies on a couple of helpers, most of which are familiar from
jlox. First up:

^code is-at-end

We require the source string to be a good null-terminated C string. If the
current character is the null byte, then we've reached the end.

To create a token, we have this constructor-like function:

^code make-token

It uses the scanner's `start` and `current` pointers to capture the token's
lexeme. It sets a couple of other obvious fields then returns the token. It has
a sister function for returning error tokens.

^code error-token

<span name="axolotl"></span>

<aside name="axolotl">

This part of the chapter is pretty dry, so here's a picture of an axolotl.

<img src="image/scanning-on-demand/axolotl.png" alt="A drawing of an axolotl." />

</aside>

The only difference is that the "lexeme" points to the error message string
instead of pointing into the user's source code. Again, we need to ensure that
the error message sticks around long enough for the compiler to read it. In
practice, we only ever call this function with C string literals. Those are
constant and eternal, so we're fine.

What we have now is basically a working scanner for a language with an empty
lexical grammar. Since the grammar has no productions, every character is an
error. That's not exactly a fun language to program in, so let's fill in the
rules.

## A Lexical Grammar for Lox

The simplest tokens are only a single character. We recognize those like so:

^code scan-char (1 before, 2 after)

We read the next character from the source code, and then do a straightforward
switch to see if it matches any of Lox's one-character lexemes. To read the next
character, we use a new helper which consumes the current character and returns
it.

^code advance

Next up are the two-character punctuation tokens like `!=` and `>=`. Each of
these also has a corresponding single-character token. That means that when we
see a character like `!`, we don't know if we're in a `!` token or a `!=` until
we look at the next character too. We handle those like so:

^code two-char (1 before, 1 after)

After consuming the first character, we look for an `=`. If found, we consume it
and return the corresponding two-character token. Otherwise, we leave the
current character alone (so it can be part of the *next* token) and return the
appropriate one-character token.

That logic for conditionally consuming the second character lives here:

^code match

If the current character is the desired one, we advance and return `true`.
Otherwise, we return `false` to indicate it wasn't matched.

Now our scanner supports all of the punctuation-like tokens. Before we get to
the longer ones, let's take a little side trip to handle characters that aren't
part of a token at all.

### Whitespace

Our scanner needs to handle spaces, tabs, and newlines, but those characters
don't become part of any token's lexeme. We could check for those inside the
main character switch in `scanToken()` but it gets a little tricky to ensure
that the function still correctly finds the next token *after* the whitespace
when you call it. We'd have to wrap the whole body of the function in a loop or
something.

Instead, before starting the token, we shunt off to a separate function.

^code call-skip-whitespace (1 before, 1 after)

This advances the scanner past any leading whitespace. After this call returns,
we know the very next character is a meaningful one (or we're at the end of the
source code).

^code skip-whitespace

It's sort of a separate mini-scanner. It loops, consuming every whitespace
character it encounters. We need to be careful that it does *not* consume any
*non*-whitespace characters. To support that, we use this:

^code peek

This simply returns the current character, but doesn't consume it. The previous
code handles all the whitespace characters except for newlines.

^code newline (1 before, 2 after)

When we consume one of those, we also bump the current line number.

### Comments

Comments aren't technically "whitespace", if you want to get all precise with
your terminology, but as far as Lox is concerned, they may as well be, so we
skip those too.

^code comment (1 before, 2 after)

Comments start with `//` in Lox, so as with `!=` and friends, we need a second
character of lookahead. However, with `!=`, we still wanted to consume the `!`
even if the `=` wasn't found. Comments are different. If we don't find a second
`/`, then `skipWhitespace()` needs to not consume the *first* slash either.

To handle that, we add:

^code peek-next

This is like `peek()` but for one character past the current one. If the current
character and the next one are both `/`, we consume them and then any other
characters until the next newline or the end of the source code.

We use `peek()` to check for the newline but not consume it. That way, the
newline will be the current character on the next turn of the outer loop in
`skipWhitespace()` and we'll recognize it and increment `scanner.line`.

### Literal tokens

Number and string tokens are special because they have a runtime value
associated with them. We'll start with strings because they are easy to
recognize -- they always begin with a double quote.

^code scan-string (1 before, 1 after)

That calls a new function.

^code string

Similar to jlox, we consume characters until we reach the closing quote. We also
track newlines inside the string literal. (Lox supports multi-line strings.)
And, as ever, we gracefully handle running out of source code before we find the
end quote.

The main change here in clox is something that's *not* present. Again, it
relates to memory management. In jlox, the Token class had a field of type
Object to store the runtime value converted from the literal token's lexeme.

Implementing that in C would require a lot of work. We'd need some sort of union
and type tag to tell whether the token contains a string or double value. If
it's a string, we'd need to manage the memory for the string's character array
somehow.

Instead of adding that complexity to the scanner, we defer <span
name="convert">converting</span> the literal lexeme to a runtime value until
later. In clox, tokens only store the lexeme -- the character sequence exactly
as it appears in the user's source code. Later in the compiler, we'll convert
that lexeme to a runtime value right when we are ready to store it in the
chunk's constant table.

<aside name="convert">

Doing the lexeme-to-value conversion in the compiler does introduce some
redundancy. The work to scan a number literal is awfully similar to the work
required to convert a sequence of digit characters to a number value. But there
isn't *that* much redundancy, it isn't in anything performance critical, and it
keeps our scanner simpler.

</aside>

Next up, numbers. Instead of adding a switch case for each of the ten digits
that can start a number, we handle them here:

^code scan-number (1 before, 2 after)

That uses this obvious utility function:

^code is-digit

We finish scanning the number using this:

^code number

It's virtually identical to jlox's version except, again, we don't convert the
lexeme to a double yet.

## Identifiers and Keywords

The last batch of tokens are identifiers, both user-defined and reserved. This
section should be fun -- the way we recognize keywords in clox is quite
different from how we did it in jlox, and touches on some important data
structures.

First, though, we have to scan the lexeme. Names start with a letter or
underscore.

^code scan-identifier (1 before, 1 after)

We recognize those using this:

^code is-alpha

Once we've found an identifier, we scan the rest of it here:

^code identifier

After the first letter, we allow digits too, and we keep consuming alphanumerics
until we run out of them. Then we produce a token with the proper type.
Determining that "proper" type is the unique part of this chapter.

^code identifier-type

Okay, I guess that's not very exciting yet. That's what it looks like if we
have no reserved words at all. How should we go about recognizing keywords? In
jlox, we stuffed them all in a Java Map and looked them up by name. We don't
have any sort of hash table structure in clox, at least not yet.

A hash table would be overkill anyway. To look up a string in a hash <span
name="hash">table</span>, we need to walk the string to calculate its hash code,
find the corresponding bucket in the hash table, and then do a
character-by-character equality comparison on any string it happens to find
there.

<aside name="hash">

Don't worry if this is unfamiliar to you. When we get to [building our own hash
table from scratch][hash], we'll learn all about it in exquisite detail.

[hash]: hash-tables.html

</aside>

Let's say we've scanned the identifier "gorgonzola". How much work *should* we
need to do to tell if that's a reserved word? Well, no Lox keyword starts with
"g", so looking at the first character is enough to definitively answer no.
That's a lot simpler than a hash table lookup.

What about "cardigan"? We do have a keyword in Lox that starts with "c":
"class". But the second character in "cardigan", "a", rules that out. What about
"forest"? Since "for" is a keyword, we have to go farther in the string before
we can establish that we don't have a reserved word. But, in most cases, only a
character or two is enough to tell we've got a user-defined name on our hands.
We should be able to recognize that and fail fast.

Here's a visual representation of that branching character-inspection logic:

<span name="down"></span>

<img src="image/scanning-on-demand/keywords.png" alt="A trie that contains all of Lox's keywords." />

<aside name="down">

Read down each chain of nodes and you'll see Lox's keywords emerge.

</aside>

We start at the root node. If there is a child node whose letter matches the
first character in the lexeme, we move to that node. Then repeat for the next
letter in the lexeme and so on. If at any point the next letter in the lexeme
doesn't match a child node, then the identifier must not be a keyword and we
stop. If we reach a double-lined box, and we're at the last character of the
lexeme, then we found a keyword.

### Tries and state machines

This tree diagram is an example of a thing called a <span
name="trie">[**trie**][trie]</span>. A trie stores a set of strings. Most other
data structures for storing strings contain the raw character arrays and then
wrap them inside some larger construct that helps you search faster. A trie is
different. Nowhere in the trie will you find a whole string.

[trie]: https://en.wikipedia.org/wiki/Trie

<aside name="trie">

"Trie" is one of the most confusing names in CS. Edward Fredkin yanked it out of
the middle of the word "retrieval", which means it should be pronounced like
"tree". But, uh, there is already a pretty important data structure pronounced
"tree" *which tries are a special case of*, so unless you never speak of these
things out loud, no one can tell which one you're talking about. Thus, people
these days often pronounce it like "try" to avoid the headache.

</aside>

Instead, each string the trie "contains" is represented as a *path* through the
tree of character nodes, as in our traversal above. Nodes that match the last
character in a string have a special marker -- the double-lined boxes in the
illustration. That way, if your trie contains, say, "banquet" and "ban", you are
able to tell that it does *not* contain "banque" -- the "e" node won't have that
marker, while the "n" and "t" nodes will.

Tries are a special case of an even more fundamental data structure: a
[**deterministic finite automaton**][dfa] (**DFA**). You might also know these
by other names: **finite state machine**, or just **state machine**. State
machines are rad. They end up useful in everything from [game
programming][state] to implementing networking protocols.

[dfa]: https://en.wikipedia.org/wiki/Deterministic_finite_automaton
[state]: http://gameprogrammingpatterns.com/state.html

In a DFA, you have a set of *states* with *transitions* between them, forming a
graph. At any point in time, the machine is "in" exactly one state. It gets to
other states by following transitions. When you use a DFA for lexical analysis,
each transition is a character that gets matched from the string. Each state
represents a set of allowed characters.

Our keyword tree is exactly a DFA that recognizes Lox keywords. But DFAs are
more powerful than simple trees because they can be arbitrary *graphs*.
Transitions can form cycles between states. That lets you recognize arbitrarily
long strings. For example, here's a DFA that recognizes number literals:

<span name="railroad"></span>

<img src="image/scanning-on-demand/numbers.png" alt="A syntax diagram that recognizes integer and floating point literals." />

<aside name="railroad">

This style of diagram is called a [**syntax diagram**][syntax diagram] or the
more charming **railroad diagram**. The latter name is because it looks
something like a switching yard for trains.

Back before Backus-Naur Form was a thing, this was one of the predominant ways
of documenting a language's grammar. These days, we mostly use text, but there's
something delightful about the official specification for a *textual language*
relying on an *image*.

[syntax diagram]: https://en.wikipedia.org/wiki/Syntax_diagram

</aside>

I've collapsed the nodes for the ten digits together to keep it more readable,
but the basic process works the same -- you work through the path, entering
nodes whenever you consume a corresponding character in the lexeme. If we were
so inclined, we could construct one big giant DFA that does *all* of the lexical
analysis for Lox, a single state machine that recognizes and spits out all of
the tokens we need.

However, crafting that mega-DFA by <span name="regex">hand</span> would be
challenging. That's why [Lex][] was created. You give it a simple textual
description of your lexical grammar -- a bunch of regular expressions -- and it
automatically generates a DFA for you and produces a pile of C code that
implements it.

[lex]: https://en.wikipedia.org/wiki/Lex_(software)

<aside name="regex">

This is also how most regular expression engines in programming languages and
text editors work under the hood. They take your regex string and convert it to
a DFA, which they then use to match strings.

If you want to learn the algorithm to convert a regular expression into a DFA,
[the dragon book][dragon] has you covered.

[dragon]: https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools

</aside>

We won't go down that road. We already have a perfectly serviceable hand-rolled
scanner. We just need a tiny trie for recognizing keywords. How should we map
that to code?

The absolute simplest <span name="v8">solution</span> is to use a switch
statement for each node with cases for each branch. We'll start with the root
node and handle the easy keywords.

<aside name="v8">

Simple doesn't mean dumb. The same approach is [essentially what V8 does][v8],
and that's currently one of the world's most sophisticated, fastest language
implementations.

[v8]: https://github.com/v8/v8/blob/e77eebfe3b747fb315bd3baad09bec0953e53e68/src/parsing/scanner.cc#L1643

</aside>

^code keywords (1 before, 1 after)

These are the initial letters that correspond to a single keyword. If we see an
"s", the only keyword the identifier could possibly be is `super`. It might not
be, though, so we still need to check the rest of the letters too. In the tree
diagram, this is basically that straight path hanging off the "s".

We won't roll a switch for each of those nodes. Instead, we have a utility
function that tests the rest of a potential keyword's lexeme.

^code check-keyword

We use this for all of the unbranching paths in the tree. Once we've found a
prefix that could only be one possible reserved word, we need to verify two
things. The lexeme must be exactly as long as the keyword. If the first letter
is "s", the lexeme could still be "sup" or "superb". And the remaining
characters must match exactly -- "supar" isn't good enough.

If we do have the right number of characters, and they're the ones we want, then
it's a keyword, and we return the associated token type. Otherwise, it must be a
normal identifier.

We have a couple of keywords where the tree branches again after the first
letter. If the lexeme starts with "f", it could be `false`, `for`, or `fun`. So
we add another switch for the branches coming off the "f" node.

^code keyword-f (1 before, 1 after)

Before we switch, we need to check that there even *is* a second letter. "f" by
itself is a valid identifier too, after all. The other letter that branches is
"t".

^code keyword-t (1 before, 1 after)

That's it. A couple of nested `switch` statements. Not only is this code <span
name="short">short</span>, but it's very, very fast. It does the minimum amount
of work required to detect a keyword, and bails out as soon as it can tell the
identifier will not be a reserved one.

And with that, our scanner is complete.

<aside name="short">

We sometimes fall into the trap of thinking that performance comes from
complicated data structures, layers of caching, and other fancy optimizations.
But, many times, all that's required is to do less work, and I often find that
writing the simplest code I can is sufficient to accomplish that.

</aside>

<div class="challenges">

## Challenges

1.  Many newer languages support [**string interpolation**][interp]. Inside a
    string literal, you have some sort of special delimiters -- most commonly
    `${` at the beginning and `}` at the end. Between those delimiters, any
    expression can appear. When the string literal is executed, the inner
    expression is evaluated, converted to a string, and then merged with the
    surrounding string literal.

    For example, if Lox supported string interpolation, then this...

    ```lox
    var drink = "Tea";
    var steep = 4;
    var cool = 2;
    print "${drink} will be ready in ${steep + cool} minutes.";
    ```

    ...would print:

    ```text
    Tea will be ready in 6 minutes.
    ```

    What token types would you define to implement a scanner for string
    interpolation? What sequence of tokens would you emit for the above string
    literal?

    What tokens would you emit for:

    ```text
    "Nested ${"interpolation?! Are you ${"mad?!"}"}"
    ```

    Consider looking at other language implementations that support
    interpolation to see how they handle it.

2.  Several languages use angle brackets for generics and also have a `>>` right
    shift operator. This led to a classic problem in early versions of C++:

    ```c++
    vector<vector<string>> nestedVectors;
    ```

    This would produce a compile error because the `>>` was lexed to a single
    right shift token, not two `>` tokens. Users were forced to avoid this by
    putting a space between the closing angle brackets.

    Later versions of C++ are smarter and can handle the above code. Java and C#
    never had the problem. How do those languages specify and implement this?

3.  Many languages, especially later in their evolution, define "contextual
    keywords". These are identifiers that act like reserved words in some
    contexts but can be normal user-defined identifiers in others.

    For example, `await` is a keyword inside an `async` method in C#, but
    in other methods, you can use `await` as your own identifier.

    Name a few contextual keywords from other languages, and the context where
    they are meaningful. What are the pros and cons of having contextual
    keywords? How would you implement them in your language's front end if you
    needed to?

[interp]: https://en.wikipedia.org/wiki/String_interpolation

</div>


================================================
FILE: book/scanning.md
================================================
> Take big bites. Anything worth doing is worth overdoing.
>
> <cite>Robert A. Heinlein, <em>Time Enough for Love</em></cite>

The first step in any compiler or interpreter is <span
name="lexing">scanning</span>. The scanner takes in raw source code as a series
of characters and groups it into a series of chunks we call **tokens**. These
are the meaningful "words" and "punctuation" that make up the language's
grammar.

<aside name="lexing">

This task has been variously called "scanning" and "lexing" (short for "lexical
analysis") over the years. Way back when computers were as big as Winnebagos but
had less memory than your watch, some people used "scanner" only to refer to the
piece of code that dealt with reading raw source code characters from disk and
buffering them in memory. Then "lexing" was the subsequent phase that did useful
stuff with the characters.

These days, reading a source file into memory is trivial, so it's rarely a
distinct phase in the compiler. Because of that, the two terms are basically
interchangeable.

</aside>

Scanning is a good starting point for us too because the code isn't very hard --
pretty much a `switch` statement with delusions of grandeur. It will help us
warm up before we tackle some of the more interesting material later. By the end
of this chapter, we'll have a full-featured, fast scanner that can take any
string of Lox source code and produce the tokens that we'll feed into the parser
in the next chapter.

## The Interpreter Framework

Since this is our first real chapter, before we get to actually scanning some
code we need to sketch out the basic shape of our interpreter, jlox. Everything
starts with a class in Java.

^code lox-class

<aside name="64">

For exit codes, I'm using the conventions defined in the UNIX
["sysexits.h"][sysexits] header. It's the closest thing to a standard I could
find.

[sysexits]: https://www.freebsd.org/cgi/man.cgi?query=sysexits&amp;apropos=0&amp;sektion=0&amp;manpath=FreeBSD+4.3-RELEASE&amp;format=html

</aside>

Stick that in a text file, and go get your IDE or Makefile or whatever set up.
I'll be right here when you're ready. Good? OK!

Lox is a scripting language, which means it executes directly from source. Our
interpreter supports two ways of running code. If you start jlox from the
command line and give it a path to a file, it reads the file and executes it.

^code run-file

If you want a more intimate conversation with your interpreter, you can also run
it interactively. Fire up jlox without any arguments, and it drops you into a
prompt where you can enter and execute code one line at a time.

<aside name="repl">

An interactive prompt is also called a "REPL" (pronounced like "rebel" but with
a "p"). The name comes from Lisp where implementing one is as simple as
wrapping a loop around a few built-in functions:

```lisp
(print (eval (read)))
```

Working outwards from the most nested call, you **R**ead a line of input,
**E**valuate it, **P**rint the result, then **L**oop and do it all over again.

</aside>

^code prompt

The `readLine()` function, as the name so helpfully implies, reads a line of
input from the user on the command line and returns the result. To kill an
interactive command-line app, you usually type Control-D. Doing so signals an
"end-of-file" condition to the program. When that happens `readLine()` returns
`null`, so we check for that to exit the loop.

Both the prompt and the file runner are thin wrappers around this core function:

^code run

It's not super useful yet since we haven't written the interpreter, but baby
steps, you know? Right now, it prints out the tokens our forthcoming scanner
will emit so that we can see if we're making progress.

### Error handling

While we're setting things up, another key piece of infrastructure is *error
handling*. Textbooks sometimes gloss over this because it's more a practical
matter than a formal computer science-y problem. But if you care about making a
language that's actually *usable*, then handling errors gracefully is vital.

The tools our language provides for dealing with errors make up a large portion
of its user interface. When the user's code is working, they aren't thinking
about our language at all -- their headspace is all about *their program*. It's
usually only when things go wrong that they notice our implementation.

<span name="errors">When</span> that happens, it's up to us to give the user all
the information they need to understand what went wrong and guide them gently
back to where they are trying to go. Doing that well means thinking about error
handling all through the implementation of our interpreter, starting now.

<aside name="errors">

Having said all that, for *this* interpreter, what we'll build is pretty bare
bones. I'd love to talk about interactive debuggers, static analyzers, and other
fun stuff, but there's only so much ink in the pen.

</aside>

^code lox-error

This `error()` function and its `report()` helper tell the user some syntax
error occurred on a given line. That is really the bare minimum to be able to
claim you even *have* error reporting. Imagine if you accidentally left a
dangling comma in some function call and the interpreter printed out:

```text
Error: Unexpected "," somewhere in your code. Good luck finding it!
```

That's not very helpful. We need to at least point them to the right line. Even
better would be the beginning and end column so they know *where* in the line.
Even better than *that* is to *show* the user the offending line, like:

```text
Error: Unexpected "," in argument list.

    15 | function(first, second,);
                               ^-- Here.
```

I'd love to implement something like that in this book but the honest truth is
that it's a lot of grungy string manipulation code. Very useful for users, but
not super fun to read in a book and not very technically interesting. So we'll
stick with just a line number. In your own interpreters, please do as I say and
not as I do.

The primary reason we're sticking this error reporting function in the main Lox
class is because of that `hadError` field. It's defined here:

^code had-error (1 before)

We'll use this to ensure we don't try to execute code that has a known error.
Also, it lets us exit with a non-zero exit code like a good command line citizen
should.

^code exit-code (1 before, 1 after)

We need to reset this flag in the interactive loop. If the user makes a mistake,
it shouldn't kill their entire session.

^code reset-had-error (1 before, 1 after)

The other reason I pulled the error reporting out here instead of stuffing it
into the scanner and other phases where the error might occur is to remind you
that it's good engineering practice to separate the code that *generates* the
errors from the code that *reports* them.

Various phases of the front end will detect errors, but it's not really their
job to know how to present that to a user. In a full-featured language
implementation, you will likely have multiple ways errors get displayed: on
stderr, in an IDE's error window, logged to a file, etc. You don't want that
code smeared all over your scanner and parser.

Ideally, we would have an actual abstraction, some kind of <span
name="reporter">"ErrorReporter"</span> interface that gets passed to the scanner
and parser so that we can swap out different reporting strategies. For our
simple interpreter here, I didn't do that, but I did at least move the code for
error reporting into a different class.

<aside name="reporter">

I had exactly that when I first implemented jlox. I ended up tearing it out
because it felt over-engineered for the minimal interpreter in this book.

</aside>

With some rudimentary error handling in place, our application shell is ready.
Once we have a Scanner class with a `scanTokens()` method, we can start running
it. Before we get to that, let's get more precise about what tokens are.

## Lexemes and Tokens

Here's a line of Lox code:

```lox
var language = "lox";
```

Here, `var` is the keyword for declaring a variable. That three-character
sequence "v-a-r" means something. But if we yank three letters out of the
middle of `language`, like "g-u-a", those don't mean anything on their own.

That's what lexical analysis is about. Our job is to scan through the list of
characters and group them together into the smallest sequences that still
represent something. Each of these blobs of characters is called a **lexeme**.
In that example line of code, the lexemes are:

<img src="image/scanning/lexemes.png" alt="'var', 'language', '=', 'lox', ';'" />

The lexemes are only the raw substrings of the source code. However, in the
process of grouping character sequences into lexemes, we also stumble upon some
other useful information. When we take the lexeme and bundle it together with
that other data, the result is a token. It includes useful stuff like:

### Token type

Keywords are part of the shape of the language's grammar, so the parser often
has code like, "If the next token is `while` then do..." That means the parser
wants to know not just that it has a lexeme for some identifier, but that it has
a *reserved* word, and *which* keyword it is.

The <span name="ugly">parser</span> could categorize tokens from the raw lexeme
by comparing the strings, but that's slow and kind of ugly. Instead, at the
point that we recognize a lexeme, we also remember which *kind* of lexeme it
represents. We have a different type for each keyword, operator, bit of
punctuation, and literal type.

<aside name="ugly">

After all, string comparison ends up looking at individual characters, and isn't
that the scanner's job?

</aside>

^code token-type

### Literal value

There are lexemes for literal values -- numbers and strings and the like. Since
the scanner has to walk each character in the literal to correctly identify it,
it can also convert that textual representation of a value to the living runtime
object that will be used by the interpreter later.

### Location information

Back when I was preaching the gospel about error handling, we saw that we need
to tell users *where* errors occurred. Tracking that starts here. In our simple
interpreter, we note only which line the token appears on, but more
sophisticated implementations include the column and length too.

<aside name="location">

Some token implementations store the location as two numbers: the offset from
the beginning of the source file to the beginning of the lexeme, and the length
of the lexeme. The scanner needs to know these anyway, so there's no overhead to
calculate them.

An offset can be converted to line and column positions later by looking back at
the source file and counting the preceding newlines. That sounds slow, and it
is. However, you need to do it *only when you need to actually display a line
and column to the user*. Most tokens never appear in an error message. For
those, the less time you spend calculating position information ahead of time,
the better.

</aside>

We take all of this data and wrap it in a class.

^code token-class

Now we have an object with enough structure to be useful for all of the later
phases of the interpreter.

## Regular Languages and Expressions

Now that we know what we're trying to produce, let's, well, produce it. The core
of the scanner is a loop. Starting at the first character of the source code,
the scanner figures out what lexeme the character belongs to, and consumes it
and any following characters that are part of that lexeme. When it reaches the
end of that lexeme, it emits a token.

Then it loops back and does it again, starting from the very next character in
the source code. It keeps doing that, eating characters and occasionally, uh,
excreting tokens, until it reaches the end of the input.

<span name="alligator"></span>

<img src="image/scanning/lexigator.png" alt="An alligator eating characters and, well, you don't want to know." />

<aside name="alligator">

Lexical analygator.

</aside>

The part of the loop where we look at a handful of characters to figure out
which kind of lexeme it "matches" may sound familiar. If you know regular
expressions, you might consider defining a regex for each kind of lexeme and
using those to match characters. For example, Lox has the same rules as C for
identifiers (variable names and the like). This regex matches one:

```text
[a-zA-Z_][a-zA-Z_0-9]*
```

If you did think of regular expressions, your intuition is a deep one. The rules
that determine how a particular language groups characters into lexemes are
called its <span name="theory">**lexical grammar**</span>. In Lox, as in most
programming languages, the rules of that grammar are simple enough for the
language to be classified a **[regular language][]**. That's the same "regular"
as in regular expressions.

[regular language]: https://en.wikipedia.org/wiki/Regular_language

<aside name="theory">

It pains me to gloss over the theory so much, especially when it's as
interesting as I think the [Chomsky hierarchy][] and [finite-state machines][]
are. But the honest truth is other books cover this better than I could.
[*Compilers: Principles, Techniques, and Tools*][dragon] (universally known as
"the dragon book") is the canonical reference.

[chomsky hierarchy]: https://en.wikipedia.org/wiki/Chomsky_hierarchy
[dragon]: https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools
[finite-state machines]: https://en.wikipedia.org/wiki/Finite-state_machine

</aside>

You very precisely *can* recognize all of the different lexemes for Lox using
regexes if you want to, and there's a pile of interesting theory underlying why
that is and what it means. Tools like [Lex][] or
[Flex][] are designed expressly to let you do this -- throw a handful of regexes
at them, and they give you a complete scanner <span name="lex">back</span>.

<aside name="lex">

Lex was created by Mike Lesk and Eric Schmidt. Yes, the same Eric Schmidt who
was executive chairman of Google. I'm not saying programming languages are a
surefire path to wealth and fame, but we *can* count at least one
mega billionaire among us.

</aside>

[lex]: http://dinosaur.compilertools.net/lex/
[flex]: https://github.com/westes/flex

Since our goal is to understand how a scanner does what it does, we won't be
delegating that task. We're about handcrafted goods.

## The Scanner Class

Without further ado, let's make ourselves a scanner.

^code scanner-class

<aside name="static-import">

I know static imports are considered bad style by some, but they save me from
having to sprinkle `TokenType.` all over the scanner and parser. Forgive me, but
every character counts in a book.

</aside>

We store the raw source code as a simple string, and we have a list ready to
fill with tokens we're going to generate. The aforementioned loop that does that
looks like this:

^code scan-tokens

The scanner works its way through the source code, adding tokens until it runs
out of characters. Then it appends one final "end of file" token. That isn't
strictly needed, but it makes our parser a little cleaner.

This loop depends on a couple of fields to keep track of where the scanner is in
the source code.

^code scan-state (1 before, 2 after)

The `start` and `current` fields are offsets that index into the string. The
`start` field points to the first character in the lexeme being scanned, and
`current` points at the character currently being considered. The `line` field
tracks what source line `current` is on so we can produce tokens that know their
location.

Then we have one little helper function that tells us if we've consumed all the
characters.

^code is-at-end

## Recognizing Lexemes

In each turn of the loop, we scan a single token. This is the real heart of the
scanner. We'll start simple. Imagine if every lexeme were only a single character
long. All you would need to do is consume the next character and pick a token type for
it. Several lexemes *are* only a single character in Lox, so let's start with
those.

^code scan-token

<aside name="slash">

Wondering why `/` isn't in here? Don't worry, we'll get to it.

</aside>

Again, we need a couple of helper methods.

^code advance-and-add-token

The `advance()` method consumes the next character in the source file and
returns it. Where `advance()` is for input, `addToken()` is for output. It grabs
the text of the current lexeme and creates a new token for it. We'll use the
other overload to handle tokens with literal values soon.

### Lexical errors

Before we get too far in, let's take a moment to think about errors at the
lexical level. What happens if a user throws a source file containing some
characters Lox doesn't use, like `@#^`, at our interpreter? Right now, those
characters get silently discarded. They aren't used by the Lox language, but
that doesn't mean the interpreter can pretend they aren't there. Instead, we
report an error.

^code char-error (1 before, 1 after)

Note that the erroneous character is still *consumed* by the earlier call to
`advance()`. That's important so that we don't get stuck in an infinite loop.

Note also that we <span name="shotgun">*keep scanning*</span>. There may be
other errors later in the program. It gives our users a better experience if we
detect as many of those as possible in one go. Otherwise, they see one tiny
error and fix it, only to have the next error appear, and so on. Syntax error
Whac-A-Mole is no fun.

(Don't worry. Since `hadError` gets set, we'll never try to *execute* any of the
code, even though we keep going and scan the rest of it.)

<aside name="shotgun">

The code reports each invalid character separately, so this shotguns the user
with a blast of errors if they accidentally paste a big blob of weird text.
Coalescing a run of invalid characters into a single error would give a nicer
user experience.

</aside>

### Operators

We have single-character lexemes working, but that doesn't cover all of Lox's
operators. What about `!`? It's a single character, right? Sometimes, yes, but
if the very next character is an equals sign, then we should instead create a
`!=` lexeme. Note that the `!` and `=` are *not* two independent operators. You
can't write `!   =` in Lox and have it behave like an inequality operator.
That's why we need to scan `!=` as a single lexeme. Likewise, `<`, `>`, and `=`
can all be followed by `=` to create the other equality and comparison
operators.

For all of these, we need to look at the second character.

^code two-char-tokens (1 before, 2 after)

Those cases use this new method:

^code match

It's like a conditional `advance()`. We only consume the current character if
it's what we're looking for.

Using `match()`, we recognize these lexemes in two stages. When we reach, for
example, `!`, we jump to its switch case. That means we know the lexeme *starts*
with `!`. Then we look at the next character to determine if we're on a `!=` or
merely a `!`.

## Longer Lexemes

We're still missing one operator: `/` for division. That character needs a
little special handling because comments begin with a slash too.

^code slash (1 before, 2 after)

This is similar to the other two-character operators, except that when we find a
second `/`, we don't end the token yet. Instead, we keep consuming characters
until we reach the end of the line.

This is our general strategy for handling longer lexemes. After we detect the
beginning of one, we shunt over to some lexeme-specific code that keeps eating
characters until it sees the end.

We've got another helper:

^code peek

It's sort of like `advance()`, but doesn't consume the character. This is called
<span name="match">**lookahead**</span>. Since it only looks at the current
unconsumed character, we have *one character of lookahead*. The smaller this
number is, generally, the faster the scanner runs. The rules of the lexical
grammar dictate how much lookahead we need. Fortunately, most languages in wide
use peek only one or two characters ahead.

<aside name="match">

Technically, `match()` is doing lookahead too. `advance()` and `peek()` are the
fundamental operators and `match()` combines them.

</aside>

Comments are lexemes, but they aren't meaningful, and the parser doesn't want
to deal with them. So when we reach the end of the comment, we *don't* call
`addToken()`. When we loop back around to start the next lexeme, `start` gets
reset and the comment's lexeme disappears in a puff of smoke.

While we're at it, now's a good time to skip over those other meaningless
characters: newlines and whitespace.

^code whitespace (1 before, 3 after)

When encountering whitespace, we simply go back to the beginning of the scan
loop. That starts a new lexeme *after* the whitespace character. For newlines,
we do the same thing, but we also increment the line counter. (This is why we
used `peek()` to find the newline ending a comment instead of `match()`. We want
that newline to get us here so we can update `line`.)

Our scanner is getting smarter. It can handle fairly free-form code like:

```lox
// this is a comment
(( )){} // grouping stuff
!*+-/=<> <= == // operators
```

### String literals

Now that we're comfortable with longer lexemes, we're ready to tackle literals.
We'll do strings first, since they always begin with a specific character, `"`.

^code string-start (1 before, 2 after)

That calls:

^code string

Like with comments, we consume characters until we hit the `"` that ends the
string. We also gracefully handle running out of input before the string is
closed and report an error for that.

For no particular reason, Lox supports multi-line strings. There are pros and
cons to that, but prohibiting them was a little more complex than allowing them,
so I left them in. That does mean we also need to update `line` when we hit a
newline inside a string.

Finally, the last interesting bit is that when we create the token, we also
produce the actual string *value* that will be used later by the interpreter.
Here, that conversion only requires a `substring()` to strip off the surrounding
quotes. If Lox supported escape sequences like `\n`, we'd unescape those here.

### Number literals

All numbers in Lox are floating point at runtime, but both integer and decimal
literals are supported. A number literal is a series of <span
name="minus">digits</span> optionally followed by a `.` and one or more trailing
digits.

<aside name="minus">

Since we look only for a digit to start a number, that means `-123` is not a
number *literal*. Instead, `-123` is an *expression* that applies `-` to the
number literal `123`. In practice, the result is the same, though it has one
interesting edge case if we were to add method calls on numbers. Consider:

```lox
print -123.abs();
```

This prints `-123` because negation has lower precedence than method calls. We
could fix that by making `-` part of the number literal. But then consider:

```lox
var n = 123;
print -n.abs();
```

This still produces `-123`, so now the language seems inconsistent. No matter
what you do, some case ends up weird.

</aside>

```lox
1234
12.34
```

We don't allow a leading or trailing decimal point, so these are both invalid:

```lox
.1234
1234.
```

We could easily support the former, but I left it out to keep things simple. The
latter gets weird if we ever want to allow methods on numbers like `123.sqrt()`.

To recognize the beginning of a number lexeme, we look for any digit. It's kind
of tedious to add cases for every decimal digit, so we'll stuff it in the
default case instead.

^code digit-start (1 before, 1 after)

This relies on this little utility:

^code is-digit

<aside name="is-digit">

The Java standard library provides [`Character.isDigit()`][is-digit], which seems
like a good fit. Alas, that method allows things like Devanagari digits,
full-width numbers, and other funny stuff we don't want.

[is-digit]: http://docs.oracle.com/javase/7/docs/api/java/lang/Character.html#isDigit(char)

</aside>

Once we know we are in a number, we branch to a separate method to consume the
rest of the literal, like we do with strings.

^code number

We consume as many digits as we find for the integer part of the literal. Then
we look for a fractional part, which is a decimal point (`.`) followed by at
least one digit. If we do have a fractional part, again, we consume as many
digits as we can find.

Looking past the decimal point requires a second character of lookahead since we
don't want to consume the `.` until we're sure there is a digit *after* it. So
we add:

^code peek-next

<aside name="peek-next">

I could have made `peek()` take a parameter for the number of characters ahead
to look instead of defining two functions, but that would allow *arbitrarily*
far lookahead. Providing these two functions makes it clearer to a reader of the
code that our scanner looks ahead at most two characters.

</aside>


Finally, we convert the lexeme to its numeric value. Our interpreter uses Java's
`Double` type to represent numbers, so we produce a value of that type. We're
using Java's own parsing method to convert the lexeme to a real Java double. We
could implement that ourselves, but, honestly, unless you're trying to cram for
an upcoming programming interview, it's not worth your time.

The remaining literals are Booleans and `nil`, but we handle those as keywords,
which gets us to...

## Reserved Words and Identifiers

Our scanner is almost done. The only remaining pieces of the lexical grammar to
implement are identifiers and their close cousins, the reserved words. You might
think we could match keywords like `or` in the same way we handle
multiple-character operators like `<=`.

```java
case 'o':
  if (match('r')) {
    addToken(OR);
  }
  break;
```

Consider what would happen if a user named a variable `orchid`. The scanner
would see the first two letters, `or`, and immediately emit an `or` keyword
token. This gets us to an important principle called <span
name="maximal">**maximal munch**</span>. When two lexical grammar rules can both
match a chunk of code that the scanner is looking at, *whichever one matches the
most characters wins*.

That rule states that if we can match `orchid` as an identifier and `or` as a
keyword, then the former wins. This is also why we tacitly assumed, previously,
that `<=` should be scanned as a single `<=` token and not `<` followed by `=`.

<aside name="maximal">

Consider this nasty bit of C code:

```c
---a;
```

Is it valid? That depends on how the scanner splits the lexemes. What if the scanner
sees it like this:

```c
- --a;
```

Then it could be parsed. But that would require the scanner to know about the
grammatical structure of the surrounding code, which entangles things more than
we want. Instead, the maximal munch rule says that it is *always* scanned like:

```c
-- -a;
```

It scans it that way even though doing so leads to a syntax error later in the
parser.

</aside>

Maximal munch means we can't easily detect a reserved word until we've reached
the end of what might instead be an identifier. After all, a reserved word *is*
an identifier, it's just one that has been claimed by the language for its own
use. That's where the term **reserved word** comes from.

So we begin by assuming any lexeme starting with a letter or underscore is an
identifier.

^code identifier-start (3 before, 3 after)

The rest of the code lives over here:

^code identifier

We define that in terms of these helpers:

^code is-alpha

That gets identifiers working. To handle keywords, we see if the identifier's
lexeme is one of the reserved words. If so, we use a token type specific to that
keyword. We define the set of reserved words in a map.

^code keyword-map

Then, after we scan an identifier, we check to see if it matches anything in the
map.

^code keyword-type (2 before, 1 after)

If so, we use that keyword's token type. Otherwise, it's a regular user-defined
identifier.

And with that, we now have a complete scanner for the entire Lox lexical
grammar. Fire up the REPL and type in some valid and invalid code. Does it
produce the tokens you expect? Try to come up with some interesting edge cases
and see if it handles them as it should.

<div class="challenges">

## Challenges

1.  The lexical grammars of Python and Haskell are not *regular*. What does that
    mean, and why aren't they?

1.  Aside from separating tokens -- distinguishing `print foo` from `printfoo`
    -- spaces aren't used for much in most languages. However, in a couple of
    dark corners, a space *does* affect how code is parsed in CoffeeScript,
    Ruby, and the C preprocessor. Where and what effect does it have in each of
    those languages?

1.  Our scanner here, like most, discards comments and whitespace since those
    aren't needed by the parser. Why might you want to write a scanner that does
    *not* discard those? What would it be useful for?

1.  Add support to Lox's scanner for C-style `/* ... */` block comments. Make
    sure to handle newlines in them. Consider allowing them to nest. Is adding
    support for nesting more work than you expected? Why?

</div>

<div class="design-note">

## Design Note: Implicit Semicolons

Programmers today are spoiled for choice in languages and have gotten picky
about syntax. They want their language to look clean and modern. One bit of
syntactic lichen that almost every new language scrapes off (and some ancient
ones like BASIC never had) is `;` as an explicit statement terminator.

Instead, they treat a newline as a statement terminator where it makes sense to
do so. The "where it makes sense" part is the challenging bit. While *most*
statements are on their own line, sometimes you need to spread a single
statement across a couple of lines. Those intermingled newlines should not be
treated as terminators.

Most of the obvious cases where the newline should be ignored are easy to
detect, but there are a handful of nasty ones:

* A return value on the next line:

    ```js
    if (condition) return
    "value"
    ```

    Is "value" the value being returned, or do we have a `return` statement with
    no value followed by an expression statement containing a string literal?

* A parenthesized expression on the next line:

    ```js
    func
    (parenthesized)
    ```

    Is this a call to `func(parenthesized)`, or two expression statements, one
    for `func` and one for a parenthesized expression?

* A `-` on the next line:

    ```js
    first
    -second
    ```

    Is this `first - second` -- an infix subtraction -- or two expression
    statements, one for `first` and one to negate `second`?

In all of these, both treating the newline as a separator and ignoring it would
produce valid code, but possibly not the code the user wants. Across languages,
there is an unsettling variety of rules used to decide which newlines are
separators. Here are a couple:

*   [Lua][] completely ignores newlines, but carefully controls its grammar such
    that no separator between statements is needed at all in most cases. This is
    perfectly legit:

    ```lua
    a = 1 b = 2
    ```

    Lua avoids the `return` problem by requiring a `return` statement to be the
    very last statement in a block. If there is a value after `return` before
    the keyword `end`, it *must* be for the `return`. For the other two cases,
    they allow an explicit `;` and expect users to use that. In practice, that
    almost never happens because there's no point in a parenthesized or unary
    negation expression statement.

*   [Go][] handles newlines in the scanner. If a newline appears following one
    of a handful of token types that are known to potentially end a statement,
    the newline is treated like a semicolon. Otherwise it is ignored. The Go
    team provides a canonical code formatter, [gofmt][], and the ecosystem is
    fervent about its use, which ensures that idiomatically styled code works well
    with this simple rule.

*   [Python][] treats all newlines as significant unless an explicit backslash
    is used at the end of a line to continue it to the next line. However,
    newlines anywhere inside a pair of brackets (`()`, `[]`, or `{}`) are
    ignored. Idiomatic style strongly prefers the latter.

    This rule works well for Python because it is a highly statement-oriented
    language. In particular, Python's grammar ensures a statement never appears
    inside an expression. C does the same, but many other languages which have a
    "lambda" or function literal syntax do not.

    An example in JavaScript:

    ```js
    console.log(function() {
      statement();
    });
    ```

    Here, the `console.log()` *expression* contains a function literal which
    in turn contains the *statement* `statement();`.

    Python would need a different set of rules for implicitly joining lines if
    you could get back *into* a <span name="lambda">statement</span> where
    newlines should become meaningful while still nested inside brackets.

<aside name="lambda">

And now you know why Python's `lambda` allows only a single expression body.

</aside>

*   JavaScript's "[automatic semicolon insertion][asi]" rule is the real odd
    one. Where other languages assume most newlines *are* meaningful and only a
    few should be ignored in multi-line statements, JS assumes the opposite. It
    treats all of your newlines as meaningless whitespace *unless* it encounters
    a parse error. If it does, it goes back and tries turning the previous
    newline into a semicolon to get something grammatically valid.

    This design note would turn into a design diatribe if I went into complete
    detail about how that even *works*, much less all the various ways that
    JavaScript's "solution" is a bad idea. It's a mess. JavaScript is the only
    language I know where many style guides demand explicit semicolons after
    every statement even though the language theoretically lets you elide them.

If you're designing a new language, you almost surely *should* avoid an explicit
statement terminator. Programmers are creatures of fashion like other humans, and
semicolons are as passé as ALL CAPS KEYWORDS. Just make sure you pick a set of
rules that make sense for your language's particular grammar and idioms. And
don't do what JavaScript did.

</div>

[lua]: https://www.lua.org/pil/1.1.html
[go]: https://golang.org/ref/spec#Semicolons
[gofmt]: https://golang.org/cmd/gofmt/
[python]: https://docs.python.org/3.5/reference/lexical_analysis.html#implicit-line-joining
[asi]: https://www.ecma-international.org/ecma-262/5.1/#sec-7.9


================================================
FILE: book/statements-and-state.md
================================================
> All my life, my heart has yearned for a thing I cannot name.
>
> <cite>Andr&eacute; Breton, <em>Mad Love</em></cite>

The interpreter we have so far feels less like programming a real language and
more like punching buttons on a calculator. "Programming" to me means building
up a system out of smaller pieces. We can't do that yet because we have no way
to bind a name to some data or function. We can't compose software without a way
to refer to the pieces.

To support bindings, our interpreter needs internal state. When you define a
variable at the beginning of the program and use it at the end, the interpreter
has to hold on to the value of that variable in the meantime. So in this
chapter, we will give our interpreter a brain that can not just process, but
*remember*.

<img src="image/statements-and-state/brain.png" alt="A brain, presumably remembering stuff." />

State and <span name="expr">statements</span> go hand in hand. Since statements,
by definition, don't evaluate to a value, they need to do something else to be
useful. That something is called a **side effect**. It could mean producing
user-visible output or modifying some state in the interpreter that can be
detected later. The latter makes them a great fit for defining variables or
other named entities.

<aside name="expr">

You could make a language that treats variable declarations as expressions that
both create a binding and produce a value. The only language I know that does
that is Tcl. Scheme seems like a contender, but note that after a `let`
expression is evaluated, the variable it bound is forgotten. The `define` syntax
is not an expression.

</aside>

In this chapter, we'll do all of that. We'll define statements that produce
output (`print`) and create state (`var`). We'll add expressions to access and
assign to variables. Finally, we'll add blocks and local scope. That's a lot to
stuff into one chapter, but we'll chew through it all one bite at a time.

## Statements

We start by extending Lox's grammar with statements. They aren't very different
from expressions. We start with the two simplest kinds:

1.  An **expression statement** lets you place an expression where a statement
    is expected. Such statements exist to evaluate expressions that have side
    effects. You
    may not notice them, but you use them all the time in <span
    name="expr-stmt">C</span>, Java, and other languages. Any time you see a
    function or method call followed by a `;`, you're looking at an expression
    statement.

    <aside name="expr-stmt">

    Pascal is an outlier. It distinguishes between *procedures* and *functions*.
    Functions return values, but procedures cannot. There is a statement form
    for calling a procedure, but functions can only be called where an
    expression is expected. There are no expression statements in Pascal.

    </aside>

2.  A **`print` statement** evaluates an expression and displays the result to
    the user. I admit it's weird to bake printing right into the language
    instead of making it a library function. Doing so is a concession to the
    fact that we're building this interpreter one chapter at a time and want to
    be able to play with it before it's all done. To make print a library
    function, we'd have to wait until we had all of the machinery for defining
    and calling functions <span name="print">before</span> we could witness any
    side effects.

    <aside name="print">

    I will note with only a modicum of defensiveness that BASIC and Python
    have dedicated `print` statements and they are real languages. Granted,
    Python did remove their `print` statement in 3.0...

    </aside>

New syntax means new grammar rules. In this chapter, we finally gain the ability
to parse an entire Lox script. Since Lox is an imperative, dynamically typed
language, the "top level" of a script is simply a list of statements. The new
rules are:

```ebnf
program        → statement* EOF ;

statement      → exprStmt
               | printStmt ;

exprStmt       → expression ";" ;
printStmt      → "print" expression ";" ;
```

The first rule is now `program`, which is the starting point for the grammar and
represents a complete Lox script or REPL entry. A program is a list of
statements followed by the special "end of file" token. The mandatory end token
ensures the parser consumes the entire input and doesn't silently ignore
erroneous unconsumed tokens at the end of a script.

Right now, `statement` only has two cases for the two kinds of statements we've
described. We'll fill in more later in this chapter and in the following ones.
The next step is turning this grammar into something we can store in memory --
syntax trees.

### Statement syntax trees

There is no place in the grammar where both an expression and a statement are
allowed. The operands of, say, `+` are always expressions, never statements. The
body of a `while` loop is always a statement.

Since the two syntaxes are disjoint, we don't need a single base class that they
all inherit from. Splitting expressions and statements into separate class
hierarchies enables the Java compiler to help us find dumb mistakes like passing
a statement to a Java method that expects an expression.

That means a new base class for statements. As our elders did before us, we will
use the cryptic name "Stmt". With great <span name="foresight">foresight</span>,
I have designed our little AST metaprogramming script in anticipation of this.
That's why we passed in "Expr" as a parameter to `defineAst()`. Now we add
another call to define Stmt and its <span name="stmt-ast">subclasses</span>.

<aside name="foresight">

Not really foresight: I wrote all the code for the book before I sliced it into
chapters.

</aside>

^code stmt-ast (2 before, 1 after)

<aside name="stmt-ast">

The generated code for the new nodes is in [Appendix II][appendix-ii]: [Expression statement][], [Print statement][].

[appendix-ii]: appendix-ii.html
[expression statement]: appendix-ii.html#expression-statement
[print statement]: appendix-ii.html#print-statement

</aside>

Run the AST generator script and behold the resulting "Stmt.java" file with the
syntax tree classes we need for expression and `print` statements. Don't forget
to add the file to your IDE project or makefile or whatever.

### Parsing statements

The parser's `parse()` method that parses and returns a single expression was a
temporary hack to get the last chapter up and running. Now that our grammar has
the correct starting rule, `program`, we can turn `parse()` into the real deal.

^code parse

<aside name="parse-error-handling">

What about the code we had in here for catching `ParseError` exceptions? We'll
put better parse error handling in place soon when we add support for additional
statement types.

</aside>

This parses a series of statements, as many as it can find until it hits the end
of the input. This is a pretty direct translation of the `program` rule into
recursive descent style. We must also chant a minor prayer to the Java verbosity
gods since we are using ArrayList now.

^code parser-imports (2 before, 1 after)

A program is a list of statements, and we parse one of those statements using
this method:

^code parse-statement

A little bare bones, but we'll fill it in with more statement types later. We
determine which specific statement rule is matched by looking at the current
token. A `print` token means it's obviously a `print` statement.

If the next token doesn't look like any known kind of statement, we assume it
must be an expression statement. That's the typical final fallthrough case when
parsing a statement, since it's hard to proactively recognize an expression from
its first token.

Each statement kind gets its own method. First `print`:

^code parse-print-statement

Since we already matched and consumed the `print` token itself, we don't need to
do that here. We parse the subsequent expression, consume the terminating
semicolon, and emit the syntax tree.

If we didn't match a `print` statement, we must have one of these:

^code parse-expression-statement

Similar to the previous method, we parse an expression followed by a semicolon.
We wrap that Expr in a Stmt of the right type and return it.

### Executing statements

We're running through the previous couple of chapters in microcosm, working our
way through the front end. Our parser can now produce statement syntax trees, so
the next and final step is to interpret them. As in expressions, we use the
Visitor pattern, but we have a new visitor interface, Stmt.Visitor, to
implement since statements have their own base class.

We add that to the list of interfaces Interpreter implements.

^code interpreter (1 after)

<aside name="void">

Java doesn't let you use lowercase "void" as a generic type argument for obscure
reasons having to do with type erasure and the stack. Instead, there is a
separate "Void" type specifically for this use. Sort of a "boxed void", like
"Integer" is for "int".

</aside>

Unlike expressions, statements produce no values, so the return type of the
visit methods is Void, not Object. We have two statement types, and we need a
visit method for each. The easiest is expression statements.

^code visit-expression-stmt

We evaluate the inner expression using our existing `evaluate()` method and
<span name="discard">discard</span> the value. Then we return `null`. Java
requires that to satisfy the special capitalized Void return type. Weird, but
what can you do?

<aside name="discard">

Appropriately enough, we discard the value returned by `evaluate()` by placing
that call inside a *Java* expression statement.

</aside>

The `print` statement's visit method isn't much different.

^code visit-print

Before discarding the expression's value, we convert it to a string using the
`stringify()` method we introduced in the last chapter and then dump it to
stdout.

Our interpreter is able to visit statements now, but we have some work to do to
feed them to it. First, modify the old `interpret()` method in the Interpreter
class to accept a list of statements -- in other words, a program.

^code interpret

This replaces the old code which took a single expression. The new code relies
on this tiny helper method:

^code execute

That's the statement analogue to the `evaluate()` method we have for
expressions. Since we're working with lists now, we need to let Java know.

^code import-list (2 before, 2 after)

The main Lox class is still trying to parse a single expression and pass it to
the interpreter. We fix the parsing line like so:

^code parse-statements (1 before, 2 after)

And then replace the call to the interpreter with this:

^code interpret-statements (2 before, 1 after)

Basically just plumbing the new syntax through. OK, fire up the interpreter and
give it a try. At this point, it's worth sketching out a little Lox program in a
text file to run as a script. Something like:

```lox
print "one";
print true;
print 2 + 1;
```

It almost looks like a real program! Note that the REPL, too, now requires you
to enter a full statement instead of a simple expression. Don't forget your
semicolons.

## Global Variables

Now that we have statements, we can start working on state. Before we get into
all of the complexity of lexical scoping, we'll start off with the easiest kind
of variables -- <span name="globals">globals</span>. We need two new constructs.

1.  A **variable declaration** statement brings a new variable into the world.

    ```lox
    var beverage = "espresso";
    ```

    This creates a new binding that associates a name (here "beverage") with a
    value (here, the string `"espresso"`).

2.  Once that's done, a **variable expression** accesses that binding. When the
    identifier "beverage" is used as an expression, it looks up the value bound
    to that name and returns it.

    ```lox
    print beverage; // "espresso".
    ```

Later, we'll add assignment and block scope, but that's enough to get moving.

<aside name="globals">

Global state gets a bad rap. Sure, lots of global state -- especially *mutable*
state -- makes it hard to maintain large programs. It's good software
engineering to minimize how much you use.

But when you're slapping together a simple programming language or, heck, even
learning your first language, the flat simplicity of global variables helps. My
first language was BASIC and, though I outgrew it eventually, it was nice that I
didn't have to wrap my head around scoping rules before I could make a computer
do fun stuff.

</aside>

### Variable syntax

As before, we'll work through the implementation from front to back, starting
with the syntax. Variable declarations are statements, but they are different
from other statements, and we're going to split the statement grammar in two to
handle them. That's because the grammar restricts where some kinds of statements
are allowed.

The clauses in control flow statements -- think the then and else branches of
an `if` statement or the body of a `while` -- are each a single statement. But
that statement is not allowed to be one that declares a name. This is OK:

```lox
if (monday) print "Ugh, already?";
```

But this is not:

```lox
if (monday) var beverage = "espresso";
```

We *could* allow the latter, but it's confusing. What is the scope of that
`beverage` variable? Does it persist after the `if` statement? If so, what is
its value on days other than Monday? Does the variable exist at all on those
days?

Code like this is weird, so C, Java, and friends all disallow it. It's as if
there are two levels of <span name="brace">"precedence"</span> for statements.
Some places where a statement is allowed -- like inside a block or at the top
level -- allow any kind of statement, including declarations. Others allow only
the "higher" precedence statements that don't declare names.

<aside name="brace">

In this analogy, block statements work sort of like parentheses do for
expressions. A block is itself in the "higher" precedence level and can be used
anywhere, like in the clauses of an `if` statement. But the statements it
*contains* can be lower precedence. You're allowed to declare variables and
other names inside the block. The curlies let you escape back into the full
statement grammar from a place where only some statements are allowed.

</aside>

To accommodate the distinction, we add another rule for kinds of statements that
declare names.

```ebnf
program        → declaration* EOF ;

declaration    → varDecl
               | statement ;

statement      → exprStmt
               | printStmt ;
```

Declaration statements go under the new `declaration` rule. Right now, it's only
variables, but later it will include functions and classes. Any place where a
declaration is allowed also allows non-declaring statements, so the
`declaration` rule falls through to `statement`. Obviously, you can declare
stuff at the top level of a script, so `program` routes to the new rule.

The rule for declaring a variable looks like:

```ebnf
varDecl        → "var" IDENTIFIER ( "=" expression )? ";" ;
```

Like most statements, it starts with a leading keyword. In this case, `var`.
Then an identifier token for the name of the variable being declared, followed
by an optional initializer expression. Finally, we put a bow on it with the
semicolon.

To access a variable, we define a new kind of primary expression.

```ebnf
primary        → "true" | "false" | "nil"
               | NUMBER | STRING
               | "(" expression ")"
               | IDENTIFIER ;
```

That `IDENTIFIER` clause matches a single identifier token, which is understood
to be the name of the variable being accessed.

These new grammar rules get their corresponding syntax trees. Over in the AST
generator, we add a <span name="var-stmt-ast">new statement</span> node for a
variable declaration.

^code var-stmt-ast (1 before, 1 after)

<aside name="var-stmt-ast">

The generated code for the new node is in [Appendix II][appendix-var-stmt].

[appendix-var-stmt]: appendix-ii.html#variable-statement

</aside>

It stores the name token so we know what it's declaring, along with the
initializer expression. (If there isn't an initializer, that field is `null`.)

Then we add an expression node for accessing a variable.

^code var-expr (1 before, 1 after)

<span name="var-expr-ast">It's</span> simply a wrapper around the token for the
variable name. That's it. As always, don't forget to run the AST generator
script so that you get updated "Expr.java" and "Stmt.java" files.

<aside name="var-expr-ast">

The generated code for the new node is in [Appendix II][appendix-var-expr].

[appendix-var-expr]: appendix-ii.html#variable-expression

</aside>

### Parsing variables

Before we parse variable statements, we need to shift around some code to make
room for the new `declaration` rule in the grammar. The top level of a program
is now a list of declarations, so the entrypoint method to the parser changes.

^code parse-declaration (3 before, 4 after)

That calls this new method:

^code declaration

Hey, do you remember way back in that [earlier chapter][parsing] when we put the
infrastructure in place to do [error recovery][]? We are finally ready to hook
that up.

[parsing]: parsing-expressions.html
[error recovery]: parsing-expressions.html#panic-mode-error-recovery

This `declaration()` method is the method we call repeatedly when parsing a
series of statements in a block or a script, so it's the right place to
synchronize when the parser goes into panic mode. The whole body of this method
is wrapped in a try block to catch the exception thrown when the parser begins
error recovery. This gets it back to trying to parse the beginning of the next
statement or declaration.

The real parsing happens inside the try block. First, it looks to see if we're
at a variable declaration by looking for the leading `var` keyword. If not, it
falls through to the existing `statement()` method that parses `print` and
expression statements.

Remember how `statement()` tries to parse an expression statement if no other
statement matches? And `expression()` reports a syntax error if it can't parse
an expression at the current token? That chain of calls ensures we report an
error if a valid declaration or statement isn't parsed.

When the parser matches a `var` token, it branches to:

^code parse-var-declaration

As always, the recursive descent code follows the grammar rule. The parser has
already matched the `var` token, so next it requires and consumes an identifier
token for the variable name.

Then, if it sees an `=` token, it knows there is an initializer expression and
parses it. Otherwise, it leaves the initializer `null`. Finally, it consumes the
required semicolon at the end of the statement. All this gets wrapped in a
Stmt.Var syntax tree node and we're groovy.

Parsing a variable expression is even easier. In `primary()`, we look for an
identifier token.

^code parse-identifier (2 before, 2 after)

That gives us a working front end for declaring and using variables. All that's
left is to feed it into the interpreter. Before we get to that, we need to talk
about where variables live in memory.

## Environments

The bindings that associate variables to values need to be stored somewhere.
Ever since the Lisp folks invented parentheses, this data structure has been
called an <span name="env">**environment**</span>.

<img src="image/statements-and-state/environment.png" alt="An environment containing two bindings." />

<aside name="env">

I like to imagine the environment literally, as a sylvan wonderland where
variables and values frolic.

</aside>

You can think of it like a <span name="map">map</span> where the keys are
variable names and the values are the variable's, uh, values. In fact, that's
how we'll implement it in Java. We could stuff that map and the code to manage
it right into Interpreter, but since it forms a nicely delineated concept, we'll
pull it out into its own class.

Start a new file and add:

<aside name="map">

Java calls them **maps** or **hashmaps**. Other languages call them **hash
tables**, **dictionaries** (Python and C#), **hashes** (Ruby and Perl),
**tables** (Lua), or **associative arrays** (PHP). Way back when, they were
known as **scatter tables**.

</aside>

^code environment-class

There's a Java Map in there to store the bindings. It uses bare strings for the
keys, not tokens. A token represents a unit of code at a specific place in the
source text, but when it comes to looking up variables, all identifier tokens
with the same name should refer to the same variable (ignoring scope for now).
Using the raw string ensures all of those tokens refer to the same map key.

There are two operations we need to support. First, a variable definition binds
a new name to a value.

^code environment-define

Not exactly brain surgery, but we have made one interesting semantic choice.
When we add the key to the map, we don't check to see if it's already present.
That means that this program works:

```lox
var a = "before";
print a; // "before".
var a = "after";
print a; // "after".
```

A variable statement doesn't just define a *new* variable, it can also be used
to *re*define an existing variable. We could <span name="scheme">choose</span>
to make this an error instead. The user may not intend to redefine an existing
variable. (If they did mean to, they probably would have used assignment, not
`var`.) Making redefinition an error would help them find that bug.

However, doing so interacts poorly with the REPL. In the middle of a REPL
session, it's nice to not have to mentally track which variables you've already
defined. We could allow redefinition in the REPL but not in scripts, but then
users would have to learn two sets of rules, and code copied and pasted from one
form to the other might not work.

<aside name="scheme">

My rule about variables and scoping is, "When in doubt, do what Scheme does".
The Scheme folks have probably spent more time thinking about variable scope
than we ever will -- one of the main goals of Scheme was to introduce lexical
scoping to the world -- so it's hard to go wrong if you follow in their
footsteps.

Scheme allows redefining variables at the top level.

</aside>

So, to keep the two modes consistent, we'll allow it -- at least for global
variables. Once a variable exists, we need a way to look it up.

^code environment-get (2 before, 1 after)

This is a little more semantically interesting. If the variable is found, it
simply returns the value bound to it. But what if it's not? Again, we have a
choice:

* Make it a syntax error.

* Make it a runtime error.

* Allow it and return some default value like `nil`.

Lox is pretty lax, but the last option is a little *too* permissive to me.
Making it a syntax error -- a compile-time error -- seems like a smart choice.
Using an undefined variable is a bug, and the sooner you detect the mistake, the
better.

The problem is that *using* a variable isn't the same as *referring* to it. You
can refer to a variable in a chunk of code without immediately evaluating it if
that chunk of code is wrapped inside a function. If we make it a static error to
*mention* a variable before it's been declared, it becomes much harder to define
recursive functions.

We could accommodate single recursion -- a function that calls itself -- by
declaring the function's own name before we examine its body. But that doesn't
help with mutually recursive procedures that call each other. Consider:

<span name="contrived"></span>

```lox
fun isOdd(n) {
  if (n == 0) return false;
  return isEven(n - 1);
}

fun isEven(n) {
  if (n == 0) return true;
  return isOdd(n - 1);
}
```

<aside name="contrived">

Granted, this is probably not the most efficient way to tell if a number is even
or odd (not to mention the bad things that happen if you pass a non-integer or
negative number to them). Bear with me.

</aside>

The `isEven()` function isn't defined by the <span name="declare">time</span> we
are looking at the body of `isOdd()` where it's called. If we swap the order of
the two functions, then `isOdd()` isn't defined when we're looking at
`isEven()`'s body.

<aside name="declare">

Some statically typed languages like Java and C# solve this by specifying that
the top level of a program isn't a sequence of imperative statements. Instead, a
program is a set of declarations which all come into being simultaneously. The
implementation declares *all* of the names before looking at the bodies of *any*
of the functions.

Older languages like C and Pascal don't work like this. Instead, they force you
to add explicit *forward declarations* to declare a name before it's fully
defined. That was a concession to the limited computing power at the time. They
wanted to be able to compile a source file in one single pass through the text,
so those compilers couldn't gather up all of the declarations first before
processing function bodies.

</aside>

Since making it a *static* error makes recursive declarations too difficult,
we'll defer the error to runtime. It's OK to refer to a variable before it's
defined as long as you don't *evaluate* the reference. That lets the program
for even and odd numbers work, but you'd get a runtime error in:

```lox
print a;
var a = "too late!";
```

As with type errors in the expression evaluation code, we report a runtime error
by throwing an exception. The exception contains the variable's token so we can
tell the user where in their code they messed up.

### Interpreting global variables

The Interpreter class gets an instance of the new Environment class.

^code environment-field (2 before, 1 after)

We store it as a field directly in Interpreter so that the variables stay in
memory as long as the interpreter is still running.

We have two new syntax trees, so that's two new visit methods. The first is for
declaration statements.

^code visit-var

If the variable has an initializer, we evaluate it. If not, we have another
choice to make. We could have made this a syntax error in the parser by
*requiring* an initializer. Most languages don't, though, so it feels a little
harsh to do so in Lox.

We could make it a runtime error. We'd let you define an uninitialized variable,
but if you accessed it before assigning to it, a runtime error would occur. It's
not a bad idea, but most dynamically typed languages don't do that. Instead,
we'll keep it simple and say that Lox sets a variable to `nil` if it isn't
explicitly initialized.

```lox
var a;
print a; // "nil".
```

Thus, if there isn't an initializer, we set the value to `null`, which is the
Java representation of Lox's `nil` value. Then we tell the environment to bind
the variable to that value.

Next, we evaluate a variable expression.

^code visit-variable

This simply forwards to the environment which does the heavy lifting to make
sure the variable is defined. With that, we've got rudimentary variables
working. Try this out:

```lox
var a = 1;
var b = 2;
print a + b;
```

We can't reuse *code* yet, but we can start to build up programs that reuse
*data*.

## Assignment

It's possible to create a language that has variables but does not let you
reassign -- or **mutate** -- them. Haskell is one example. SML supports only
mutable references and arrays -- variables cannot be reassigned. Rust steers you
away from mutation by requiring a `mut` modifier to enable assignment.

Mutating a variable is a side effect and, as the name suggests, some language
folks think side effects are <span name="pure">dirty</span> or inelegant. Code
should be pure math that produces values -- crystalline, unchanging ones -- like
an act of divine creation. Not some grubby automaton that beats blobs of data
into shape, one imperative grunt at a time.

<aside name="pure">

I find it delightful that the same group of people who pride themselves on
dispassionate logic are also the ones who can't resist emotionally loaded terms
for their work: "pure", "side effect", "lazy", "persistent", "first-class",
"higher-order".

</aside>

Lox is not so austere. Lox is an imperative language, and mutation comes with
the territory. Adding support for assignment doesn't require much work. Global
variables already support redefinition, so most of the machinery is there now.
Mainly, we're missing an explicit assignment notation.

### Assignment syntax

That little `=` syntax is more complex than it might seem. Like most C-derived
languages, Lox treats assignment as an <span name="assign">expression</span> and
not a statement. As in C, it is the lowest precedence expression form. That
means the
rule slots between `expression` and `equality` (the next lowest precedence
expression).

<aside name="assign">

In some other languages, like Pascal, Python, and Go, assignment is a statement.

</aside>

```ebnf
expression     → assignment ;
assignment     → IDENTIFIER "=" assignment
               | equality ;
```

This says an `assignment` is either an identifier followed by an `=` and an
expression for the value, or an `equality` (and thus any other) expression.
Later, `assignment` will get more complex when we add property setters on
objects, like:

```lox
instance.field = "value";
```

The easy part is adding the <span name="assign-ast">new syntax tree node</span>.

^code assign-expr (1 before, 1 after)

<aside name="assign-ast">

The generated code for the new node is in [Appendix II][appendix-assign].

[appendix-assign]: appendix-ii.html#assign-expression

</aside>

It has a token for the variable being assigned to, and an expression for the new
value. After you run the AstGenerator to get the new Expr.Assign class, swap out
the body of the parser's existing `expression()` method to match the updated
rule.

^code expression (1 before, 1 after)

Here is where it gets tricky. A single token lookahead recursive descent parser
can't see far enough to tell that it's parsing an assignment until *after* it
has gone through the left-hand side and stumbled onto the `=`. You might wonder
why it even needs to. After all, we don't know we're parsing a `+` expression
until after we've finished parsing the left operand.

The difference is that the left-hand side of an assignment isn't an expression
that evaluates to a value. It's a sort of pseudo-expression that evaluates to a
"thing" you can assign to. Consider:

```lox
var a = "before";
a = "value";
```

On the second line, we don't *evaluate* `a` (which would return the string
"before"). We figure out what variable `a` refers to so we know where to store
the right-hand side expression's value. The [classic terms][l-value] for these
two <span name="l-value">constructs</span> are **l-value** and **r-value**. All
of the expressions that we've seen so far that produce values are r-values. An
l-value "evaluates" to a storage location that you can assign into.

[l-value]: https://en.wikipedia.org/wiki/Value_(computer_science)#lrvalue

<aside name="l-value">

In fact, the names come from assignment expressions: *l*-values appear on the
*left* side of the `=` in an assignment, and *r*-values on the *right*.

</aside>

We want the syntax tree to reflect that an l-value isn't evaluated like a normal
expression. That's why the Expr.Assign node has a *Token* for the left-hand
side, not an Expr. The problem is that the parser doesn't know it's parsing an
l-value until it hits the `=`. In a complex l-value, that may occur <span
name="many">many</span> tokens later.

```lox
makeList().head.next = node;
```

<aside name="many">

Since the receiver of a field assignment can be any expression, and expressions
can be as long as you want to make them, it may take an *unbounded* number of
tokens of lookahead to find the `=`.

</aside>

We have only a single token of lookahead, so what do we do? We use a little
trick, and it looks like this:

^code parse-assignment

Most of the code for parsing an assignment expression looks similar to that of
the other binary operators like `+`. We parse the left-hand side, which can be
any expression of higher precedence. If we find an `=`, we parse the right-hand
side and then wrap it all up in an assignment expression tree node.

<aside name="no-throw">

We *report* an error if the left-hand side isn't a valid assignment target, but
we don't *throw* it because the parser isn't in a confused state where we need
to go into panic mode and synchronize.

</aside>

One slight difference from binary operators is that we don't loop to build up a
sequence of the same operator. Since assignment is right-associative, we instead
recursively call `assignment()` to parse the right-hand side.

The trick is that right before we create the assignment expression node, we look
at the left-hand side expression and figure out what kind of assignment target
it is. We convert the r-value expression node into an l-value representation.

This conversion works because it turns out that every valid assignment target
happens to also be <span name="converse">valid syntax</span> as a normal
expression. Consider a complex field assignment like:

<aside name="converse">

You can still use this trick even if there are assignment targets that are not
valid expressions. Define a **cover grammar**, a looser grammar that accepts
all of the valid expression *and* assignment target syntaxes. When you hit
an `=`, report an error if the left-hand side isn't within the valid assignment
target grammar. Conversely, if you *don't* hit an `=`, report an error if the
left-hand side isn't a valid *expression*.

</aside>

```lox
newPoint(x + 2, 0).y = 3;
```

The left-hand side of that assignment could also work as a valid expression.

```lox
newPoint(x + 2, 0).y;
```

The first example sets the field, the second gets it.

This means we can parse the left-hand side *as if it were* an expression and
then after the fact produce a syntax tree that turns it into an assignment
target. If the left-hand side expression isn't a <span name="paren">valid</span>
assignment target, we fail with a syntax error. That ensures we report an error
on code like this:

```lox
a + b = c;
```

<aside name="paren">

Way back in the parsing chapter, I said we represent parenthesized expressions
in the syntax tree because we'll need them later. This is why. We need to be
able to distinguish these cases:

```lox
a = 3;   // OK.
(a) = 3; // Error.
```

</aside>

Right now, the only valid target is a simple variable expression, but we'll add
fields later. The end result of this trick is an assignment expression tree node
that knows what it is assigning to and has an expression subtree for the value
being assigned. All with only a single token of lookahead and no backtracking.

### Assignment semantics

We have a new syntax tree node, so our interpreter gets a new visit method.

^code visit-assign

For obvious reasons, it's similar to variable declaration. It evaluates the
right-hand side to get the value, then stores it in the named variable. Instead
of using `define()` on Environment, it calls this new method:

^code environment-assign

The key difference between assignment and definition is that assignment is not
<span name="new">allowed</span> to create a *new* variable. In terms of our
implementation, that means it's a runtime error if the key doesn't already exist
in the environment's variable map.

<aside name="new">

Unlike Python and Ruby, Lox doesn't do [implicit variable declaration][].

[implicit variable declaration]: #design-note

</aside>

The last thing the `visit()` method does is return the assigned value. That's
because assignment is an expression that can be nested inside other expressions,
like so:

```lox
var a = 1;
print a = 2; // "2".
```

Our interpreter can now create, read, and modify variables. It's about as
sophisticated as early <span name="basic">BASICs</span>. Global variables are
simple, but writing a large program when any two chunks of code can accidentally
step on each other's state is no fun. We want *local* variables, which means
it's time for *scope*.

<aside name="basic">

Maybe a little better than that. Unlike some old BASICs, Lox can handle variable
names longer than two characters.

</aside>

## Scope

A **scope** defines a region where a name maps to a certain entity. Multiple
scopes enable the same name to refer to different things in different contexts.
In my house, "Bob" usually refers to me. But maybe in your town you know a
different Bob. Same name, but different dudes based on where you say it.

<span name="lexical">**Lexical scope**</span> (or the less commonly heard
**static scope**) is a specific style of scoping where the text of the program
itself shows where a scope begins and ends. In Lox, as in most modern languages,
variables are lexically scoped. When you see an expression that uses some
variable, you can figure out which variable declaration it refers to just by
statically reading the code.

<aside name="lexical">

"Lexical" comes from the Greek "lexikos" which means "related to words". When we
use it in programming languages, it usually means a thing you can figure out
from source code itself without having to execute anything.

Lexical scope came onto the scene with ALGOL. Earlier languages were often
dynamically scoped. Computer scientists back then believed dynamic scope was
faster to execute. Today, thanks to early Scheme hackers, we know that isn't
true. If anything, it's the opposite.

Dynamic scope for variables lives on in some corners. Emacs Lisp defaults to
dynamic scope for variables. The [`binding`][binding] macro in Clojure provides
it. The widely disliked [`with` statement][with] in JavaScript turns properties
on an object into dynamically scoped variables.

[binding]: http://clojuredocs.org/clojure.core/binding
[with]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/with

</aside>

For example:

```lox
{
  var a = "first";
  print a; // "first".
}

{
  var a = "second";
  print a; // "second".
}
```

Here, we have two blocks with a variable `a` declared in each of them. You and
I can tell just from looking at the code that the use of `a` in the first
`print` statement refers to the first `a`, and the second one refers to the
second.

<img src="image/statements-and-state/blocks.png" alt="An environment for each 'a'." />

This is in contrast to **dynamic scope** where you don't know what a name refers
to until you execute the code. Lox doesn't have dynamically scoped *variables*,
but methods and fields on objects are dynamically scoped.

```lox
class Saxophone {
  play() {
    print "Careless Whisper";
  }
}

class GolfClub {
  play() {
    print "Fore!";
  }
}

fun playIt(thing) {
  thing.play();
}
```

When `playIt()` calls `thing.play()`, we don't know if we're about to hear
"Careless Whisper" or "Fore!" It depends on whether you pass a Saxophone or a
GolfClub to the function, and we don't know that until runtime.

Scope and environments are close cousins. The former is the theoretical concept,
and the latter is the machinery that implements it. As our interpreter works its
way through code, syntax tree nodes that affect scope will change the
environment. In a C-ish syntax like Lox's, scope is controlled by curly-braced
blocks. (That's why we call it **block scope**.)

```lox
{
  var a = "in block";
}
print a; // Error! No more "a".
```

The beginning of a block introduces a new local scope, and that scope ends when
execution passes the closing `}`. Any variables declared inside the block
disappear.

### Nesting and shadowing

A first cut at implementing block scope might work like this:

1.  As we visit each statement inside the block, keep track of any variables
    declared.

2.  After the last statement is executed, tell the environment to delete all of
    those variables.

That would work for the previous example. But remember, one motivation for
local scope is encapsulation -- a block of code in one corner of the program
shouldn't interfere with some other block. Check this out:

```lox
// How loud?
var volume = 11;

// Silence.
volume = 0;

// Calculate size of 3x4x5 cuboid.
{
  var volume = 3 * 4 * 5;
  print volume;
}
```

Look at the block where we calculate the volume of the cuboid using a local
declaration of `volume`. After the block exits, the interpreter will delete the
*global* `volume` variable. That ain't right. When we exit the block, we should
remove any variables declared inside the block, but if there is a variable with
the same name declared outside of the block, *that's a different variable*. It
shouldn't get touched.

When a local variable has the same name as a variable in an enclosing scope, it
**shadows** the outer one. Code inside the block can't see it any more -- it is
hidden in the "shadow" cast by the inner one -- but it's still there.

When we enter a new block scope, we need to preserve variables defined in outer
scopes so they are still around when we exit the inner block. We do that by
defining a fresh environment for each block containing only the variables
defined in that scope. When we exit the block, we discard its environment and
restore the previous one.

We also need to handle enclosing variables that are *not* shadowed.

```lox
var global = "outside";
{
  var local = "inside";
  print global + local;
}
```

Here, `global` lives in the outer global environment and `local` is defined
inside the block's environment. In that `print` statement, both of those
variables are in scope. In order to find them, the interpreter must search not
only the current innermost environment, but also any enclosing ones.

We implement this by <span name="cactus">chaining</span> the environments
together. Each environment has a reference to the environment of the immediately
enclosing scope. When we look up a variable, we walk that chain from innermost
out until we find the variable. Starting at the inner scope is how we make local
variables shadow outer ones.

<img src="image/statements-and-state/chaining.png" alt="Environments for each scope, linked together." />

<aside name="cactus">

While the interpreter is running, the environments form a linear list of
objects, but consider the full set of environments created during the entire
execution. An outer scope may have multiple blocks nested within it, and each
will point to the outer one, giving a tree-like structure, though only one path
through the tree exists at a time.

The boring name for this is a [**parent-pointer tree**][parent pointer], but I
much prefer the evocative **cactus stack**.

[parent pointer]: https://en.wikipedia.org/wiki/Parent_pointer_tree

<img class="above" src="image/statements-and-state/cactus.png" alt="Each branch points to its parent. The root is global scope." />

</aside>

Before we add block syntax to the grammar, we'll beef up our Environment class
with support for this nesting. First, we give each environment a reference to
its enclosing one.

^code enclosing-field (1 before, 1 after)

This field needs to be initialized, so we add a couple of constructors.

^code environment-constructors

The no-argument constructor is for the global scope's environment, which ends
the chain. The other constructor creates a new local scope nested inside the
given outer one.

We don't have to touch the `define()` method -- a new variable is always
declared in the current innermost scope. But variable lookup and assignment work
with existing variables and they need to walk the chain to find them. First,
lookup:

^code environment-get-enclosing (2 before, 3 after)

If the variable isn't found in this environment, we simply try the enclosing
one. That in turn does the same thing <span name="recurse">recursively</span>,
so this will ultimately walk the entire chain. If we reach an environment with
no enclosing one and still don't find the variable, then we give up and report
an error as before.

Assignment works the same way.

<aside name="recurse">

It's likely faster to iteratively walk the chain, but I think the recursive
solution is prettier. We'll do something *much* faster in clox.

</aside>

^code environment-assign-enclosing (4 before, 1 after)

Again, if the variable isn't in this environment, it checks the outer one,
recursively.

### Block syntax and semantics

Now that Environments nest, we're ready to add blocks to the language. Behold
the grammar:

```ebnf
statement      → exprStmt
               | printStmt
               | block ;

block          → "{" declaration* "}" ;
```

A block is a (possibly empty) series of statements or declarations surrounded by
curly braces. A block is itself a statement and can appear anywhere a statement
is allowed. The <span name="block-ast">syntax tree</span> node looks like this:

^code block-ast (1 before, 1 after)

<aside name="block-ast">

The generated code for the new node is in [Appendix II][appendix-block].

[appendix-block]: appendix-ii.html#block-statement

</aside>

<span name="generate">It</span> contains the list of statements that are inside
the block. Parsing is straightforward. Like other statements, we detect the
beginning of a block by its leading token -- in this case the `{`. In the
`statement()` method, we add:

<aside name="generate">

As always, don't forget to run "GenerateAst.java".

</aside>

^code parse-block (1 before, 2 after)

All the real work happens here:

^code block

We <span name="list">create</span> an empty list and then parse statements and
add them to the list until we reach the end of the block, marked by the closing
`}`. Note that the loop also has an explicit check for `isAtEnd()`. We have to
be careful to avoid infinite loops, even when parsing invalid code. If the user
forgets a closing `}`, the parser needs to not get stuck.

<aside name="list">

Having `block()` return the raw list of statements and leaving it to
`statement()` to wrap the list in a Stmt.Block looks a little odd. I did it that
way because we'll reuse `block()` later for parsing function bodies and we don't
want that body wrapped in a Stmt.Block.

</aside>

That's it for syntax. For semantics, we add another visit method to Interpreter.

^code visit-block

To execute a block, we create a new environment for the block's scope and pass
it off to this other method:

^code execute-block

This new method executes a list of statements in the context of a given <span
name="param">environment</span>. Up until now, the `environment` field in
Interpreter always pointed to the same environment -- the global one. Now, that
field represents the *current* environment. That's the environment that
corresponds to the innermost scope containing the code to be executed.

To execute code within a given scope, this method updates the interpreter's
`environment` field, visits all of the statements, and then restores the
previous value. As is always good practice in Java, it restores the previous
environment using a finally clause. That way it gets restored even if an
exception is thrown.

<aside name="param">

Manually changing and restoring a mutable `environment` field feels inelegant.
Another classic approach is to explicitly pass the environment as a parameter to
each visit method. To "change" the environment, you pass a different one as you
recurse down the tree. You don't have to restore the old one, since the new one
lives on the Java stack and is implicitly discarded when the interpreter returns
from the block's visit method.

I considered that for jlox, but it's kind of tedious and verbose adding an
environment parameter to every single visit method. To keep the book a little
simpler, I went with the mutable field.

</aside>

Surprisingly, that's all we need to do in order to fully support local
variables, nesting, and shadowing. Go ahead and try this out:

```lox
var a = "global a";
var b = "global b";
var c = "global c";
{
  var a = "outer a";
  var b = "outer b";
  {
    var a = "inner a";
    print a;
    print b;
    print c;
  }
  print a;
  print b;
  print c;
}
print a;
print b;
print c;
```

Our little interpreter can remember things now. We are inching closer to
something resembling a full-featured programming language.

<div class="challenges">

## Challenges

1.  The REPL no longer supports entering a single expression and automatically
    printing its result value. That's a drag. Add support to the REPL to let
    users type in both statements and expressions. If they enter a statement,
    execute it. If they enter an expression, evaluate it and display the result
    value.

2.  Maybe you want Lox to be a little more explicit about variable
    initialization. Instead of implicitly initializing variables to `nil`, make
    it a runtime error to access a variable that has not been initialized or
    assigned to, as in:

    ```lox
    // No initializers.
    var a;
    var b;

    a = "assigned";
    print a; // OK, was assigned first.

    print b; // Error!
    ```

3.  What does the following program do?

    ```lox
    var a = 1;
    {
      var a = a + 2;
      print a;
    }
    ```

    What did you *expect* it to do? Is it what you think it should do? What
    does analogous code in other languages you are familiar with do? What do
    you think users will expect this to do?

</div>

<div class="design-note">

## Design Note: Implicit Variable Declaration

Lox has distinct syntax for declaring a new variable and assigning to an
existing one. Some languages collapse those to only assignment syntax. Assigning
to a non-existent variable automatically brings it into being. This is called
**implicit variable declaration** and exists in Python, Ruby, and CoffeeScript,
among others. JavaScript has an explicit syntax to declare variables, but can
also create new variables on assignment. Visual Basic has [an option to enable
or disable implicit variables][vb].

[vb]: https://msdn.microsoft.com/en-us/library/xe53dz5w(v=vs.100).aspx

When the same syntax can assign or create a variable, each language must decide
what happens when it isn't clear about which behavior the user intends. In
particular, each language must choose how implicit declaration interacts with
shadowing, and which scope an implicitly declared variable goes into.

*   In Python, assignment always creates a variable in the current function's
    scope, even if there is a variable with the same name declared outside of
    the function.

*   Ruby avoids some ambiguity by having different naming rules for local and
    global variables. However, blocks in Ruby (which are more like closures than
    like "blocks" in C) have their own scope, so it still has the problem.
    Assignment in Ruby assigns to an existing variable outside of the current
    block if there is one with the same name. Otherwise, it creates a new
    variable in the current block's scope.

*   CoffeeScript, which takes after Ruby in many ways, is similar. It explicitly
    disallows shadowing by saying that assignment always assigns to a variable
    in an outer scope if there is one, all the way up to the outermost global
    scope. Otherwise, it creates the variable in the current function scope.

*   In JavaScript, assignment modifies an existing variable in any enclosing
    scope, if found. If not, it implicitly creates a new variable in the
    *global* scope.

The main advantage to implicit declaration is simplicity. There's less syntax
and no "declaration" concept to learn. Users can just start assigning stuff and
the language figures it out.

Older, statically typed languages like C benefit from explicit declaration
because it gives the user a place to tell the compiler what type each variable
has and how much storage to allocate for it. In a dynamically typed,
garbage-collected language, that isn't really necessary, so you can get away
with making declarations implicit. It feels a little more "scripty", more "you
know what I mean".

But is that a good idea? Implicit declaration has some problems.

*   A user may intend to assign to an existing variable, but may have misspelled
    it. The interpreter doesn't know that, so it goes ahead and silently creates
    some new variable and the variable the user wanted to assign to still has
    its old value. This is particularly heinous in JavaScript where a typo will
    create a *global* variable, which may in turn interfere with other code.

*   JS, Ruby, and CoffeeScript use the presence of an existing variable with the
    same name -- even in an outer scope -- to determine whether or not an
    assignment creates a new variable or assigns to an existing one. That means
    adding a new variable in a surrounding scope can change the meaning of
    existing code. What was once a local variable may silently turn into an
    assignment to that new outer variable.

*   In Python, you may *want* to assign to some variable outside of the current
    function instead of creating a new variable in the current one, but you
    can't.

Over time, the languages I know with implicit variable declaration ended up
adding more features and complexity to deal with these problems.

*   Implicit declaration of global variables in JavaScript is universally
    considered a mistake today. "Strict mode" disables it and makes assigning
    to an undeclared variable a runtime error.

*   Python added a `global` statement to let you explicitly assign to a global
    variable from within a function. Later, as functional programming and nested
    functions became more popular, they added a similar `nonlocal` statement to
    assign to variables in enclosing functions.

*   Ruby extended its block syntax to allow declaring certain variables to be
    explicitly local to the block even if the same name exists in an outer
    scope.

Given those, I think the simplicity argument is mostly lost. There is an
argument that implicit declaration is the right *default* but I personally find
that less compelling.

My opinion is that implicit declaration made sense in years past when most
scripting languages were heavily imperative and code was pretty flat. As
programmers have gotten more comfortable with deep nesting, functional
programming, and closures, it's become much more common to want access to
variables in outer scopes. That makes it more likely that users will run into
the tricky cases where it's not clear whether they intend their assignment to
create a new variable or reuse a surrounding one.

So I prefer explicitly declaring variables, which is why Lox requires it.

</div>


================================================
FILE: book/strings.md
================================================
> "Ah? A small aversion to menial labor?" The doctor cocked an eyebrow.
> "Understandable, but misplaced. One should treasure those hum-drum
> tasks that keep the body occupied but leave the mind and heart unfettered."
>
> <cite>Tad Williams, <em>The Dragonbone Chair</em></cite>

Our little VM can represent three types of values right now: numbers, Booleans,
and `nil`. Those types have two important things in common: they're immutable
and they're small. Numbers are the largest, and they still fit into two 64-bit
words. That's a small enough price that we can afford to pay it for all values,
even Booleans and nils which don't need that much space.

Strings, unfortunately, are not so petite. There's no maximum length for a
string. Even if we were to artificially cap it at some contrived limit like
<span name="pascal">255</span> characters, that's still too much memory to spend
on every single value.

<aside name="pascal">

UCSD Pascal, one of the first implementations of Pascal, had this exact limit.
Instead of using a terminating null byte to indicate the end of the string like
C, Pascal strings started with a length value. Since UCSD used only a single
byte to store the length, strings couldn't be any longer than 255 characters.

<img src="image/strings/pstring.png" alt="The Pascal string 'hello' with a length byte of 5 preceding it." />

</aside>

We need a way to support values whose sizes vary, sometimes greatly. This is
exactly what dynamic allocation on the heap is designed for. We can allocate as
many bytes as we need. We get back a pointer that we'll use to keep track of the
value as it flows through the VM.

## Values and Objects

Using the heap for larger, variable-sized values and the stack for smaller,
atomic ones leads to a two-level representation. Every Lox value that you can
store in a variable or return from an expression will be a Value. For small,
fixed-size types like numbers, the payload is stored directly inside the Value
struct itself.

If the object is larger, its data lives on the heap. Then the Value's payload is
a *pointer* to that blob of memory. We'll eventually have a handful of
heap-allocated types in clox: strings, instances, functions, you get the idea.
Each type has its own unique data, but there is also state they all share that
[our future garbage collector][gc] will use to manage their memory.

<img src="image/strings/value.png" class="wide" alt="Field layout of number and obj values." />

[gc]: garbage-collection.html

We'll call this common representation <span name="short">"Obj"</span>. Each Lox
value whose state lives on the heap is an Obj. We can thus use a single new
ValueType case to refer to all heap-allocated types.

<aside name="short">

"Obj" is short for "object", natch.

</aside>

^code val-obj (1 before, 1 after)

When a Value's type is `VAL_OBJ`, the payload is a pointer to the heap memory,
so we add another case to the union for that.

^code union-object (1 before, 1 after)

As we did with the other value types, we crank out a couple of helpful macros
for working with Obj values.

^code is-obj (1 before, 2 after)

This evaluates to `true` if the given Value is an Obj. If so, we can use this:

^code as-obj (2 before, 1 after)

It extracts the Obj pointer from the value. We can also go the other way.

^code obj-val (1 before, 2 after)

This takes a bare Obj pointer and wraps it in a full Value.

## Struct Inheritance

Every heap-allocated value is an Obj, but <span name="objs">Objs</span> are
not all the same. For strings, we need the array of characters. When we get to
instances, they will need their data fields. A function object will need its
chunk of bytecode. How do we handle different payloads and sizes? We can't use
another union like we did for Value since the sizes are all over the place.

<aside name="objs">

No, I don't know how to pronounce "objs" either. Feels like there should be a
vowel in there somewhere.

</aside>

Instead, we'll use another technique. It's been around for ages, to the point
that the C specification carves out specific support for it, but I don't know
that it has a canonical name. It's an example of [*type punning*][pun], but that
term is too broad. In the absence of any better ideas, I'll call it **struct
inheritance**, because it relies on structs and roughly follows how
single-inheritance of state works in object-oriented languages.

[pun]: https://en.wikipedia.org/wiki/Type_punning

Like a tagged union, each Obj starts with a tag field that identifies what kind
of object it is -- string, instance, etc. Following that are the payload fields.
Instead of a union with cases for each type, each type is its own separate
struct. The tricky part is how to treat these structs uniformly since C has no
concept of inheritance or polymorphism. I'll explain that soon, but first let's
get the preliminary stuff out of the way.

The name "Obj" itself refers to a struct that contains the state shared across
all object types. It's sort of like the "base class" for objects. Because of
some cyclic dependencies between values and objects, we forward-declare it in
the "value" module.

^code forward-declare-obj (2 before, 1 after)

And the actual definition is in a new module.

^code object-h

Right now, it contains only the type tag. Shortly, we'll add some other
bookkeeping information for memory management. The type enum is this:

^code obj-type (1 before, 2 after)

Obviously, that will be more useful in later chapters after we add more
heap-allocated types. Since we'll be accessing these tag types frequently, it's
worth making a little macro that extracts the object type tag from a given
Value.

^code obj-type-macro (1 before, 2 after)

That's our foundation.

Now, let's build strings on top of it. The payload for strings is defined in a
separate struct. Again, we need to forward-declare it.

^code forward-declare-obj-string (1 before, 2 after)

The definition lives alongside Obj.

^code obj-string (1 before, 2 after)

A string object contains an array of characters. Those are stored in a separate,
heap-allocated array so that we set aside only as much room as needed for each
string. We also store the number of bytes in the array. This isn't strictly
necessary but lets us tell how much memory is allocated for the string without
walking the character array to find the null terminator.

Because ObjString is an Obj, it also needs the state all Objs share. It
accomplishes that by having its first field be an Obj. C specifies that struct
fields are arranged in memory in the order that they are declared. Also, when
you nest structs, the inner struct's fields are expanded right in place. So the
memory for Obj and for ObjString looks like this:

<img src="image/strings/obj.png" alt="The memory layout for the fields in Obj and ObjString." />

Note how the first bytes of ObjString exactly line up with Obj. This is not a
coincidence -- C <span name="spec">mandates</span> it. This is designed to
enable a clever pattern: You can take a pointer to a struct and safely convert
it to a pointer to its first field and back.

<aside name="spec">

The key part of the spec is:

> &sect; 6.7.2.1 13
>
> Within a structure object, the non-bit-field members and the units in which
> bit-fields reside have addresses that increase in the order in which they
> are declared. A pointer to a structure object, suitably converted, points to
> its initial member (or if that member is a bit-field, then to the unit in
> which it resides), and vice versa. There may be unnamed padding within a
> structure object, but not at its beginning.

</aside>

Given an `ObjString*`, you can safely cast it to `Obj*` and then access the
`type` field from it. Every ObjString "is" an Obj in the OOP sense of "is". When
we later add other object types, each struct will have an Obj as its first
field. Any code that wants to work with all objects can treat them as base
`Obj*` and ignore any other fields that may happen to follow.

You can go in the other direction too. Given an `Obj*`, you can "downcast" it to
an `ObjString*`. Of course, you need to ensure that the `Obj*` pointer you have
does point to the `obj` field of an actual ObjString. Otherwise, you are
unsafely reinterpreting random bits of memory. To detect that such a cast is
safe, we add another macro.

^code is-string (1 before, 2 after)

It takes a Value, not a raw `Obj*`, because most code in the VM works with
Values. It relies on this inline function:

^code is-obj-type (2 before, 2 after)

Pop quiz: Why not just put the body of this function right in the macro? What's
different about this one compared to the others? Right, it's because the body
uses `value` twice. A macro is expanded by inserting the argument *expression*
every place the parameter name appears in the body. If a macro uses a parameter
more than once, that expression gets evaluated multiple times.

That's bad if the expression has side effects. If we put the body of
`isObjType()` into the macro definition and then you did, say,

```c
IS_STRING(POP())
```

then it would pop two values off the stack! Using a function fixes that.

As long as we ensure that we set the type tag correctly whenever we create an
Obj of some type, this macro will tell us when it's safe to cast a value to a
specific object type. We can do that using these:

^code as-string (1 before, 2 after)

These two macros take a Value that is expected to contain a pointer to a valid
ObjString on the heap. The first one returns the `ObjString*` pointer. The
second one steps through that to return the character array itself, since that's
often what we'll end up needing.

## Strings

OK, our VM can now represent string values. It's time to add strings to the
language itself. As usual, we begin in the front end. The lexer already
tokenizes string literals, so it's the parser's turn.

^code table-string (1 before, 1 after)

When the parser hits a string token, it calls this parse function:

^code parse-string

This takes the string's characters <span name="escape">directly</span> from the
lexeme. The `+ 1` and `- 2` parts trim the leading and trailing quotation marks.
It then creates a string object, wraps it in a Value, and stuffs it into the
constant table.

<aside name="escape">

If Lox supported string escape sequences like `\n`, we'd translate those here.
Since it doesn't, we can take the characters as they are.

</aside>

To create the string, we use `copyString()`, which is declared in `object.h`.

^code copy-string-h (2 before, 1 after)

The compiler module needs to include that.

^code compiler-include-object (2 before, 1 after)

Our "object" module gets an implementation file where we define the new
function.

^code object-c

First, we allocate a new array on the heap, just big enough for the string's
characters and the trailing <span name="terminator">terminator</span>, using
this low-level macro that allocates an array with a given element type and
count:

^code allocate (2 before, 1 after)

Once we have the array, we copy over the characters from the lexeme and
terminate it.

<aside name="terminator" class="bottom">

We need to terminate the string ourselves because the lexeme points at a range
of characters inside the monolithic source string and isn't terminated.

Since ObjString stores the length explicitly, we *could* leave the character
array unterminated, but slapping a terminator on the end costs us only a byte
and lets us pass the character array to C standard library functions that expect
a terminated string.

</aside>

You might wonder why the ObjString can't just point back to the original
characters in the source string. Some ObjStrings will be created dynamically at
runtime as a result of string operations like concatenation. Those strings
obviously need to dynamically allocate memory for the characters, which means
the string needs to *free* that memory when it's no longer needed.

If we had an ObjString for a string literal, and tried to free its character
array that pointed into the original source code string, bad things would
happen. So, for literals, we preemptively copy the characters over to the heap.
This way, every ObjString reliably owns its character array and can free it.

The real work of creating a string object happens in this function:

^code allocate-string (2 before)

It creates a new ObjString on the heap and then initializes its fields. It's
sort of like a constructor in an OOP language. As such, it first calls the "base
class" constructor to initialize the Obj state, using a new macro.

^code allocate-obj (1 before, 2 after)

<span name="factored">Like</span> the previous macro, this exists mainly to
avoid the need to redundantly cast a `void*` back to the desired type. The
actual functionality is here:

<aside name="factored">

I admit this chapter has a sea of helper functions and macros to wade through. I
try to keep the code nicely factored, but that leads to a scattering of tiny
functions. They will pay off when we reuse them later.

</aside>

^code allocate-object (2 before, 2 after)

It allocates an object of the given size on the heap. Note that the size is
*not* just the size of Obj itself. The caller passes in the number of bytes so
that there is room for the extra payload fields needed by the specific object
type being created.

Then it initializes the Obj state -- right now, that's just the type tag. This
function returns to `allocateString()`, which finishes initializing the ObjString
fields. <span name="viola">*Voilà*</span>, we can compile and execute string
literals.

<aside name="viola">

<img src="image/strings/viola.png" class="above" alt="A viola." />

Don't get "voilà" confused with "viola". One means "there it is" and the other
is a string instrument, the middle child between a violin and a cello. Yes, I
did spend two hours drawing a viola just to mention that.

</aside>

## Operations on Strings

Our fancy strings are there, but they don't do much of anything yet. A good
first step is to make the existing print code not barf on the new value type.

^code call-print-object (1 before, 1 after)

If the value is a heap-allocated object, it defers to a helper function over in
the "object" module.

^code print-object-h (1 before, 2 after)

The implementation looks like this:

^code print-object

We have only a single object type now, but this function will sprout additional
switch cases in later chapters. For string objects, it simply <span
name="term-2">prints</span> the character array as a C string.

<aside name="term-2">

I told you terminating the string would come in handy.

</aside>

The equality operators also need to gracefully handle strings. Consider:

```lox
"string" == "string"
```

These are two separate string literals. The compiler will make two separate
calls to `copyString()`, create two distinct ObjString objects and store them as
two constants in the chunk. They are different objects in the heap. But our
users (and thus we) expect strings to have value equality. The above expression
should evaluate to `true`. That requires a little special support.

^code strings-equal (1 before, 1 after)

If the two values are both strings, then they are equal if their character
arrays contain the same characters, regardless of whether they are two separate
objects or the exact same one. This does mean that string equality is slower
than equality on other types since it has to walk the whole string. We'll revise
that [later][hash], but this gives us the right semantics for now.

[hash]: hash-tables.html

Finally, in order to use `memcmp()` and the new stuff in the "object" module, we
need a couple of includes. Here:

^code value-include-string (1 before, 2 after)

And here:

^code value-include-object (2 before, 1 after)

### Concatenation

Full-grown languages provide lots of operations for working with strings --
access to individual characters, the string's length, changing case, splitting,
joining, searching, etc. When you implement your language, you'll likely want
all that. But for this book, we keep things *very* minimal.

The only interesting operation we support on strings is `+`. If you use that
operator on two string objects, it produces a new string that's a concatenation
of the two operands. Since Lox is dynamically typed, we can't tell which
behavior is needed at compile time because we don't know the types of the
operands until runtime. Thus, the `OP_ADD` instruction dynamically inspects the
operands and chooses the right operation.

^code add-strings (1 before, 1 after)

If both operands are strings, it concatenates. If they're both numbers, it adds
them. Any other <span name="convert">combination</span> of operand types is a
runtime error.

<aside name="convert" class="bottom">

This is more conservative than most languages. In other languages, if one
operand is a string, the other can be any type and it will be implicitly
converted to a string before concatenating the two.

I think that's a fine feature, but would require writing tedious "convert to
string" code for each type, so I left it out of Lox.

</aside>

To concatenate strings, we define a new function.

^code concatenate

It's pretty verbose, as C code that works with strings tends to be. First, we
calculate the length of the result string based on the lengths of the operands.
We allocate a character array for the result and then copy the two halves in. As
always, we carefully ensure the string is terminated.

In order to call `memcpy()`, the VM needs an include.

^code vm-include-string (1 before, 2 after)

Finally, we produce an ObjString to contain those characters. This time we use a
new function, `takeString()`.

^code take-string-h (2 before, 1 after)

The implementation looks like this:

^code take-string

The previous `copyString()` function assumes it *cannot* take ownership of the
characters you pass in. Instead, it conservatively creates a copy of the
characters on the heap that the ObjString can own. That's the right thing for
string literals where the passed-in characters are in the middle of the source
string.

But, for concatenation, we've already dynamically allocated a character array on
the heap. Making another copy of that would be redundant (and would mean
`concatenate()` has to remember to free its copy). Instead, this function claims
ownership of the string you give it.

As usual, stitching this functionality together requires a couple of includes.

^code vm-include-object-memory (1 before, 1 after)

## Freeing Objects

Behold this innocuous-seeming expression:

```lox
"st" + "ri" + "ng"
```

When the compiler chews through this, it allocates an ObjString for each of
those three string literals and stores them in the chunk's constant table and
generates this <span name="stack">bytecode</span>:

<aside name="stack">

Here's what the stack looks like after each instruction:

<img src="image/strings/stack.png" alt="The state of the stack at each instruction." />

</aside>

```text
0000    OP_CONSTANT         0 "st"
0002    OP_CONSTANT         1 "ri"
0004    OP_ADD
0005    OP_CONSTANT         2 "ng"
0007    OP_ADD
0008    OP_RETURN
```

The first two instructions push `"st"` and `"ri"` onto the stack. Then the
`OP_ADD` pops those and concatenates them. That dynamically allocates a new
`"stri"` string on the heap. The VM pushes that and then pushes the `"ng"`
constant. The last `OP_ADD` pops `"stri"` and `"ng"`, concatenates them, and
pushes the result: `"string"`. Great, that's what we expect.

But, wait. What happened to that `"stri"` string? We dynamically allocated it,
then the VM discarded it after concatenating it with `"ng"`. We popped it from
the stack and no longer have a reference to it, but we never freed its memory.
We've got ourselves a classic memory leak.

Of course, it's perfectly fine for the *Lox program* to forget about
intermediate strings and not worry about freeing them. Lox automatically manages
memory on the user's behalf. The responsibility to manage memory doesn't
*disappear*. Instead, it falls on our shoulders as VM implementers.

The full <span name="borrowed">solution</span> is a [garbage collector][gc] that
reclaims unused memory while the program is running. We've got some other stuff
to get in place before we're ready to tackle that project. Until then, we are
living on borrowed time. The longer we wait to add the collector, the harder it
is to do.

<aside name="borrowed">

I've seen a number of people implement large swathes of their language before
trying to start on the GC. For the kind of toy programs you typically run while
a language is being developed, you actually don't run out of memory before
reaching the end of the program, so this gets you surprisingly far.

But that underestimates how *hard* it is to add a garbage collector later. The
collector *must* ensure it can find every bit of memory that *is* still being
used so that it doesn't collect live data. There are hundreds of places a
language implementation can squirrel away a reference to some object. If you
don't find all of them, you get nightmarish bugs.

I've seen language implementations die because it was too hard to get the GC in
later. If your language needs GC, get it working as soon as you can. It's a
crosscutting concern that touches the entire codebase.

</aside>

Today, we should at least do the bare minimum: avoid *leaking* memory by making
sure the VM can still find every allocated object even if the Lox program itself
no longer references them. There are many sophisticated techniques that advanced
memory managers use to allocate and track memory for objects. We're going to
take the simplest practical approach.

We'll create a linked list that stores every Obj. The VM can traverse that
list to find every single object that has been allocated on the heap, whether or
not the user's program or the VM's stack still has a reference to it.

We could define a separate linked list node struct but then we'd have to
allocate those too. Instead, we'll use an **intrusive list** -- the Obj struct
itself will be the linked list node. Each Obj gets a pointer to the next Obj in
the chain.

^code next-field (2 before, 1 after)

The VM stores a pointer to the head of the list.

^code objects-root (1 before, 1 after)

When we first initialize the VM, there are no allocated objects.

^code init-objects-root (1 before, 1 after)

Every time we allocate an Obj, we insert it in the list.

^code add-to-list (1 before, 1 after)

Since this is a singly linked list, the easiest place to insert it is as the
head. That way, we don't need to also store a pointer to the tail and keep it
updated.

The "object" module is directly using the global `vm` variable from the "vm"
module, so we need to expose that externally.

^code extern-vm (2 before, 1 after)

Eventually, the garbage collector will free memory while the VM is still
running. But, even then, there will usually be unused objects still lingering in
memory when the user's program completes. The VM should free those too.

There's no sophisticated logic for that. Once the program is done, we can free
*every* object. We can and should implement that now.

^code call-free-objects (1 before, 1 after)

That empty function we defined [way back when][vm] finally does something! It
calls this:

[vm]: a-virtual-machine.html#an-instruction-execution-machine

^code free-objects-h (1 before, 2 after)

Here's how we free the objects:

^code free-objects

This is a CS 101 textbook implementation of walking a linked list and freeing
its nodes. For each node, we call:

^code free-object

We aren't only freeing the Obj itself. Since some object types also allocate
other memory that they own, we also need a little type-specific code to handle
each object type's special needs. Here, that means we free the character array
and then free the ObjString. Those both use one last memory management macro.

^code free (1 before, 2 after)

It's a tiny <span name="free">wrapper</span> around `reallocate()` that
"resizes" an allocation down to zero bytes.

<aside name="free">

Using `reallocate()` to free memory might seem pointless. Why not just call
`free()`? Later, this will help the VM track how much memory is still being
used. If all allocation and freeing goes through `reallocate()`, it's easy to
keep a running count of the number of bytes of allocated memory.

</aside>

As usual, we need an include to wire everything together.

^code memory-include-object (1 before, 2 after)

Then in the implementation file:

^code memory-include-vm (1 before, 2 after)

With this, our VM no longer leaks memory. Like a good C program, it cleans up
its mess before exiting. But it doesn't free any objects while the VM is
running. Later, when it's possible to write longer-running Lox programs, the VM
will eat more and more memory as it goes, not relinquishing a single byte until
the entire program is done.

We won't address that until we've added [a real garbage collector][gc], but this
is a big step. We now have the infrastructure to support a variety of different
kinds of dynamically allocated objects. And we've used that to add strings to
clox, one of the most used types in most programming languages. Strings in turn
enable us to build another fundamental data type, especially in dynamic
languages: the venerable [hash table][]. But that's for the next chapter...

[hash table]: hash-tables.html

<div class="challenges">

## Challenges

1.  Each string requires two separate dynamic allocations -- one for the
    ObjString and a second for the character array. Accessing the characters
    from a value requires two pointer indirections, which can be bad for
    performance. A more efficient solution relies on a technique called
    **[flexible array members][]**. Use that to store the ObjString and its
    character array in a single contiguous allocation.

2.  When we create the ObjString for each string literal, we copy the characters
    onto the heap. That way, when the string is later freed, we know it is safe
    to free the characters too.

    This is a simpler approach but wastes some memory, which might be a problem
    on very constrained devices. Instead, we could keep track of which
    ObjStrings own their character array and which are "constant strings" that
    just point back to the original source string or some other non-freeable
    location. Add support for this.

3.  If Lox were your language, what would you have it do when a user tries to
    use `+` with one string operand and the other some other type? Justify your
    choice. What do other languages do?

[flexible array members]: https://en.wikipedia.org/wiki/Flexible_array_member

</div>

<div class="design-note">

## Design Note: String Encoding

In this book, I try not to shy away from the gnarly problems you'll run into in
a real language implementation. We might not always use the most *sophisticated*
solution -- it's an intro book after all -- but I don't think it's honest to
pretend the problem doesn't exist at all. However, I did skirt around one really
nasty conundrum: deciding how to represent strings.

There are two facets to a string encoding:

*   **What is a single "character" in a string?** How many different values are
    there and what do they represent? The first widely adopted standard answer
    to this was [ASCII][]. It gave you 128 different character values and
    specified what they were. It was great... if you only ever cared about
    English. While it has weird, mostly forgotten characters like "record
    separator" and "synchronous idle", it doesn't have a single umlaut, acute,
    or grave. It can't represent "jalapeño", "naïve", <span
    name="gruyere">"Gruyère"</span>, or "Mötley Crüe".

    <aside name="gruyere">

    It goes without saying that a language that does not let one discuss Gruyère
    or Mötley Crüe is a language not worth using.

    </aside>

    Next came [Unicode][]. Initially, it supported 16,384 different characters
    (**code points**), which fit nicely in 16 bits with a couple of bits to
    spare. Later that grew and grew, and now there are well over 100,000
    different code points including such vital instruments of human
    communication as 💩 (Unicode Character 'PILE OF POO', `U+1F4A9`).

    Even that long list of code points is not enough to represent each possible
    visible glyph a language might support. To handle that, Unicode also has
    **combining characters** that modify a preceding code point. For example,
    "a" followed by the combining character "¨" gives you "ä". (To make things
    more confusing, Unicode *also* has a single code point that looks like "ä".)

    If a user accesses the fourth "character" in "naïve", do they expect to get
    back "v" or &ldquo;¨&rdquo;? The former means they are thinking of each code
    point and its combining character as a single unit -- what Unicode calls an
    **extended grapheme cluster** -- the latter means they are thinking in
    individual code points. Which do your users expect?

*   **How is a single unit represented in memory?** Most systems using ASCII
    gave a single byte to each character and left the high bit unused. Unicode
    has a handful of common encodings. UTF-16 packs most code points into 16
    bits. That was great when every code point fit in that size. When that
    overflowed, they added *surrogate pairs* that use multiple 16-bit code units
    to represent a single code point. UTF-32 is the next evolution of
    UTF-16 -- it gives a full 32 bits to each and every code point.

    UTF-8 is more complex than either of those. It uses a variable number of
    bytes to encode a code point. Lower-valued code points fit in fewer bytes.
    Since each character may occupy a different number of bytes, you can't
    directly index into the string to find a specific code point. If you want,
    say, the 10th code point, you don't know how many bytes into the string that
    is without walking and decoding all of the preceding ones.

[ascii]: https://en.wikipedia.org/wiki/ASCII
[unicode]: https://en.wikipedia.org/wiki/Unicode

Choosing a character representation and encoding involves fundamental
trade-offs. Like many things in engineering, there's no <span
name="python">perfect</span> solution:

<aside name="python">

An example of how difficult this problem is comes from Python. The achingly long
transition from Python 2 to 3 is painful mostly because of its changes around
string encoding.

</aside>

*   ASCII is memory efficient and fast, but it kicks non-Latin languages to the
    side.

*   UTF-32 is fast and supports the whole Unicode range, but wastes a lot of
    memory given that most code points do tend to be in the lower range of
    values, where a full 32 bits aren't needed.

*   UTF-8 is memory efficient and supports the whole Unicode range, but its
    variable-length encoding makes it slow to access arbitrary code points.

*   UTF-16 is worse than all of them -- an ugly consequence of Unicode
    outgrowing its earlier 16-bit range. It's less memory efficient than UTF-8
    but is still a variable-length encoding thanks to surrogate pairs. Avoid it
    if you can. Alas, if your language needs to run on or interoperate with the
    browser, the JVM, or the CLR, you might be stuck with it, since those all
    use UTF-16 for their strings and you don't want to have to convert every
    time you pass a string to the underlying system.

One option is to take the maximal approach and do the "rightest" thing. Support
all the Unicode code points. Internally, select an encoding for each string
based on its contents -- use ASCII if every code point fits in a byte, UTF-16 if
there are no surrogate pairs, etc. Provide APIs to let users iterate over both
code points and extended grapheme clusters.

This covers all your bases but is really complex. It's a lot to implement,
debug, and optimize. When serializing strings or interoperating with other
systems, you have to deal with all of the encodings. Users need to understand
the two indexing APIs and know which to use when. This is the approach that
newer, big languages tend to take -- like Raku and Swift.

A simpler compromise is to always encode using UTF-8 and only expose an API that
works with code points. For users that want to work with grapheme clusters, let
them use a third-party library for that. This is less Latin-centric than ASCII
but not much more complex. You lose fast direct indexing by code point, but you
can usually live without that or afford to make it *O(n)* instead of *O(1)*.

If I were designing a big workhorse language for people writing large
applications, I'd probably go with the maximal approach. For my little embedded
scripting language [Wren][], I went with UTF-8 and code points.

[wren]: http://wren.io

</div>


================================================
FILE: book/superclasses.md
================================================
> You can choose your friends but you sho' can't choose your family, an' they're
> still kin to you no matter whether you acknowledge &rsquo;em or not, and it
> makes you look right silly when you don't.
>
> <cite>Harper Lee, <em>To Kill a Mockingbird</em></cite>

This is the very last chapter where we add new functionality to our VM. We've
packed almost the entire Lox language in there already. All that remains is
inheriting methods and calling superclass methods. We have [another
chapter][optimization] after this one, but it introduces no new behavior. It
<span name="faster">only</span> makes existing stuff faster. Make it to the end
of this one, and you'll have a complete Lox implementation.

<aside name="faster">

That "only" should not imply that making stuff faster isn't important! After
all, the whole purpose of our entire second virtual machine is better
performance over jlox. You could argue that *all* of the past fifteen chapters
are "optimization".

</aside>

[optimization]: optimization.html

Some of the material in this chapter will remind you of jlox. The way we resolve
super calls is pretty much the same, though viewed through clox's more complex
mechanism for storing state on the stack. But we have an entirely different,
much faster, way of handling inherited method calls this time around.

## Inheriting Methods

We'll kick things off with method inheritance since it's the simpler piece. To
refresh your memory, Lox inheritance syntax looks like this:

```lox
class Doughnut {
  cook() {
    print "Dunk in the fryer.";
  }
}

class Cruller < Doughnut {
  finish() {
    print "Glaze with icing.";
  }
}
```

Here, the Cruller class inherits from Doughnut and thus, instances of Cruller
inherit the `cook()` method. I don't know why I'm belaboring this. You know how
inheritance works. Let's start compiling the new syntax.

^code compile-superclass (2 before, 1 after)

After we compile the class name, if the next token is a `<`, then we found a
superclass clause. We consume the superclass's identifier token, then call
`variable()`. That function takes the previously consumed token, treats it as a
variable reference, and emits code to load the variable's value. In other words,
it looks up the superclass by name and pushes it onto the stack.

After that, we call `namedVariable()` to load the subclass doing the inheriting
onto the stack, followed by an `OP_INHERIT` instruction. That instruction
wires up the superclass to the new subclass. In the last chapter, we defined an
`OP_METHOD` instruction to mutate an existing class object by adding a method to
its method table. This is similar -- the `OP_INHERIT` instruction takes an
existing class and applies the effect of inheritance to it.

In the previous example, when the compiler works through this bit of syntax:

```lox
class Cruller < Doughnut {
```

The result is this bytecode:

<img src="image/superclasses/inherit-stack.png" alt="The series of bytecode instructions for a Cruller class inheriting from Doughnut." />

Before we implement the new `OP_INHERIT` instruction, we have an edge case to
detect.

^code inherit-self (1 before, 1 after)

<span name="cycle">A</span> class cannot be its own superclass. Unless you have
access to a deranged nuclear physicist and a very heavily modified DeLorean, you
cannot inherit from yourself.

<aside name="cycle">

Interestingly, with the way we implement method inheritance, I don't think
allowing cycles would actually cause any problems in clox. It wouldn't do
anything *useful*, but I don't think it would cause a crash or infinite loop.

</aside>

### Executing inheritance

Now onto the new instruction.

^code inherit-op (1 before, 1 after)

There are no operands to worry about. The two values we need -- superclass and
subclass -- are both found on the stack. That means disassembling is easy.

^code disassemble-inherit (1 before, 1 after)

The interpreter is where the action happens.

^code interpret-inherit (1 before, 1 after)

From the top of the stack down, we have the subclass then the superclass. We
grab both of those and then do the inherit-y bit. This is where clox takes a
different path than jlox. In our first interpreter, each subclass stored a
reference to its superclass. On method access, if we didn't find the method in
the subclass's method table, we recursed through the inheritance chain looking
at each ancestor's method table until we found it.

For example, calling `cook()` on an instance of Cruller sends jlox on this
journey:

<img src="image/superclasses/jlox-resolve.png" alt="Resolving a call to cook() in an instance of Cruller means walking the superclass chain." />

That's a lot of work to perform during method *invocation* time. It's slow, and
worse, the farther an inherited method is up the ancestor chain, the slower it
gets. Not a great performance story.

The new approach is much faster. When the subclass is declared, we copy all of
the inherited class's methods down into the subclass's own method table. Later,
when *calling* a method, any method inherited from a superclass will be found
right in the subclass's own method table. There is no extra runtime work needed
for inheritance at all. By the time the class is declared, the work is done.
This means inherited method calls are exactly as fast as normal method calls --
a <span name="two">single</span> hash table lookup.

<img src="image/superclasses/clox-resolve.png" alt="Resolving a call to cook() in an instance of Cruller which has the method in its own method table." />

<aside name="two">

Well, two hash table lookups, I guess. Because first we have to make sure a
field on the instance doesn't shadow the method.

</aside>

I've sometimes heard this technique called "copy-down inheritance". It's simple
and fast, but, like most optimizations, you get to use it only under certain
constraints. It works in Lox because Lox classes are *closed*. Once a class
declaration is finished executing, the set of methods for that class can never
change.

In languages like Ruby, Python, and JavaScript, it's possible to <span
name="monkey">crack</span> open an existing class and jam some new methods into
it or even remove them. That would break our optimization because if those
modifications happened to a superclass *after* the subclass declaration
executed, the subclass would not pick up those changes. That breaks a user's
expectation that inheritance always reflects the current state of the
superclass.

<aside name="monkey">

As you can imagine, changing the set of methods a class defines imperatively at
runtime can make it hard to reason about a program. It is a very powerful tool,
but also a dangerous tool.

Those who find this tool maybe a little *too* dangerous gave it the unbecoming
name "monkey patching", or the even less decorous "duck punching".

<img src="image/superclasses/monkey.png" alt="A monkey with an eyepatch, naturally." />

</aside>

Fortunately for us (but not for users who like the feature, I guess), Lox
doesn't let you patch monkeys or punch ducks, so we can safely apply this
optimization.

What about method overrides? Won't copying the superclass's methods into the
subclass's method table clash with the subclass's own methods? Fortunately, no.
We emit the `OP_INHERIT` after the `OP_CLASS` instruction that creates the
subclass but before any method declarations and `OP_METHOD` instructions have
been compiled. At the point that we copy the superclass's methods down, the
subclass's method table is empty. Any methods the subclass overrides will
overwrite those inherited entries in the table.

### Invalid superclasses

Our implementation is simple and fast, which is just the way I like my VM code.
But it's not robust. Nothing prevents a user from inheriting from an object that
isn't a class at all:

```lox
var NotClass = "So not a class";
class OhNo < NotClass {}
```

Obviously, no self-respecting programmer would write that, but we have to guard
against potential Lox users who have no self-respect. A simple runtime check
fixes that.

^code inherit-non-class (1 before, 1 after)

If the value we loaded from the identifier in the superclass clause isn't an
ObjClass, we report a runtime error to let the user know what we think of them
and their code.

## Storing Superclasses

Did you notice that when we added method inheritance, we didn't actually add any
reference from a subclass to its superclass? After we copy the inherited methods
over, we forget the superclass entirely. We don't need to keep a handle on the
superclass, so we don't.

That won't be sufficient to support super calls. Since a subclass <span
name="may">may</span> override the superclass method, we need to be able to get
our hands on superclass method tables. Before we get to that mechanism, I want 
to refresh your memory on how super calls are statically resolved.

<aside name="may">

"May" might not be a strong enough word. Presumably the method *has* been
overridden. Otherwise, why are you bothering to use `super` instead of just
calling it directly?

</aside>

Back in the halcyon days of jlox, I showed you [this tricky example][example] to
explain the way super calls are dispatched:

[example]: inheritance.html#semantics

```lox
class A {
  method() {
    print "A method";
  }
}

class B < A {
  method() {
    print "B method";
  }

  test() {
    super.method();
  }
}

class C < B {}

C().test();
```

Inside the body of the `test()` method, `this` is an instance of C. If super
calls were resolved relative to the superclass of the *receiver*, then we would
look in C's superclass, B. But super calls are resolved relative to the
superclass of the *surrounding class where the super call occurs*. In this case,
we are in B's `test()` method, so the superclass is A, and the program should
print "A method".

This means that super calls are not resolved dynamically based on the runtime
instance. The superclass used to look up the method is a static -- practically
lexical -- property of where the call occurs. When we added inheritance to jlox,
we took advantage of that static aspect by storing the superclass in the same
Environment structure we used for all lexical scopes. Almost as if the
interpreter saw the above program like this:

```lox
class A {
  method() {
    print "A method";
  }
}

var Bs_super = A;
class B < A {
  method() {
    print "B method";
  }

  test() {
    runtimeSuperCall(Bs_super, "method");
  }
}

var Cs_super = B;
class C < B {}

C().test();
```

Each subclass has a hidden variable storing a reference to its superclass.
Whenever we need to perform a super call, we access the superclass from that
variable and tell the runtime to start looking for methods there.

We'll take the same path with clox. The difference is that instead of jlox's
heap-allocated Environment class, we have the bytecode VM's value stack and
upvalue system. The machinery is a little different, but the overall effect is
the same.

### A superclass local variable

Our compiler already emits code to load the superclass onto the stack. Instead
of leaving that slot as a temporary, we create a new scope and make it a local
variable.

^code superclass-variable (2 before, 2 after)

Creating a new lexical scope ensures that if we declare two classes in the same
scope, each has a different local slot to store its superclass. Since we always
name this variable "super", if we didn't make a scope for each subclass, the
variables would collide.

We name the variable "super" for the same reason we use "this" as the name of
the hidden local variable that `this` expressions resolve to: "super" is a
reserved word, which guarantees the compiler's hidden variable won't collide
with a user-defined one.

The difference is that when compiling `this` expressions, we conveniently have a
token sitting around whose lexeme is "this". We aren't so lucky here. Instead,
we add a little helper function to create a synthetic token for the given <span
name="constant">constant</span> string.

^code synthetic-token

<aside name="constant" class="bottom">

I say "constant string" because tokens don't do any memory management of their
lexeme. If we tried to use a heap-allocated string for this, we'd end up leaking
memory because it never gets freed. But the memory for C string literals lives
in the executable's constant data section and never needs to be freed, so we're
fine.

</aside>

Since we opened a local scope for the superclass variable, we need to close it.

^code end-superclass-scope (1 before, 2 after)

We pop the scope and discard the "super" variable after compiling the class body
and its methods. That way, the variable is accessible in all of the methods of
the subclass. It's a somewhat pointless optimization, but we create the scope
only if there *is* a superclass clause. Thus we need to close the scope only if
there is one.

To track that, we could declare a little local variable in `classDeclaration()`.
But soon, other functions in the compiler will need to know whether the
surrounding class is a subclass or not. So we may as well give our future selves
a hand and store this fact as a field in the ClassCompiler now.

^code has-superclass (2 before, 1 after)

When we first initialize a ClassCompiler, we assume it is not a subclass.

^code init-has-superclass (1 before, 1 after)

Then, if we see a superclass clause, we know we are compiling a subclass.

^code set-has-superclass (1 before, 1 after)

This machinery gives us a mechanism at runtime to access the superclass object
of the surrounding subclass from within any of the subclass's methods -- simply
emit code to load the variable named "super". That variable is a local outside
of the method body, but our existing upvalue support enables the VM to capture
that local inside the body of the method or even in functions nested inside that
method.

## Super Calls

With that runtime support in place, we are ready to implement super calls. As
usual, we go front to back, starting with the new syntax. A super call <span
name="last">begins</span>, naturally enough, with the `super` keyword.

<aside name="last">

This is it, friend. The very last entry you'll add to the parsing table.

</aside>

^code table-super (1 before, 1 after)

When the expression parser lands on a `super` token, control jumps to a new
parsing function which starts off like so:

^code super

This is pretty different from how we compiled `this` expressions. Unlike `this`,
a `super` <span name="token">token</span> is not a standalone expression.
Instead, the dot and method name following it are inseparable parts of the
syntax. However, the parenthesized argument list is separate. As with normal
method access, Lox supports getting a reference to a superclass method as a
closure without invoking it:

<aside name="token">

Hypothetical question: If a bare `super` token *was* an expression, what kind of
object would it evaluate to?

</aside>

```lox
class A {
  method() {
    print "A";
  }
}

class B < A {
  method() {
    var closure = super.method;
    closure(); // Prints "A".
  }
}
```

In other words, Lox doesn't really have super *call* expressions, it has super
*access* expressions, which you can choose to immediately invoke if you want. So
when the compiler hits a `super` token, we consume the subsequent `.` token and
then look for a method name. Methods are looked up dynamically, so we use
`identifierConstant()` to take the lexeme of the method name token and store it
in the constant table just like we do for property access expressions.

Here is what the compiler does after consuming those tokens:

^code super-get (1 before, 1 after)

In order to access a *superclass method* on *the current instance*, the runtime
needs both the receiver *and* the superclass of the surrounding method's class.
The first `namedVariable()` call generates code to look up the current receiver
stored in the hidden variable "this" and push it onto the stack. The second
`namedVariable()` call emits code to look up the superclass from its "super"
variable and push that on top.

Finally, we emit a new `OP_GET_SUPER` instruction with an operand for the
constant table index of the method name. That's a lot to hold in your head. To
make it tangible, consider this example program:

```lox
class Doughnut {
  cook() {
    print "Dunk in the fryer.";
    this.finish("sprinkles");
  }

  finish(ingredient) {
    print "Finish with " + ingredient;
  }
}

class Cruller < Doughnut {
  finish(ingredient) {
    // No sprinkles, always icing.
    super.finish("icing");
  }
}
```

The bytecode emitted for the `super.finish("icing")` expression looks and works
like this:

<img src="image/superclasses/super-instructions.png" alt="The series of bytecode instructions for calling super.finish()." />

The first three instructions give the runtime access to the three pieces of
information it needs to perform the super access:

1.  The first instruction loads **the instance** onto the stack.

2.  The second instruction loads **the superclass where the method is
    resolved**.

3.  Then the new `OP_GET_SUPER` instruction encodes **the name of the method to
    access** as an operand.

The remaining instructions are the normal bytecode for evaluating an argument
list and calling a function.

We're almost ready to implement the new `OP_GET_SUPER` instruction in the
interpreter. But before we do, the compiler has some errors it is responsible
for reporting.

^code super-errors (1 before, 1 after)

A super call is meaningful only inside the body of a method (or in a function
nested inside a method), and only inside the method of a class that has a
superclass. We detect both of these cases using the value of `currentClass`. If
that's `NULL` or points to a class with no superclass, we report those errors.

### Executing super accesses

Assuming the user didn't put a `super` expression where it's not allowed, their
code passes from the compiler over to the runtime. We've got ourselves a new
instruction.

^code get-super-op (1 before, 1 after)

We disassemble it like other opcodes that take a constant table index operand.

^code disassemble-get-super (1 before, 1 after)

You might anticipate something harder, but interpreting the new instruction is
similar to executing a normal property access.

^code interpret-get-super (1 before, 1 after)

As with properties, we read the method name from the
constant table. Then we pass that to `bindMethod()` which looks up the method in
the given class's method table and creates an ObjBoundMethod to bundle the
resulting closure to the current instance.

The key <span name="field">difference</span> is *which* class we pass to
`bindMethod()`. With a normal property access, we use the ObjInstance's own
class, which gives us the dynamic dispatch we want. For a super call, we don't
use the instance's class. Instead, we use the statically resolved superclass of
the containing class, which the compiler has conveniently ensured is sitting on
top of the stack waiting for us.

We pop that superclass and pass it to `bindMethod()`, which correctly skips over
any overriding methods in any of the subclasses between that superclass and the
instance's own class. It also correctly includes any methods inherited by the
superclass from any of *its* superclasses.

The rest of the behavior is the same. Popping the superclass leaves the instance
at the top of the stack. When `bindMethod()` succeeds, it pops the instance and
pushes the new bound method. Otherwise, it reports a runtime error and returns
`false`. In that case, we abort the interpreter.

<aside name="field">

Another difference compared to `OP_GET_PROPERTY` is that we don't try to look
for a shadowing field first. Fields are not inherited, so `super` expressions
always resolve to methods.

If Lox were a prototype-based language that used *delegation* instead of
*inheritance*, then instead of one *class* inheriting from another *class*,
instances would inherit from ("delegate to") other instances. In that case,
fields *could* be inherited, and we would need to check for them here.

</aside>

### Faster super calls

We have superclass method accesses working now. And since the returned object is
an ObjBoundMethod that you can then invoke, we've got super *calls* working too.
Just like last chapter, we've reached a point where our VM has the complete,
correct semantics.

But, also like last chapter, it's pretty slow. Again, we're heap allocating an
ObjBoundMethod for each super call even though most of the time the very next
instruction is an `OP_CALL` that immediately unpacks that bound method, invokes
it, and then discards it. In fact, this is even more likely to be true for
super calls than for regular method calls. At least with method calls there is
a chance that the user is actually invoking a function stored in a field. With
super calls, you're *always* looking up a method. The only question is whether
you invoke it immediately or not.

The compiler can certainly answer that question for itself if it sees a left
parenthesis after the superclass method name, so we'll go ahead and perform the
same optimization we did for method calls. Take out the two lines of code that
load the superclass and emit `OP_GET_SUPER`, and replace them with this:

^code super-invoke (1 before, 1 after)

Now before we emit anything, we look for a parenthesized argument list. If we
find one, we compile that. Then we load the superclass. After that, we emit a
new `OP_SUPER_INVOKE` instruction. This <span
name="superinstruction">superinstruction</span> combines the behavior of
`OP_GET_SUPER` and `OP_CALL`, so it takes two operands: the constant table index
of the method name to look up and the number of arguments to pass to it.

<aside name="superinstruction">

This is a particularly *super* superinstruction, if you get what I'm saying.
I... I'm sorry for this terrible joke.

</aside>

Otherwise, if we don't find a `(`, we continue to compile the expression as a
super access like we did before and emit an `OP_GET_SUPER`.

Drifting down the compilation pipeline, our first stop is a new instruction.

^code super-invoke-op (1 before, 1 after)

And just past that, its disassembler support.

^code disassemble-super-invoke (1 before, 1 after)

A super invocation instruction has the same set of operands as `OP_INVOKE`, so
we reuse the same helper to disassemble it. Finally, the pipeline dumps us into
the interpreter.

^code interpret-super-invoke (2 before, 1 after)

This handful of code is basically our implementation of `OP_INVOKE` mixed
together with a dash of `OP_GET_SUPER`. There are some differences in how the
stack is organized, though. With an unoptimized super call, the superclass is
popped and replaced by the ObjBoundMethod for the resolved function *before* the
arguments to the call are executed. This ensures that by the time the `OP_CALL`
is executed, the bound method is *under* the argument list, where the runtime
expects it to be for a closure call.

With our optimized instructions, things are shuffled a bit:

<img src="image/superclasses/super-invoke.png" class="wide" alt="The series of bytecode instructions for calling super.finish() using OP_SUPER_INVOKE." />

Now resolving the superclass method is part of the *invocation*, so the
arguments need to already be on the stack at the point that we look up the
method. This means the superclass object is on top of the arguments.

Aside from that, the behavior is roughly the same as an `OP_GET_SUPER` followed
by an `OP_CALL`. First, we pull out the method name and argument count operands.
Then we pop the superclass off the top of the stack so that we can look up the
method in its method table. This conveniently leaves the stack set up just right
for a method call.

We pass the superclass, method name, and argument count to our existing
`invokeFromClass()` function. That function looks up the given method on the
given class and attempts to create a call to it with the given arity. If a
method could not be found, it returns `false`, and we bail out of the
interpreter. Otherwise, `invokeFromClass()` pushes a new CallFrame onto the call
stack for the method's closure. That invalidates the interpreter's cached
CallFrame pointer, so we refresh `frame`.

## A Complete Virtual Machine

Take a look back at what we've created. By my count, we wrote around 2,500 lines
of fairly clean, straightforward C. That little program contains a complete
implementation of the -- quite high-level! -- Lox language, with a whole
precedence table full of expression types and a suite of control flow
statements. We implemented variables, functions, closures, classes, fields,
methods, and inheritance.

Even more impressive, our implementation is portable to any platform with a C
compiler, and is fast enough for real-world production use. We have a
single-pass bytecode compiler, a tight virtual machine interpreter for our
internal instruction set, compact object representations, a stack for storing
variables without heap allocation, and a precise garbage collector.

If you go out and start poking around in the implementations of Lua, Python, or
Ruby, you will be surprised by how much of it now looks familiar to you. You
have seriously leveled up your knowledge of how programming languages work,
which in turn gives you a deeper understanding of programming itself. It's like
you used to be a race car driver, and now you can pop the hood and repair the
engine too.

You can stop here if you like. The two implementations of Lox you have are
complete and full featured. You built the car and can drive it wherever you want
now. But if you are looking to have more fun tuning and tweaking for even
greater performance out on the track, there is one more chapter. We don't add
any new capabilities, but we roll in a couple of classic optimizations to
squeeze even more perf out. If that sounds fun, [keep reading][opt]...

[opt]: optimization.html

<div class="challenges">

## Challenges

1.  A tenet of object-oriented programming is that a class should ensure new
    objects are in a valid state. In Lox, that means defining an initializer
    that populates the instance's fields. Inheritance complicates invariants
    because the instance must be in a valid state according to all of the
    classes in the object's inheritance chain.

    The easy part is remembering to call `super.init()` in each subclass's
    `init()` method. The harder part is fields. There is nothing preventing two
    classes in the inheritance chain from accidentally claiming the same field
    name. When this happens, they will step on each other's fields and possibly
    leave you with an instance in a broken state.

    If Lox was your language, how would you address this, if at all? If you
    would change the language, implement your change.

2.  Our copy-down inheritance optimization is valid only because Lox does not
    permit you to modify a class's methods after its declaration. This means we
    don't have to worry about the copied methods in the subclass getting out of
    sync with later changes to the superclass.

    Other languages, like Ruby, *do* allow classes to be modified after the
    fact. How do implementations of languages like that support class
    modification while keeping method resolution efficient?

3.  In the [jlox chapter on inheritance][inheritance], we had a challenge to
    implement the BETA language's approach to method overriding. Solve the
    challenge again, but this time in clox. Here's the description of the
    previous challenge:

    In Lox, as in most other object-oriented languages, when looking up a
    method, we start at the bottom of the class hierarchy and work our way up --
    a subclass's method is preferred over a superclass's. In order to get to the
    superclass method from within an overriding method, you use `super`.

    The language [BETA][] takes the [opposite approach][inner]. When you call a
    method, it starts at the *top* of the class hierarchy and works *down*. A
    superclass method wins over a subclass method. In order to get to the
    subclass method, the superclass method can call `inner`, which is sort of
    like the inverse of `super`. It chains to the next method down the
    hierarchy.

    The superclass method controls when and where the subclass is allowed to
    refine its behavior. If the superclass method doesn't call `inner` at all,
    then the subclass has no way of overriding or modifying the superclass's
    behavior.

    Take out Lox's current overriding and `super` behavior, and replace it with
    BETA's semantics. In short:

    *   When calling a method on a class, the method *highest* on the
        class's inheritance chain takes precedence.

    *   Inside the body of a method, a call to `inner` looks for a method with
        the same name in the nearest subclass along the inheritance chain
        between the class containing the `inner` and the class of `this`. If
        there is no matching method, the `inner` call does nothing.

    For example:

    ```lox
    class Doughnut {
      cook() {
        print "Fry until golden brown.";
        inner();
        print "Place in a nice box.";
      }
    }

    class BostonCream < Doughnut {
      cook() {
        print "Pipe full of custard and coat with chocolate.";
      }
    }

    BostonCream().cook();
    ```

    This should print:

    ```text
    Fry until golden brown.
    Pipe full of custard and coat with chocolate.
    Place in a nice box.
    ```

    Since clox is about not just implementing Lox, but doing so with good
    performance, this time around try to solve the challenge with an eye towards
    efficiency.

[inheritance]: inheritance.html
[inner]: http://journal.stuffwithstuff.com/2012/12/19/the-impoliteness-of-overriding-methods/
[beta]: https://beta.cs.au.dk/

</div>


================================================
FILE: book/the-lox-language.md
================================================
> What nicer thing can you do for somebody than make them breakfast?
>
> <cite>Anthony Bourdain</cite>

We'll spend the rest of this book illuminating every dark and sundry corner of
the Lox language, but it seems cruel to have you immediately start grinding out
code for the interpreter without at least a glimpse of what we're going to end
up with.

At the same time, I don't want to drag you through reams of language lawyering
and specification-ese before you get to touch your text <span
name="home">editor</span>. So this will be a gentle, friendly introduction to
Lox. It will leave out a lot of details and edge cases. We've got plenty of time
for those later.

<aside name="home">

A tutorial isn't very fun if you can't try the code out yourself. Alas, you
don't have a Lox interpreter yet, since you haven't built one!

Fear not. You can use [mine][repo].

[repo]: https://github.com/munificent/craftinginterpreters

</aside>

## Hello, Lox

Here's your very first taste of <span name="salmon">Lox</span>:

<aside name="salmon">

Your first taste of Lox, the language, that is. I don't know if you've ever had
the cured, cold-smoked salmon before. If not, give it a try too.

</aside>

```lox
// Your first Lox program!
print "Hello, world!";
```

As that `//` line comment and the trailing semicolon imply, Lox's syntax is a
member of the C family. (There are no parentheses around the string because
`print` is a built-in statement, and not a library function.)

Now, I won't claim that <span name="c">C</span> has a *great* syntax. If we
wanted something elegant, we'd probably mimic Pascal or Smalltalk. If we wanted
to go full Scandinavian-furniture-minimalism, we'd do a Scheme. Those all have
their virtues.

<aside name="c">

I'm surely biased, but I think Lox's syntax is pretty clean. C's most egregious
grammar problems are around types. Dennis Ritchie had this idea called
"[declaration reflects use][use]", where variable declarations mirror the
operations you would have to perform on the variable to get to a value of the
base type. Clever idea, but I don't think it worked out great in practice.

[use]: http://softwareengineering.stackexchange.com/questions/117024/why-was-the-c-syntax-for-arrays-pointers-and-functions-designed-this-way

Lox doesn't have static types, so we avoid that.

</aside>

What C-like syntax has instead is something you'll often find more valuable
in a language: *familiarity*. I know you are already comfortable with that style
because the two languages we'll be using to *implement* Lox -- Java and C --
also inherit it. Using a similar syntax for Lox gives you one less thing to
learn.

## A High-Level Language

While this book ended up bigger than I was hoping, it's still not big enough to
fit a huge language like Java in it. In order to fit two complete
implementations of Lox in these pages, Lox itself has to be pretty compact.

When I think of languages that are small but useful, what comes to mind are
high-level "scripting" languages like <span name="js">JavaScript</span>, Scheme,
and Lua. Of those three, Lox looks most like JavaScript, mainly because most
C-syntax languages do. As we'll learn later, Lox's approach to scoping hews
closely to Scheme. The C flavor of Lox we'll build in [Part III][] is heavily
indebted to Lua's clean, efficient implementation.

[part iii]: a-bytecode-virtual-machine.html

<aside name="js">

Now that JavaScript has taken over the world and is used to build ginormous
applications, it's hard to think of it as a "little scripting language". But
Brendan Eich hacked the first JS interpreter into Netscape Navigator in *ten
days* to make buttons animate on web pages. JavaScript has grown up since then,
but it was once a cute little language.

Because Eich slapped JS together with roughly the same raw materials and time as
an episode of MacGyver, it has some weird semantic corners where the duct tape
and paper clips show through. Things like variable hoisting, dynamically bound
`this`, holes in arrays, and implicit conversions.

I had the luxury of taking my time on Lox, so it should be a little cleaner.

</aside>

Lox shares two other aspects with those three languages:

### Dynamic typing

Lox is dynamically typed. Variables can store values of any type, and a single
variable can even store values of different types at different times. If you try
to perform an operation on values of the wrong type -- say, dividing a number by
a string -- then the error is detected and reported at runtime.

There are plenty of reasons to like <span name="static">static</span> types, but
they don't outweigh the pragmatic reasons to pick dynamic types for Lox. A
static type system is a ton of work to learn and implement. Skipping it gives
you a simpler language and a shorter book. We'll get our interpreter up and
executing bits of code sooner if we defer our type checking to runtime.

<aside name="static">

After all, the two languages we'll be using to *implement* Lox are both
statically typed.

</aside>

### Automatic memory management

High-level languages exist to eliminate error-prone, low-level drudgery, and what
could be more tedious than manually managing the allocation and freeing of
storage? No one rises and greets the morning sun with, "I can't wait to figure
out the correct place to call `free()` for every byte of memory I allocate
today!"

There are two main <span name="gc">techniques</span> for managing memory:
**reference counting** and **tracing garbage collection** (usually just called
**garbage collection** or **GC**). Ref counters are much simpler to implement --
I think that's why Perl, PHP, and Python all started out using them. But, over
time, the limitations of ref counting become too troublesome. All of those
languages eventually ended up adding a full tracing GC, or at least enough of
one to clean up object cycles.

<aside name="gc">

In practice, ref counting and tracing are more ends of a continuum than
opposing sides. Most ref counting systems end up doing some tracing to handle
cycles, and the write barriers of a generational collector look a bit like
retain calls if you squint.

For lots more on this, see "[A Unified Theory of Garbage Collection][gc]" (PDF).

[gc]: https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon04Unified.pdf

</aside>

Tracing garbage collection has a fearsome reputation. It *is* a little harrowing
working at the level of raw memory. Debugging a GC can sometimes leave you
seeing hex dumps in your dreams. But, remember, this book is about dispelling
magic and slaying those monsters, so we *are* going to write our own garbage
collector. I think you'll find the algorithm is quite simple and a lot of fun to
implement.

## Data Types

In Lox's little universe, the atoms that make up all matter are the built-in
data types. There are only a few:

*   **<span name="bool">Booleans</span>.** You can't code without logic and you
    can't logic without Boolean values. "True" and "false", the yin and yang of
    software. Unlike some ancient languages that repurpose an existing type to
    represent truth and falsehood, Lox has a dedicated Boolean type. We may
    be roughing it on this expedition, but we aren't *savages*.

    <aside name="bool">

    Boolean is the only data type in Lox named after a person, George
    Boole, which is why "Boolean" is capitalized. He died in 1864, nearly a
    century before digital computers turned his algebra into electricity. I
    wonder what he'd think to see his name all over billions of lines of Java
    code.

    </aside>

    There are two Boolean values, obviously, and a literal for each one.

    ```lox
    true;  // Not false.
    false; // Not *not* false.
    ```

*   **Numbers.** Lox has only one kind of number: double-precision floating
    point. Since floating-point numbers can also represent a wide range of
    integers, that covers a lot of territory, while keeping things simple.

    Full-featured languages have lots of syntax for numbers -- hexadecimal,
    scientific notation, octal, all sorts of fun stuff. We'll settle for basic
    integer and decimal literals.

    ```lox
    1234;  // An integer.
    12.34; // A decimal number.
    ```

*   **Strings.** We've already seen one string literal in the first example.
    Like most languages, they are enclosed in double quotes.

    ```lox
    "I am a string";
    "";    // The empty string.
    "123"; // This is a string, not a number.
    ```

    As we'll see when we get to implementing them, there is quite a lot of
    complexity hiding in that innocuous sequence of <span
    name="char">characters</span>.

    <aside name="char">

    Even that word "character" is a trickster. Is it ASCII? Unicode? A
    code point or a "grapheme cluster"? How are characters encoded? Is each
    character a fixed size, or can they vary?

    </aside>

*   **Nil.** There's one last built-in value who's never invited to the party
    but always seems to show up. It represents "no value". It's called "null" in
    many other languages. In Lox we spell it `nil`. (When we get to implementing
    it, that will help distinguish when we're talking about Lox's `nil` versus
    Java or C's `null`.)

    There are good arguments for not having a null value in a language since
    null pointer errors are the scourge of our industry. If we were doing a
    statically typed language, it would be worth trying to ban it. In a
    dynamically typed one, though, eliminating it is often more annoying
    than having it.

## Expressions

If built-in data types and their literals are atoms, then **expressions** must
be the molecules. Most of these will be familiar.

### Arithmetic

Lox features the basic arithmetic operators you know and love from C and other
languages:

```lox
add + me;
subtract - me;
multiply * me;
divide / me;
```

The subexpressions on either side of the operator are **operands**. Because
there are *two* of them, these are called **binary** operators. (It has nothing
to do with the ones-and-zeroes use of "binary".) Because the operator is <span
name="fixity">fixed</span> *in* the middle of the operands, these are also
called **infix** operators (as opposed to **prefix** operators where the
operator comes before the operands, and **postfix** where it comes after).

<aside name="fixity">

There are some operators that have more than two operands and the operators are
interleaved between them. The only one in wide usage is the "conditional" or
"ternary" operator of C and friends:

```c
condition ? thenArm : elseArm;
```

Some call these **mixfix** operators. A few languages let you define your own
operators and control how they are positioned -- their "fixity".

</aside>

One arithmetic operator is actually *both* an infix and a prefix one. The `-`
operator can also be used to negate a number.

```lox
-negateMe;
```

All of these operators work on numbers, and it's an error to pass any other
types to them. The exception is the `+` operator -- you can also pass it two
strings to concatenate them.

### Comparison and equality

Moving along, we have a few more operators that always return a Boolean result.
We can compare numbers (and only numbers), using Ye Olde Comparison Operators.

```lox
less < than;
lessThan <= orEqual;
greater > than;
greaterThan >= orEqual;
```

We can test two values of any kind for equality or inequality.

```lox
1 == 2;         // false.
"cat" != "dog"; // true.
```

Even different types.

```lox
314 == "pi"; // false.
```

Values of different types are *never* equivalent.

```lox
123 == "123"; // false.
```

I'm generally against implicit conversions.

### Logical operators

The not operator, a prefix `!`, returns `false` if its operand is true, and vice
versa.

```lox
!true;  // false.
!false; // true.
```

The other two logical operators really are control flow constructs in the guise
of expressions. An <span name="and">`and`</span> expression determines if two
values are *both* true. It returns the left operand if it's false, or the
right operand otherwise.

```lox
true and false; // false.
true and true;  // true.
```

And an `or` expression determines if *either* of two values (or both) are true.
It returns the left operand if it is true and the right operand otherwise.

```lox
false or false; // false.
true or false;  // true.
```

<aside name="and">

I used `and` and `or` for these instead of `&&` and `||` because Lox doesn't use
`&` and `|` for bitwise operators. It felt weird to introduce the
double-character forms without the single-character ones.

I also kind of like using words for these since they are really control flow
structures and not simple operators.

</aside>

The reason `and` and `or` are like control flow structures is that they
**short-circuit**. Not only does `and` return the left operand if it is false,
it doesn't even *evaluate* the right one in that case. Conversely
(contrapositively?), if the left operand of an `or` is true, the right is
skipped.

### Precedence and grouping

All of these operators have the same precedence and associativity that you'd
expect coming from C. (When we get to parsing, we'll get *way* more precise
about that.) In cases where the precedence isn't what you want, you can use `()`
to group stuff.

```lox
var average = (min + max) / 2;
```

Since they aren't very technically interesting, I've cut the remainder of the
typical operator menagerie out of our little language. No bitwise, shift,
modulo, or conditional operators. I'm not grading you, but you will get bonus
points in my heart if you augment your own implementation of Lox with them.

Those are the expression forms (except for a couple related to specific features
that we'll get to later), so let's move up a level.

## Statements

Now we're at statements. Where an expression's main job is to produce a *value*,
a statement's job is to produce an *effect*. Since, by definition, statements
don't evaluate to a value, to be useful they have to otherwise change the world
in some way -- usually modifying some state, reading input, or producing output.

You've seen a couple of kinds of statements already. The first one was:

```lox
print "Hello, world!";
```

A <span name="print">`print` statement</span> evaluates a single expression
and displays the result to the user. You've also seen some statements like:

<aside name="print">

Baking `print` into the language instead of just making it a core library
function is a hack. But it's a *useful* hack for us: it means our in-progress
interpreter can start producing output before we've implemented all of the
machinery required to define functions, look them up by name, and call them.

</aside>

```lox
"some expression";
```

An expression followed by a semicolon (`;`) promotes the expression to
statement-hood. This is called (imaginatively enough) an **expression
statement**.

If you want to pack a series of statements where a single one is expected, you
can wrap them up in a **block**.

```lox
{
  print "One statement.";
  print "Two statements.";
}
```

Blocks also affect scoping, which leads us to the next section...

## Variables

You declare variables using `var` statements. If you <span
name="omit">omit</span> the initializer, the variable's value defaults to `nil`.

<aside name="omit">

This is one of those cases where not having `nil` and forcing every variable to
be initialized to some value would be more annoying than dealing with `nil`
itself.

</aside>

```lox
var imAVariable = "here is my value";
var iAmNil;
```

Once declared, you can, naturally, access and assign a variable using its name.

<span name="breakfast"></span>

```lox
var breakfast = "bagels";
print breakfast; // "bagels".
breakfast = "beignets";
print breakfast; // "beignets".
```

<aside name="breakfast">

Can you tell that I tend to work on this book in the morning before I've had
anything to eat?

</aside>

I won't get into the rules for variable scope here, because we're going to spend
a surprising amount of time in later chapters mapping every square inch of the
rules. In most cases, it works like you would expect coming from C or Java.

## Control Flow

It's hard to write <span name="flow">useful</span> programs if you can't skip
some code or execute some more than once. That means control flow. In addition
to the logical operators we already covered, Lox lifts three statements straight
from C.

<aside name="flow">

We already have `and` and `or` for branching, and we *could* use recursion to
repeat code, so that's theoretically sufficient. It would be pretty awkward to
program that way in an imperative-styled language, though.

Scheme, on the other hand, has no built-in looping constructs. It *does* rely on
recursion for repetition. Smalltalk has no built-in branching constructs, and
relies on dynamic dispatch for selectively executing code.

</aside>

An `if` statement executes one of two statements based on some condition.

```lox
if (condition) {
  print "yes";
} else {
  print "no";
}
```

A `while` <span name="do">loop</span> executes the body repeatedly as long as
the condition expression evaluates to true.

```lox
var a = 1;
while (a < 10) {
  print a;
  a = a + 1;
}
```

<aside name="do">

I left `do while` loops out of Lox because they aren't that common and wouldn't
teach you anything that you won't already learn from `while`. Go ahead and add
it to your implementation if it makes you happy. It's your party.

</aside>

Finally, we have `for` loops.

```lox
for (var a = 1; a < 10; a = a + 1) {
  print a;
}
```

This loop does the same thing as the previous `while` loop. Most modern
languages also have some sort of <span name="foreach">`for-in`</span> or
`foreach` loop for explicitly iterating over various sequence types. In a real
language, that's nicer than the crude C-style `for` loop we got here. Lox keeps
it basic.

<aside name="foreach">

This is a concession I made because of how the implementation is split across
chapters. A `for-in` loop needs some sort of dynamic dispatch in the iterator
protocol to handle different kinds of sequences, but we don't get that until
after we're done with control flow. We could circle back and add `for-in` loops
later, but I didn't think doing so would teach you anything super interesting.

</aside>

## Functions

A function call expression looks the same as it does in C.

```lox
makeBreakfast(bacon, eggs, toast);
```

You can also call a function without passing anything to it.

```lox
makeBreakfast();
```

Unlike in, say, Ruby, the parentheses are mandatory in this case. If you leave them
off, the name doesn't *call* the function, it just refers to it.

A language isn't very fun if you can't define your own functions. In Lox, you do
that with <span name="fun">`fun`</span>.

<aside name="fun">

I've seen languages that use `fn`, `fun`, `func`, and `function`. I'm still
hoping to discover a `funct`, `functi`, or `functio` somewhere.

</aside>

```lox
fun printSum(a, b) {
  print a + b;
}
```

Now's a good time to clarify some <span name="define">terminology</span>. Some
people throw around "parameter" and "argument" like they are interchangeable
and, to many, they are. We're going to spend a lot of time splitting the finest
of downy hairs around semantics, so let's sharpen our words. From here on out:

*   An **argument** is an actual value you pass to a function when you call it.
    So a function *call* has an *argument* list. Sometimes you hear **actual
    parameter** used for these.

*   A **parameter** is a variable that holds the value of the argument inside
    the body of the function. Thus, a function *declaration* has a *parameter*
    list. Others call these **formal parameters** or simply **formals**.

<aside name="define">

Speaking of terminology, some statically typed languages like C make a
distinction between *declaring* a function and *defining* it. A declaration
binds the function's type to its name so that calls can be type-checked but does
not provide a body. A definition declares the function and also fills in the
body so that the function can be compiled.

Since Lox is dynamically typed, this distinction isn't meaningful. A function
declaration fully specifies the function including its body.

</aside>

The body of a function is always a block. Inside it, you can return a value
using a `return` statement.

```lox
fun returnSum(a, b) {
  return a + b;
}
```

If execution reaches the end of the block without hitting a `return`, it
<span name="sneaky">implicitly</span> returns `nil`.

<aside name="sneaky">

See, I told you `nil` would sneak in when we weren't looking.

</aside>

### Closures

Functions are *first class* in Lox, which just means they are real values that
you can get a reference to, store in variables, pass around, etc. This works:

```lox
fun addPair(a, b) {
  return a + b;
}

fun identity(a) {
  return a;
}

print identity(addPair)(1, 2); // Prints "3".
```

Since function declarations are statements, you can declare local functions
inside another function.

```lox
fun outerFunction() {
  fun localFunction() {
    print "I'm local!";
  }

  localFunction();
}
```

If you combine local functions, first-class functions, and block scope, you run
into this interesting situation:

```lox
fun returnFunction() {
  var outside = "outside";

  fun inner() {
    print outside;
  }

  return inner;
}

var fn = returnFunction();
fn();
```

Here, `inner()` accesses a local variable declared outside of its body in the
surrounding function. Is this kosher? Now that lots of languages have borrowed
this feature from Lisp, you probably know the answer is yes.

For that to work, `inner()` has to "hold on" to references to any surrounding
variables that it uses so that they stay around even after the outer function
has returned. We call functions that do this <span
name="closure">**closures**</span>. These days, the term is often used for *any*
first-class function, though it's sort of a misnomer if the function doesn't
happen to close over any variables.

<aside name="closure">

Peter J. Landin coined the term "closure". Yes, he invented damn near half the
terms in programming languages. Most of them came out of one incredible paper,
"[The Next 700 Programming Languages][svh]".

[svh]: https://homepages.inf.ed.ac.uk/wadler/papers/papers-we-love/landin-next-700.pdf

In order to implement these kinds of functions, you need to create a data
structure that bundles together the function's code and the surrounding
variables it needs. He called this a "closure" because it *closes over* and
holds on to the variables it needs.

</aside>

As you can imagine, implementing these adds some complexity because we can no
longer assume variable scope works strictly like a stack where local variables
evaporate the moment the function returns. We're going to have a fun time
learning how to make these work correctly and efficiently.

## Classes

Since Lox has dynamic typing, lexical (roughly, "block") scope, and closures,
it's about halfway to being a functional language. But as you'll see, it's
*also* about halfway to being an object-oriented language. Both paradigms have a
lot going for them, so I thought it was worth covering some of each.

Since classes have come under fire for not living up to their hype, let me first
explain why I put them into Lox and this book. There are really two questions:

### Why might any language want to be object oriented?

Now that object-oriented languages like Java have sold out and only play arena
shows, it's not cool to like them anymore. Why would anyone make a *new*
language with objects? Isn't that like releasing music on 8-track?

It is true that the "all inheritance all the time" binge of the '90s produced
some monstrous class hierarchies, but **object-oriented programming** (**OOP**)
is still pretty rad. Billions of lines of successful code have been written in
OOP languages, shipping millions of apps to happy users. Likely a majority of
working programmers today are using an object-oriented language. They can't all
be *that* wrong.

In particular, for a dynamically typed language, objects are pretty handy. We
need *some* way of defining compound data types to bundle blobs of stuff
together.

If we can also hang methods off of those, then we avoid the need to prefix all
of our functions with the name of the data type they operate on to avoid
colliding with similar functions for different types. In, say, Racket, you end
up having to name your functions like `hash-copy` (to copy a hash table) and
`vector-copy` (to copy a vector) so that they don't step on each other. Methods
are scoped to the object, so that problem goes away.

### Why is Lox object oriented?

I could claim objects are groovy but still out of scope for the book. Most
programming language books, especially ones that try to implement a whole
language, leave objects out. To me, that means the topic isn't well covered.
With such a widespread paradigm, that omission makes me sad.

Given how many of us spend all day *using* OOP languages, it seems like the
world could use a little documentation on how to *make* one. As you'll see, it
turns out to be pretty interesting. Not as hard as you might fear, but not as
simple as you might presume, either.

### Classes or prototypes

When it comes to objects, there are actually two approaches to them, [classes][]
and [prototypes][]. Classes came first, and are more common thanks to C++, Java,
C#, and friends. Prototypes were a virtually forgotten offshoot until JavaScript
accidentally took over the world.

[classes]: https://en.wikipedia.org/wiki/Class-based_programming
[prototypes]: https://en.wikipedia.org/wiki/Prototype-based_programming

In class-based languages, there are two core concepts: instances and classes.
Instances store the state for each object and have a reference to the instance's
class. Classes contain the methods and inheritance chain. To call a method on an
instance, there is always a level of indirection. You <span
name="dispatch">look</span> up the instance's class and then you find the method
*there*:

<aside name="dispatch">

In a statically typed language like C++, method lookup typically happens at
compile time based on the *static* type of the instance, giving you **static
dispatch**. In contrast, **dynamic dispatch** looks up the class of the actual
instance object at runtime. This is how virtual methods in statically typed
languages and all methods in a dynamically typed language like Lox work.

</aside>

<img src="image/the-lox-language/class-lookup.png" alt="How fields and methods are looked up on classes and instances" />

Prototype-based languages <span name="blurry">merge</span> these two concepts.
There are only objects -- no classes -- and each individual object may contain
state and methods. Objects can directly inherit from each other (or "delegate
to" in prototypal lingo):

<aside name="blurry">

In practice the line between class-based and prototype-based languages blurs.
JavaScript's "constructor function" notion [pushes you pretty hard][js new]
towards defining class-like objects. Meanwhile, class-based Ruby is perfectly
happy to let you attach methods to individual instances.

[js new]: http://gameprogrammingpatterns.com/prototype.html#what-about-javascript

</aside>

<img src="image/the-lox-language/prototype-lookup.png" alt="How fields and methods are looked up in a prototypal system" />

This means that in some ways prototypal languages are more fundamental than
class-based ones. They are really neat to implement because they're *so* simple.
Also, they can express lots of unusual patterns that classes steer you away
from.

But I've looked at a *lot* of code written in prototypal languages -- including
[some of my own devising][finch]. Do you know what people generally do with all
of the power and flexibility of prototypes? ...They use them to reinvent
classes.

[finch]: http://finch.stuffwithstuff.com/

I don't know *why* that is, but people naturally seem to prefer a class-based
(Classic? Classy?) style. Prototypes *are* simpler in the language, but they
seem to accomplish that only by <span name="waterbed">pushing</span> the
complexity onto the user. So, for Lox, we'll save our users the trouble and bake
classes right in.

<aside name="waterbed">

Larry Wall, Perl's inventor/prophet, calls this the "[waterbed theory][]". Some
complexity is essential and cannot be eliminated. If you push it down in one
place, it swells up in another.

[waterbed theory]: http://wiki.c2.com/?WaterbedTheory

Prototypal languages don't so much *eliminate* the complexity of classes as they
do make the *user* take that complexity by building their own class-like
metaprogramming libraries.

</aside>

### Classes in Lox

Enough rationale, let's see what we actually have. Classes encompass a
constellation of features in most languages. For Lox, I've selected what I think
are the brightest stars. You declare a class and its methods like so:

```lox
class Breakfast {
  cook() {
    print "Eggs a-fryin'!";
  }

  serve(who) {
    print "Enjoy your breakfast, " + who + ".";
  }
}
```

The body of a class contains its methods. They look like function declarations
but without the `fun` <span name="method">keyword</span>. When the class
declaration is executed, Lox creates a class object and stores that in a
variable named after the class. Just like functions, classes are first class in
Lox.

<aside name="method">

They are still just as fun, though.

</aside>

```lox
// Store it in variables.
var someVariable = Breakfast;

// Pass it to functions.
someFunction(Breakfast);
```

Next, we need a way to create instances. We could add some sort of `new`
keyword, but to keep things simple, in Lox the class itself is a factory
function for instances. Call a class like a function, and it produces a new
instance of itself.

```lox
var breakfast = Breakfast();
print breakfast; // "Breakfast instance".
```

### Instantiation and initialization

Classes that only have behavior aren't super useful. The idea behind
object-oriented programming is encapsulating behavior *and state* together. To
do that, you need fields. Lox, like other dynamically typed languages, lets you
freely add properties onto objects.

```lox
breakfast.meat = "sausage";
breakfast.bread = "sourdough";
```

Assigning to a field creates it if it doesn't already exist.

If you want to access a field or method on the current object from within a
method, you use good old `this`.

```lox
class Breakfast {
  serve(who) {
    print "Enjoy your " + this.meat + " and " +
        this.bread + ", " + who + ".";
  }

  // ...
}
```

Part of encapsulating data within an object is ensuring the object is in a valid
state when it's created. To do that, you can define an initializer. If your
class has a method named `init()`, it is called automatically when the object is
constructed. Any arguments passed to the class are forwarded to its
initializer.

```lox
class Breakfast {
  init(meat, bread) {
    this.meat = meat;
    this.bread = bread;
  }

  // ...
}

var baconAndToast = Breakfast("bacon", "toast");
baconAndToast.serve("Dear Reader");
// "Enjoy your bacon and toast, Dear Reader."
```

### Inheritance

Every object-oriented language lets you not only define methods, but reuse them
across multiple classes or objects. For that, Lox supports single inheritance.
When you declare a class, you can specify a class that it inherits from using a less-than
<span name="less">(`<`)</span> operator.

```lox
class Brunch < Breakfast {
  drink() {
    print "How about a Bloody Mary?";
  }
}
```

<aside name="less">

Why the `<` operator? I didn't feel like introducing a new keyword like
`extends`. Lox doesn't use `:` for anything else so I didn't want to reserve
that either. Instead, I took a page from Ruby and used `<`.

If you know any type theory, you'll notice it's not a *totally* arbitrary
choice. Every instance of a subclass is an instance of its superclass too, but
there may be instances of the superclass that are not instances of the subclass.
That means, in the universe of objects, the set of subclass objects is smaller
than the superclass's set, though type nerds usually use `<:` for that relation.

</aside>

Here, Brunch is the **derived class** or **subclass**, and Breakfast is the
**base class** or **superclass**.

Every method defined in the superclass is also available to its subclasses.

```lox
var benedict = Brunch("ham", "English muffin");
benedict.serve("Noble Reader");
```

Even the `init()` method gets <span name="init">inherited</span>. In practice,
the subclass usually wants to define its own `init()` method too. But the
original one also needs to be called so that the superclass can maintain its
state. We need some way to call a method on our own *instance* without hitting
our own *methods*.

<aside name="init">

Lox is different from C++, Java, and C#, which do not inherit constructors, but
similar to Smalltalk and Ruby, which do.

</aside>

As in Java, you use `super` for that.

```lox
class Brunch < Breakfast {
  init(meat, bread, drink) {
    super.init(meat, bread);
    this.drink = drink;
  }
}
```

That's about it for object orientation. I tried to keep the feature set minimal.
The structure of the book did force one compromise. Lox is not a *pure*
object-oriented language. In a true OOP language every object is an instance of
a class, even primitive values like numbers and Booleans.

Because we don't implement classes until well after we start working with the
built-in types, that would have been hard. So values of primitive types aren't
real objects in the sense of being instances of classes. They don't have methods
or properties. If I were trying to make Lox a real language for real users, I
would fix that.

## The Standard Library

We're almost done. That's the whole language, so all that's left is the "core"
or "standard" library -- the set of functionality that is implemented directly
in the interpreter and that all user-defined behavior is built on top of.

This is the saddest part of Lox. Its standard library goes beyond minimalism and
veers close to outright nihilism. For the sample code in the book, we only need
to demonstrate that code is running and doing what it's supposed to do. For
that, we already have the built-in `print` statement.

Later, when we start optimizing, we'll write some benchmarks and see how long it
takes to execute code. That means we need to track time, so we'll define one
built-in function, `clock()`, that returns the number of seconds since the
program started.

And... that's it. I know, right? It's embarrassing.

If you wanted to turn Lox into an actual useful language, the very first thing
you should do is flesh this out. String manipulation, trigonometric functions,
file I/O, networking, heck, even *reading input from the user* would help. But we
don't need any of that for this book, and adding it wouldn't teach you anything
interesting, so I've left it out.

Don't worry, we'll have plenty of exciting stuff in the language itself to keep
us busy.

<div class="challenges">

## Challenges

1. Write some sample Lox programs and run them (you can use the implementations
   of Lox in [my repository][repo]). Try to come up with edge case behavior I
   didn't specify here. Does it do what you expect? Why or why not?

2. This informal introduction leaves a *lot* unspecified. List several open
   questions you have about the language's syntax and semantics. What do you
   think the answers should be?

3. Lox is a pretty tiny language. What features do you think it is missing that
   would make it annoying to use for real programs? (Aside from the standard
   library, of course.)

</div>

<div class="design-note">

## Design Note: Expressions and Statements

Lox has both expressions and statements. Some languages omit the latter.
Instead, they treat declarations and control flow constructs as expressions too.
These "everything is an expression" languages tend to have functional pedigrees
and include most Lisps, SML, Haskell, Ruby, and CoffeeScript.

To do that, for each "statement-like" construct in the language, you need to
decide what value it evaluates to. Some of those are easy:

*   An `if` expression evaluates to the result of whichever branch is chosen.
    Likewise, a `switch` or other multi-way branch evaluates to whichever case
    is picked.

*   A variable declaration evaluates to the value of the variable.

*   A block evaluates to the result of the last expression in the sequence.

Some get a little stranger. What should a loop evaluate to? A `while` loop in
CoffeeScript evaluates to an array containing each element that the body
evaluated to. That can be handy, or a waste of memory if you don't need the
array.

You also have to decide how these statement-like expressions compose with other
expressions -- you have to fit them into the grammar's precedence table. For
example, Ruby allows:

```ruby
puts 1 + if true then 2 else 3 end + 4
```

Is this what you'd expect? Is it what your *users* expect? How does this affect
how you design the syntax for your "statements"? Note that Ruby has an explicit
`end` to tell when the `if` expression is complete. Without it, the `+ 4` would
likely be parsed as part of the `else` clause.

Turning every statement into an expression forces you to answer a few hairy
questions like that. In return, you eliminate some redundancy. C has both blocks
for sequencing statements, and the comma operator for sequencing expressions. It
has both the `if` statement and the `?:` conditional operator. If everything
were an expression in C, you could unify each of those.

Languages that do away with statements usually also feature **implicit returns**
-- a function automatically returns whatever value its body evaluates to without
need for some explicit `return` syntax. For small functions and methods, this is
really handy. In fact, many languages that do have statements have added syntax
like `=>` to be able to define functions whose body is the result of evaluating
a single expression.

But making *all* functions work that way can be a little strange. If you aren't
careful, your function will leak a return value even if you only intend it to
produce a side effect. In practice, though, users of these languages don't find
it to be a problem.

For Lox, I gave it statements for prosaic reasons. I picked a C-like syntax for
familiarity's sake, and trying to take the existing C statement syntax and
interpret it like expressions gets weird pretty fast.

</div>


================================================
FILE: book/types-of-values.md
================================================
> When you are a Bear of Very Little Brain, and you Think of Things, you find
> sometimes that a Thing which seemed very Thingish inside you is quite
> different when it gets out into the open and has other people looking at it.
>
> <cite>A. A. Milne, <em>Winnie-the-Pooh</em></cite>

The past few chapters were huge, packed full of complex techniques and pages of
code. In this chapter, there's only one new concept to learn and a scattering of
straightforward code. You've earned a respite.

Lox is <span name="unityped">dynamically</span> typed. A single variable can
hold a Boolean, number, or string at different points in time. At least, that's
the idea. Right now, in clox, all values are numbers. By the end of the chapter,
it will also support Booleans and `nil`. While those aren't super interesting,
they force us to figure out how our value representation can dynamically handle
different types.

<aside name="unityped">

There is a third category next to statically typed and dynamically typed:
**unityped**. In that paradigm, all variables have a single type, usually a
machine register integer. Unityped languages aren't common today, but some
Forths and BCPL, the language that inspired C, worked like this.

As of this moment, clox is unityped.

</aside>

## Tagged Unions

The nice thing about working in C is that we can build our data structures from
the raw bits up. The bad thing is that we *have* to do that. C doesn't give you
much for free at compile time and even less at runtime. As far as C is
concerned, the universe is an undifferentiated array of bytes. It's up to us to
decide how many of those bytes to use and what they mean.

In order to choose a value representation, we need to answer two key questions:

1.  **How do we represent the type of a value?** If you try to, say, multiply a
    number by `true`, we need to detect that error at runtime and report it. In
    order to do that, we need to be able to tell what a value's type is.

2.  **How do we store the value itself?** We need to not only be able to tell
    that three is a number, but that it's different from the number four. I
    know, seems obvious, right? But we're operating at a level where it's good
    to spell these things out.

Since we're not just designing this language but building it ourselves, when
answering these two questions we also have to keep in mind the implementer's
eternal quest: to do it *efficiently*.

Language hackers over the years have come up with a variety of clever ways to
pack the above information into as few bits as possible. For now, we'll start
with the simplest, classic solution: a **tagged union**. A value contains two
parts: a type "tag", and a payload for the actual value. To store the value's
type, we define an enum for each kind of value the VM supports.

^code value-type (2 before, 1 after)

<aside name="user-types">

The cases here cover each kind of value that has *built-in support in the VM*.
When we get to adding classes to the language, each class the user defines
doesn't need its own entry in this enum. As far as the VM is concerned, every
instance of a class is the same type: "instance".

In other words, this is the VM's notion of "type", not the user's.

</aside>

For now, we have only a couple of cases, but this will grow as we add strings,
functions, and classes to clox. In addition to the type, we also need to store
the data for the value -- the `double` for a number, `true` or `false` for a
Boolean. We could define a struct with fields for each possible type.

<img src="image/types-of-values/struct.png" alt="A struct with two fields laid next to each other in memory." />

But this is a waste of memory. A value can't simultaneously be both a number and
a Boolean. So at any point in time, only one of those fields will be used. C
lets you optimize this by defining a <span name="sum">union</span>. A union
looks like a struct except that all of its fields overlap in memory.

<aside name="sum">

If you're familiar with a language in the ML family, structs and unions in C
roughly mirror the difference between product and sum types, between tuples
and algebraic data types.

</aside>

<img src="image/types-of-values/union.png" alt="A union with two fields overlapping in memory." />

The size of a union is the size of its largest field. Since the fields all reuse
the same bits, you have to be very careful when working with them. If you store
data using one field and then access it using <span
name="reinterpret">another</span>, you will reinterpret what the underlying bits
mean.

<aside name="reinterpret">

Using a union to interpret bits as different types is the quintessence of C. It
opens up a number of clever optimizations and lets you slice and dice each byte
of memory in ways that memory-safe languages disallow. But it is also wildly
unsafe and will happily saw your fingers off if you don't watch out.

</aside>

As the name "tagged union" implies, our new value representation combines these
two parts into a single struct.

^code value (2 before, 2 after)

There's a field for the type tag, and then a second field containing the union
of all of the underlying values. On a 64-bit machine with a typical C compiler,
the layout looks like this:

<aside name="as">

A smart language hacker gave me the idea to use "as" for the name of the union
field because it reads nicely, almost like a cast, when you pull the various
values out.

</aside>

<img src="image/types-of-values/value.png" alt="The full value struct, with the type and as fields next to each other in memory." />

The four-byte type tag comes first, then the union. Most architectures prefer
values be aligned to their size. Since the union field contains an eight-byte
double, the compiler adds four bytes of <span name="pad">padding</span> after
the type field to keep that double on the nearest eight-byte boundary. That
means we're effectively spending eight bytes on the type tag, which only needs
to represent a number between zero and three. We could stuff the enum in a
smaller size, but all that would do is increase the padding.

<aside name="pad">

We could move the tag field *after* the union, but that doesn't help much
either. Whenever we create an array of Values -- which is where most of our
memory usage for Values will be -- the C compiler will insert that same padding
*between* each Value to keep the doubles aligned.

</aside>

So our Values are 16 bytes, which seems a little large. We'll improve it
[later][optimization]. In the meantime, they're still small enough to store on
the C stack and pass around by value. Lox's semantics allow that because the
only types we support so far are **immutable**. If we pass a copy of a Value
containing the number three to some function, we don't need to worry about the
caller seeing modifications to the value. You can't "modify" three. It's three
forever.

[optimization]: optimization.html

## Lox Values and C Values

That's our new value representation, but we aren't done. Right now, the rest of
clox assumes Value is an alias for `double`. We have code that does a straight C
cast from one to the other. That code is all broken now. So sad.

With our new representation, a Value can *contain* a double, but it's not
*equivalent* to it. There is a mandatory conversion step to get from one to the
other. We need to go through the code and insert those conversions to get clox
working again.

We'll implement these conversions as a handful of macros, one for each type and
operation. First, to promote a native C value to a clox Value:

^code value-macros (1 before, 2 after)

Each one of these takes a C value of the appropriate type and produces a Value
that has the correct type tag and contains the underlying value. This hoists
statically typed values up into clox's dynamically typed universe. In order to
*do* anything with a Value, though, we need to unpack it and get the C value
back out.

^code as-macros (1 before, 2 after)

<aside name="as-null">

There's no `AS_NIL` macro because there is only one `nil` value, so a Value with
type `VAL_NIL` doesn't carry any extra data.

</aside>

<span name="as-null">These</span> macros go in the opposite direction. Given a
Value of the right type, they unwrap it and return the corresponding raw C
value. The "right type" part is important! These macros directly access the
union fields. If we were to do something like:

```c
Value value = BOOL_VAL(true);
double number = AS_NUMBER(value);
```

Then we may open a smoldering portal to the Shadow Realm. It's not safe to use
any of the `AS_` macros unless we know the Value contains the appropriate type.
To that end, we define a last few macros to check a Value's type.

^code is-macros (1 before, 2 after)

<span name="universe">These</span> macros return `true` if the Value has that
type. Any time we call one of the `AS_` macros, we need to guard it behind a
call to one of these first. With these eight macros, we can now safely shuttle
data between Lox's dynamic world and C's static one.

<aside name="universe">

<img src="image/types-of-values/universe.png" alt="The earthly C firmament with the Lox heavens above." />

The `_VAL` macros lift a C value into the heavens. The `AS_` macros bring it
back down.

</aside>

## Dynamically Typed Numbers

We've got our value representation and the tools to convert to and from it. All
that's left to get clox running again is to grind through the code and fix every
place where data moves across that boundary. This is one of those sections of
the book that isn't exactly mind-blowing, but I promised I'd show you every
single line of code, so here we are.

The first values we create are the constants generated when we compile number
literals. After we convert the lexeme to a C double, we simply wrap it in a
Value before storing it in the constant table.

^code const-number-val (1 before, 1 after)

Over in the runtime, we have a function to print values.

^code print-number-value (1 before, 1 after)

Right before we send the Value to `printf()`, we unwrap it and extract the
double value. We'll revisit this function shortly to add the other types, but
let's get our existing code working first.

### Unary negation and runtime errors

The next simplest operation is unary negation. It pops a value off the stack,
negates it, and pushes the result. Now that we have other types of values, we
can't assume the operand is a number anymore. The user could just as well do:

```lox
print -false; // Uh...
```

We need to handle that gracefully, which means it's time for *runtime errors*.
Before performing an operation that requires a certain type, we need to make
sure the Value *is* that type.

For unary negation, the check looks like this:

^code op-negate (1 before, 1 after)

First, we check to see if the Value on top of the stack is a number. If it's
not, we report the runtime error and <span name="halt">stop</span> the
interpreter. Otherwise, we keep going. Only after this validation do we unwrap
the operand, negate it, wrap the result and push it.

<aside name="halt">

Lox's approach to error-handling is rather... *spare*. All errors are fatal and
immediately halt the interpreter. There's no way for user code to recover from
an error. If Lox were a real language, this is one of the first things I would
remedy.

</aside>

To access the Value, we use a new little function.

^code peek

It returns a Value from the stack but doesn't <span name="peek">pop</span> it.
The `distance` argument is how far down from the top of the stack to look: zero
is the top, one is one slot down, etc.

<aside name="peek">

Why not just pop the operand and then validate it? We could do that. In later
chapters, it will be important to leave operands on the stack to ensure the
garbage collector can find them if a collection is triggered in the middle of
the operation. I do the same thing here mostly out of habit.

</aside>

We report the runtime error using a new function that we'll get a lot of mileage
out of over the remainder of the book.

^code runtime-error

You've certainly *called* variadic functions -- ones that take a varying number
of arguments -- in C before: `printf()` is one. But you may not have *defined*
your own. This book isn't a C <span name="tutorial">tutorial</span>, so I'll
skim over it here, but basically the `...` and `va_list` stuff let us pass an
arbitrary number of arguments to `runtimeError()`. It forwards those on to
`vfprintf()`, which is the flavor of `printf()` that takes an explicit
`va_list`.

<aside name="tutorial">

If you are looking for a C tutorial, I love *[The C Programming Language][kr]*,
usually called "K&R" in honor of its authors. It's not entirely up to date, but
the quality of the writing more than makes up for it.

[kr]: https://www.cs.princeton.edu/~bwk/cbook.html

</aside>

Callers can pass a format string to `runtimeError()` followed by a number of
arguments, just like they can when calling `printf()` directly. `runtimeError()`
then formats and prints those arguments. We won't take advantage of that in this
chapter, but later chapters will produce formatted runtime error messages that
contain other data.

After we show the hopefully helpful error message, we tell the user which <span
name="stack">line</span> of their code was being executed when the error
occurred. Since we left the tokens behind in the compiler, we look up the line
in the debug information compiled into the chunk. If our compiler did its job
right, that corresponds to the line of source code that the bytecode was
compiled from.

We look into the chunk's debug line array using the current bytecode instruction
index *minus one*. That's because the interpreter advances past each instruction
before executing it. So, at the point that we call `runtimeError()`, the failed
instruction is the previous one.

<aside name="stack">

Just showing the immediate line where the error occurred doesn't provide much
context. Better would be a full stack trace. But we don't even have functions to
call yet, so there is no call stack to trace.

</aside>

In order to use `va_list` and the macros for working with it, we need to bring
in a standard header.

^code include-stdarg (1 after)

With this, our VM can not only do the right thing when we negate numbers (like
it used to before we broke it), but it also gracefully handles erroneous
attempts to negate other types (which we don't have yet, but still).

### Binary arithmetic operators

We have our runtime error machinery in place now, so fixing the binary operators
is easier even though they're more complex. We support four binary operators
today: `+`, `-`, `*`, and `/`. The only difference between them is which
underlying C operator they use. To minimize redundant code between the four
operators, we wrapped up the commonality in a big preprocessor macro that takes
the operator token as a parameter.

That macro seemed like overkill a [few chapters ago][], but we get the benefit
from it today. It lets us add the necessary type checking and conversions in one
place.

[few chapters ago]: a-virtual-machine.html#binary-operators

^code binary-op (1 before, 2 after)

Yeah, I realize that's a monster of a macro. It's not what I'd normally consider
good C practice, but let's roll with it. The changes are similar to what we did
for unary negate. First, we check that the two operands are both numbers. If
either isn't, we report a runtime error and yank the ejection seat lever.

If the operands are fine, we pop them both and unwrap them. Then we apply the
given operator, wrap the result, and push it back on the stack. Note that we
don't wrap the result by directly using `NUMBER_VAL()`. Instead, the wrapper to
use is passed in as a macro <span name="macro">parameter</span>. For our
existing arithmetic operators, the result is a number, so we pass in the
`NUMBER_VAL` macro.

<aside name="macro">

Did you know you can pass macros as parameters to macros? Now you do!

</aside>

^code op-arithmetic (1 before, 1 after)

Soon, I'll show you why we made the wrapping macro an argument.

## Two New Types

All of our existing clox code is back in working order. Finally, it's time to
add some new types. We've got a running numeric calculator that now does a
number of pointless paranoid runtime type checks. We can represent other types
internally, but there's no way for a user's program to ever create a Value of
one of those types.

Not until now, that is. We'll start by adding compiler support for the three new
literals: `true`, `false`, and `nil`. They're all pretty simple, so we'll do all
three in a single batch.

With number literals, we had to deal with the fact that there are billions of
possible numeric values. We attended to that by storing the literal's value in
the chunk's constant table and emitting a bytecode instruction that simply
loaded that constant. We could do the same thing for the new types. We'd store,
say, `true`, in the constant table, and use an `OP_CONSTANT` to read it out.

But given that there are literally (heh) only three possible values we need to
worry about with these new types, it's gratuitous -- and <span
name="small">slow!</span> -- to waste a two-byte instruction and a constant
table entry on them. Instead, we'll define three dedicated instructions to push
each of these literals on the stack.

<aside name="small" class="bottom">

I'm not kidding about dedicated operations for certain constant values being
faster. A bytecode VM spends much of its execution time reading and decoding
instructions. The fewer, simpler instructions you need for a given piece of
behavior, the faster it goes. Short instructions dedicated to common operations
are a classic optimization.

For example, the Java bytecode instruction set has dedicated instructions for
loading 0.0, 1.0, 2.0, and the integer values from -1 through 5. (This ends up
being a vestigial optimization given that most mature JVMs now JIT-compile the
bytecode to machine code before execution anyway.)

</aside>

^code literal-ops (1 before, 1 after)

Our scanner already treats `true`, `false`, and `nil` as keywords, so we can
skip right to the parser. With our table-based Pratt parser, we just need to
slot parser functions into the rows associated with those keyword token types.
We'll use the same function in all three slots. Here:

^code table-false (1 before, 1 after)

Here:

^code table-true (1 before, 1 after)

And here:

^code table-nil (1 before, 1 after)

When the parser encounters `false`, `nil`, or `true` in prefix position, it
calls this new parser function:

^code parse-literal

Since `parsePrecedence()` has already consumed the keyword token, all we need to
do is output the proper instruction. We <span name="switch">figure</span> that
out based on the type of token we parsed. Our front end can now compile Boolean
and nil literals to bytecode. Moving down the execution pipeline, we reach the
interpreter.

<aside name="switch">

We could have used separate parser functions for each literal and saved
ourselves a switch but that felt needlessly verbose to me. I think it's mostly a
matter of taste.

</aside>

^code interpret-literals (5 before, 1 after)

This is pretty self-explanatory. Each instruction summons the appropriate value
and pushes it onto the stack. We shouldn't forget our disassembler either.

^code disassemble-literals (2 before, 1 after)

With this in place, we can run this Earth-shattering program:

```lox
true
```

Except that when the interpreter tries to print the result, it blows up. We need
to extend `printValue()` to handle the new types too:

^code print-value (1 before, 1 after)

There we go! Now we have some new types. They just aren't very useful yet. Aside
from the literals, you can't really *do* anything with them. It will be a while
before `nil` comes into play, but we can start putting Booleans to work in the
logical operators.

### Logical not and falsiness

The simplest logical operator is our old exclamatory friend unary not.

```lox
print !true; // "false"
```

This new operation gets a new instruction.

^code not-op (1 before, 1 after)

We can reuse the `unary()` parser function we wrote for unary negation to
compile a not expression. We just need to slot it into the parsing table.

^code table-not (1 before, 1 after)

Because I knew we were going to do this, the `unary()` function already has a
switch on the token type to figure out which bytecode instruction to output. We
merely add another case.

^code compile-not (1 before, 3 after)

That's it for the front end. Let's head over to the VM and conjure this
instruction into life.

^code op-not (1 before, 1 after)

Like our previous unary operator, it pops the one operand, performs the
operation, and pushes the result. And, as we did there, we have to worry about
dynamic typing. Taking the logical not of `true` is easy, but there's nothing
preventing an unruly programmer from writing something like this:

```lox
print !nil;
```

For unary minus, we made it an error to negate anything that isn't a <span
name="negate">number</span>. But Lox, like most scripting languages, is more
permissive when it comes to `!` and other contexts where a Boolean is expected.
The rule for how other types are handled is called "falsiness", and we implement
it here:

<aside name="negate">

Now I can't help but try to figure out what it would mean to negate other types
of values. `nil` is probably its own negation, sort of like a weird pseudo-zero.
Negating a string could, uh, reverse it?

</aside>

^code is-falsey

Lox follows Ruby in that `nil` and `false` are falsey and every other value
behaves like `true`. We've got a new instruction we can generate, so we also
need to be able to *un*generate it in the disassembler.

^code disassemble-not (2 before, 1 after)

### Equality and comparison operators

That wasn't too bad. Let's keep the momentum going and knock out the equality
and comparison operators too: `==`, `!=`, `<`, `>`, `<=`, and `>=`. That covers
all of the operators that return Boolean results except the logical operators
`and` and `or`. Since those need to short-circuit (basically do a little
control flow) we aren't ready for them yet.

Here are the new instructions for those operators:

^code comparison-ops (1 before, 1 after)

Wait, only three? What about `!=`, `<=`, and `>=`? We could create instructions
for those too. Honestly, the VM would execute faster if we did, so we *should*
do that if the goal is performance.

But my main goal is to teach you about bytecode compilers. I want you to start
internalizing the idea that the bytecode instructions don't need to closely
follow the user's source code. The VM has total freedom to use whatever
instruction set and code sequences it wants as long as they have the right
user-visible behavior.

The expression `a != b` has the same semantics as `!(a == b)`, so the compiler
is free to compile the former as if it were the latter. Instead of a dedicated
`OP_NOT_EQUAL` instruction, it can output an `OP_EQUAL` followed by an `OP_NOT`.
Likewise, `a <= b` is the <span name="same">same</span> as `!(a > b)` and `a >=
b` is `!(a < b)`. Thus, we only need three new instructions.

<aside name="same" class="bottom">

*Is* `a <= b` always the same as `!(a > b)`? According to [IEEE 754][], all
comparison operators return false when an operand is NaN. That means `NaN <= 1`
is false and `NaN > 1` is also false. But our desugaring assumes the latter is
always the negation of the former.

For the book, we won't get hung up on this, but these kinds of details will
matter in your real language implementations.

[ieee 754]: https://en.wikipedia.org/wiki/IEEE_754

</aside>

Over in the parser, though, we do have six new operators to slot into the parse
table. We use the same `binary()` parser function from before. Here's the row
for `!=`:

^code table-equal (1 before, 1 after)

The remaining five operators are a little farther down in the table.

^code table-comparisons (1 before, 1 after)

Inside `binary()` we already have a switch to generate the right bytecode for
each token type. We add cases for the six new operators.

^code comparison-operators (1 before, 1 after)

The `==`, `<`, and `>` operators output a single instruction. The others output
a pair of instructions, one to evaluate the inverse operation, and then an
`OP_NOT` to flip the result. Six operators for the price of three instructions!

That means over in the VM, our job is simpler. Equality is the most general
operation.

^code interpret-equal (1 before, 1 after)

You can evaluate `==` on any pair of objects, even objects of different types.
There's enough complexity that it makes sense to shunt that logic over to a
separate function. That function always returns a C `bool`, so we can safely
wrap the result in a `BOOL_VAL`. The function relates to Values, so it lives
over in the "value" module.

^code values-equal-h (2 before, 1 after)

And here's the implementation:

^code values-equal

First, we check the types. If the Values have <span
name="equal">different</span> types, they are definitely not equal. Otherwise,
we unwrap the two Values and compare them directly.

<aside name="equal">

Some languages have "implicit conversions" where values of different types may
be considered equal if one can be converted to the other's type. For example,
the number 0 is equivalent to the string "0" in JavaScript. This looseness was a
large enough source of pain that JS added a separate "strict equality" operator,
`===`.

PHP considers the strings "1" and "01" to be equivalent because both can be
converted to equivalent numbers, though the ultimate reason is because PHP was
designed by a Lovecraftian eldritch god to destroy the mind.

Most dynamically typed languages that have separate integer and floating-point
number types consider values of different number types equal if the numeric
values are the same (so, say, 1.0 is equal to 1), though even that seemingly
innocuous convenience can bite the unwary.

</aside>

For each value type, we have a separate case that handles comparing the value
itself. Given how similar the cases are, you might wonder why we can't simply
`memcmp()` the two Value structs and be done with it. The problem is that
because of padding and different-sized union fields, a Value contains unused
bits. C gives no guarantee about what is in those, so it's possible that two
equal Values actually differ in memory that isn't used.

<img src="image/types-of-values/memcmp.png" alt="The memory representations of two equal values that differ in unused bytes." />

(You wouldn't believe how much pain I went through before learning this fact.)

Anyway, as we add more types to clox, this function will grow new cases. For
now, these three are sufficient. The other comparison operators are easier since
they work only on numbers.

^code interpret-comparison (3 before, 1 after)

We already extended the `BINARY_OP` macro to handle operators that return
non-numeric types. Now we get to use that. We pass in `BOOL_VAL` since the
result value type is Boolean. Otherwise, it's no different from plus or minus.

As always, the coda to today's aria is disassembling the new instructions.

^code disassemble-comparison (2 before, 1 after)

With that, our numeric calculator has become something closer to a general
expression evaluator. Fire up clox and type in:

```lox
!(5 - 4 > 3 * 2 == !nil)
```

OK, I'll admit that's maybe not the most *useful* expression, but we're making
progress. We have one missing built-in type with its own literal form: strings.
Those are much more complex because strings can vary in size. That tiny
difference turns out to have implications so large that we give strings [their
very own chapter][strings].

[strings]: strings.html

<div class="challenges">

## Challenges

1. We could reduce our binary operators even further than we did here. Which
   other instructions can you eliminate, and how would the compiler cope with
   their absence?

2. Conversely, we can improve the speed of our bytecode VM by adding more
   specific instructions that correspond to higher-level operations. What
   instructions would you define to speed up the kind of user code we added
   support for in this chapter?

</div>


================================================
FILE: book/welcome.md
================================================
This may be the beginning of a grand adventure. Programming languages encompass
a huge space to explore and play in. Plenty of room for your own creations to
share with others or just enjoy yourself. Brilliant computer scientists and
software engineers have spent entire careers traversing this land without ever
reaching the end. If this book is your first entry into the country, welcome.

The pages of this book give you a guided tour through some of the world of
languages. But before we strap on our hiking boots and venture out, we should
familiarize ourselves with the territory. The chapters in this part introduce
you to the basic concepts used by programming languages and how those concepts
are organized.

We will also get acquainted with Lox, the language we'll spend the rest of the
book implementing (twice).


================================================
FILE: c/chunk.c
================================================
//> Chunks of Bytecode chunk-c
#include <stdlib.h>

#include "chunk.h"
//> chunk-c-include-memory
#include "memory.h"
//< chunk-c-include-memory
//> Garbage Collection chunk-include-vm
#include "vm.h"
//< Garbage Collection chunk-include-vm

// Resets `chunk` to an empty state: zero count and capacity, no code or line
// buffers, and a constant array set up by initValueArray(). Nothing the chunk
// previously pointed to is released here.
void initChunk(Chunk* chunk) {
  chunk->count = 0;
  chunk->capacity = 0;
  chunk->code = NULL;
//> chunk-null-lines
  chunk->lines = NULL;
//< chunk-null-lines
//> chunk-init-constant-array
  initValueArray(&chunk->constants);
//< chunk-init-constant-array
}
//> free-chunk
// Hands the code and line buffers (both sized by `capacity`) to FREE_ARRAY and
// the constant array to freeValueArray(), then calls initChunk() so the chunk
// is left in the empty state.
void freeChunk(Chunk* chunk) {
  FREE_ARRAY(uint8_t, chunk->code, chunk->capacity);
//> chunk-free-lines
  FREE_ARRAY(int, chunk->lines, chunk->capacity);
//< chunk-free-lines
//> chunk-free-constants
  freeValueArray(&chunk->constants);
//< chunk-free-constants
  // Reset every field so no stale pointers or counts remain.
  initChunk(chunk);
}
//< free-chunk
/* Chunks of Bytecode write-chunk < Chunks of Bytecode write-chunk-with-line
void writeChunk(Chunk* chunk, uint8_t byte) {
*/
//> write-chunk
//> write-chunk-with-line
// Appends `byte` to the end of the chunk's code array and stores `line` at the
// same index in the lines array, then advances `count` by one.
void writeChunk(Chunk* chunk, uint8_t byte, int line) {
//< write-chunk-with-line
  // No free slot left: compute a larger capacity and resize the buffers.
  if (chunk->capacity < chunk->count + 1) {
    int oldCapacity = chunk->capacity;
    chunk->capacity = GROW_CAPACITY(oldCapacity);
    chunk->code = GROW_ARRAY(uint8_t, chunk->code,
        oldCapacity, chunk->capacity);
//> write-chunk-line
    // The lines array is resized with the same old and new capacities, so it
    // always has one slot per code byte.
    chunk->lines = GROW_ARRAY(int, chunk->lines,
        oldCapacity, chunk->capacity);
//< write-chunk-line
  }

  chunk->code[chunk->count] = byte;
//> chunk-write-line
  chunk->lines[chunk->count] = line;
//< chunk-write-line
  chunk->count++;
}
//< write-chunk
//> add-constant
// Appends `value` to the chunk's constant array and returns the index it was
// stored at (the array's count minus one after the write).
int addConstant(Chunk* chunk, Value value) {
//> Garbage Collection add-constant-push
  // The value sits on the VM stack for the duration of the write below.
  // NOTE(review): presumably this keeps it reachable if growing the array
  // triggers a collection -- confirm against the allocator/GC code.
  push(value);
//< Garbage Collection add-constant-push
  writeValueArray(&chunk->constants, value);
//> Garbage Collection add-constant-pop
  pop();
//< Garbage Collection add-constant-pop
  return chunk->constants.count - 1;
}
//< add-constant


================================================
FILE: c/chunk.h
================================================
//> Chunks of Bytecode chunk-h
#ifndef clox_chunk_h
#define clox_chunk_h

#include "common.h"
//> chunk-h-include-value
#include "value.h"
//< chunk-h-include-value
//> op-enum

// Opcodes for the bytecode instructions. No enumerator is given an explicit
// value, so each opcode's numeric encoding is determined by its position in
// this list; inserting or reordering entries renumbers everything after them.
typedef enum {
//> op-constant
  OP_CONSTANT,
//< op-constant
//> Types of Values literal-ops
  OP_NIL,
  OP_TRUE,
  OP_FALSE,
//< Types of Values literal-ops
//> Global Variables pop-op
  OP_POP,
//< Global Variables pop-op
//> Local Variables get-local-op
  OP_GET_LOCAL,
//< Local Variables get-local-op
//> Local Variables set-local-op
  OP_SET_LOCAL,
//< Local Variables set-local-op
//> Global Variables get-global-op
  OP_GET_GLOBAL,
//< Global Variables get-global-op
//> Global Variables define-global-op
  OP_DEFINE_GLOBAL,
//< Global Variables define-global-op
//> Global Variables set-global-op
  OP_SET_GLOBAL,
//< Global Variables set-global-op
//> Closures upvalue-ops
  OP_GET_UPVALUE,
  OP_SET_UPVALUE,
//< Closures upvalue-ops
//> Classes and Instances property-ops
  OP_GET_PROPERTY,
  OP_SET_PROPERTY,
//< Classes and Instances property-ops
//> Superclasses get-super-op
  OP_GET_SUPER,
//< Superclasses get-super-op
//> Types of Values comparison-ops
  OP_EQUAL,
  OP_GREATER,
  OP_LESS,
//< Types of Values comparison-ops
//> A Virtual Machine binary-ops
  OP_ADD,
  OP_SUBTRACT,
  OP_MULTIPLY,
  OP_DIVIDE,
//> Types of Values not-op
  OP_NOT,
//< Types of Values not-op
//< A Virtual Machine binary-ops
//> A Virtual Machine negate-op
  OP_NEGATE,
//< A Virtual Machine negate-op
//> Global Variables op-print
  OP_PRINT,
//< Global Variables op-print
//> Jumping Back and Forth jump-op
  OP_JUMP,
//< Jumping Back and Forth jump-op
//> Jumping Back and Forth jump-if-false-op
  OP_JUMP_IF_FALSE,
//< Jumping Back and Forth jump-if-false-op
//> Jumping Back and Forth loop-op
  OP_LOOP,
//< Jumping Back and Forth loop-op
//> Calls and Functions op-call
  OP_CALL,
//< Calls and Functions op-call
//> Methods and Initializers invoke-op
  OP_INVOKE,
//< Methods and Initializers invoke-op
//> Superclasses super-invoke-op
  OP_SUPER_INVOKE,
//< Superclasses super-invoke-op
//> Closures closure-op
  OP_CLOSURE,
//< Closures closure-op
//> Closures close-upvalue-op
  OP_CLOSE_UPVALUE,
//< Closures close-upvalue-op
  OP_RETURN,
//> Classes and Instances class-op
  OP_CLASS,
//< Classes and Instances class-op
//> Superclasses inherit-op
  OP_INHERIT,
//< Superclasses inherit-op
//> Methods and Initializers method-op
  OP_METHOD
//< Methods and Initializers method-op
} OpCode;
//< op-enum
//> chunk-struct

// A growable sequence of bytecode plus the data that travels with it.
//   count     - number of bytes currently written to `code`.
//   capacity  - number of slots allocated for `code` and for `lines`.
//   code      - the bytecode bytes.
//   lines     - source line recorded for the byte at the same index in `code`.
//   constants - values registered through addConstant().
typedef struct {
//> count-and-capacity
  int count;
  int capacity;
//< count-and-capacity
  uint8_t* code;
//> chunk-lines
  int* lines;
//< chunk-lines
//> chunk-constants
  ValueArray constants;
//< chunk-constants
} Chunk;
//< chunk-struct
//> init-chunk-h

// Puts `chunk` into the empty state without releasing anything.
void initChunk(Chunk* chunk);
//< init-chunk-h
//> free-chunk-h
// Releases the chunk's buffers and constants, then resets it to empty.
void freeChunk(Chunk* chunk);
//< free-chunk-h
/* Chunks of Bytecode write-chunk-h < Chunks of Bytecode write-chunk-with-line-h
void writeChunk(Chunk* chunk, uint8_t byte);
*/
//> write-chunk-with-line-h
// Appends `byte` to the chunk's code and records `line` at the same index.
void writeChunk(Chunk* chunk, uint8_t byte, int line);
//< write-chunk-with-line-h
//> add-constant-h
// Appends `value` to the chunk's constants and returns its index there.
int addConstant(Chunk* chunk, Value value);
//< add-constant-h
//< add-constant-h

#endif


================================================
FILE: c/clox.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 46;
	objects = {

/* Begin PBXBuildFile section */
		2905EA1B1CAC1C3900E258E5 /* memory.c in Sources */ = {isa = PBXBuildFile; fileRef = 2905EA191CAC1C3900E258E5 /* memory.c */; };
		293173A51D03628E0028CBCC /* chunk.c in Sources */ = {isa = PBXBuildFile; fileRef = 293173A31D03628E0028CBCC /* chunk.c */; };
		293173A81D0378530028CBCC /* value.c in Sources */ = {isa = PBXBuildFile; fileRef = 293173A61D0378530028CBCC /* value.c */; };
		2940770F1C8368CF0067320B /* vm.c in Sources */ = {isa = PBXBuildFile; fileRef = 2940770D1C8368CF0067320B /* vm.c */; };
		294077121C8369BC0067320B /* compiler.c in Sources */ = {isa = PBXBuildFile; fileRef = 294077101C8369BC0067320B /* compiler.c */; };
		296041FF1C5DCCD0007310F9 /* scanner.c in Sources */ = {isa = PBXBuildFile; fileRef = 296041FE1C5DCCD0007310F9 /* scanner.c */; };
		29815E3F1C5DCC3A004A67D8 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 29815E3E1C5DCC3A004A67D8 /* main.c */; };
		2984DBA21C83FD540075BAC3 /* object.c in Sources */ = {isa = PBXBuildFile; fileRef = 2984DBA01C83FD540075BAC3 /* object.c */; };
		29C6CA711C85EBE6009617A9 /* debug.c in Sources */ = {isa = PBXBuildFile; fileRef = 29C6CA6F1C85EBE6009617A9 /* debug.c */; };
		29CD6FB01CB6A3430005D92B /* table.c in Sources */ = {isa = PBXBuildFile; fileRef = 29CD6FAE1CB6A3430005D92B /* table.c */; };
/* End PBXBuildFile section */

/* Begin PBXCopyFilesBuildPhase section */
		292D23761E10F6590044C66E /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
		292D239F1E10F6C30044C66E /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
		292D23BC1E10F6E70044C66E /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
		29815E321C5DCBF7004A67D8 /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		2905EA191CAC1C3900E258E5 /* memory.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = memory.c; sourceTree = "<group>"; };
		2905EA1A1CAC1C3900E258E5 /* memory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory.h; sourceTree = "<group>"; };
		2905EA1C1CAC1DFB00E258E5 /* common.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = common.h; sourceTree = "<group>"; };
		292D23781E10F6590044C66E /* chap14_chunks */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = chap14_chunks; sourceTree = BUILT_PRODUCTS_DIR; };
		292D23A11E10F6C30044C66E /* chap15_virtual */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = chap15_virtual; sourceTree = BUILT_PRODUCTS_DIR; };
		292D23BE1E10F6E70044C66E /* chap16_scanning */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = chap16_scanning; sourceTree = BUILT_PRODUCTS_DIR; };
		293173A31D03628E0028CBCC /* chunk.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = chunk.c; sourceTree = "<group>"; };
		293173A41D03628E0028CBCC /* chunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chunk.h; sourceTree = "<group>"; };
		293173A61D0378530028CBCC /* value.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = value.c; sourceTree = "<group>"; };
		293173A71D0378530028CBCC /* value.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = value.h; sourceTree = "<group>"; };
		2940770D1C8368CF0067320B /* vm.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = vm.c; sourceTree = "<group>"; };
		2940770E1C8368CF0067320B /* vm.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vm.h; sourceTree = "<group>"; };
		294077101C8369BC0067320B /* compiler.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = compiler.c; sourceTree = "<group>"; };
		294077111C8369BC0067320B /* compiler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = compiler.h; sourceTree = "<group>"; };
		296041FE1C5DCCD0007310F9 /* scanner.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = scanner.c; sourceTree = "<group>"; };
		29815E341C5DCBF7004A67D8 /* clox */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = clox; sourceTree = BUILT_PRODUCTS_DIR; };
		29815E3E1C5DCC3A004A67D8 /* main.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
		29815E401C5DCCAC004A67D8 /* scanner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = scanner.h; sourceTree = "<group>"; };
		2984DBA01C83FD540075BAC3 /* object.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = object.c; sourceTree = "<group>"; };
		2984DBA11C83FD540075BAC3 /* object.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = object.h; sourceTree = "<group>"; };
		29C6CA6F1C85EBE6009617A9 /* debug.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = debug.c; sourceTree = "<group>"; };
		29C6CA701C85EBE6009617A9 /* debug.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = debug.h; sourceTree = "<group>"; };
		29CD6FAE1CB6A3430005D92B /* table.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = table.c; sourceTree = "<group>"; };
		29CD6FAF1CB6A3430005D92B /* table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = table.h; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		292D23751E10F6590044C66E /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		292D239E1E10F6C30044C66E /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		292D23BB1E10F6E70044C66E /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		29815E311C5DCBF7004A67D8 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		29815E2B1C5DCBF7004A67D8 = {
			isa = PBXGroup;
			children = (
				293173A41D03628E0028CBCC /* chunk.h */,
				293173A31D03628E0028CBCC /* chunk.c */,
				2905EA1C1CAC1DFB00E258E5 /* common.h */,
				294077111C8369BC0067320B /* compiler.h */,
				294077101C8369BC0067320B /* compiler.c */,
				29C6CA701C85EBE6009617A9 /* debug.h */,
				29C6CA6F1C85EBE6009617A9 /* debug.c */,
				29815E3E1C5DCC3A004A67D8 /* main.c */,
				2905EA1A1CAC1C3900E258E5 /* memory.h */,
				2905EA191CAC1C3900E258E5 /* memory.c */,
				2984DBA11C83FD540075BAC3 /* object.h */,
				2984DBA01C83FD540075BAC3 /* object.c */,
				29815E401C5DCCAC004A67D8 /* scanner.h */,
				296041FE1C5DCCD0007310F9 /* scanner.c */,
				29CD6FAF1CB6A3430005D92B /* table.h */,
				29CD6FAE1CB6A3430005D92B /* table.c */,
				293173A71D0378530028CBCC /* value.h */,
				293173A61D0378530028CBCC /* value.c */,
				2940770E1C8368CF0067320B /* vm.h */,
				2940770D1C8368CF0067320B /* vm.c */,
				29815E351C5DCBF7004A67D8 /* Products */,
			);
			sourceTree = "<group>";
		};
		29815E351C5DCBF7004A67D8 /* Products */ = {
			isa = PBXGroup;
			children = (
				29815E341C5DCBF7004A67D8 /* clox */,
				292D23781E10F6590044C66E /* chap14_chunks */,
				292D23A11E10F6C30044C66E /* chap15_virtual */,
				292D23BE1E10F6E70044C66E /* chap16_scanning */,
			);
			name = Products;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		292D23771E10F6590044C66E /* chap14_chunks */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 292D237C1E10F6590044C66E /* Build configuration list for PBXNativeTarget "chap14_chunks" */;
			buildPhases = (
				292D23741E10F6590044C66E /* Sources */,
				292D23751E10F6590044C66E /* Frameworks */,
				292D23761E10F6590044C66E /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = chap14_chunks;
			productName = chap14_chunks;
			productReference = 292D23781E10F6590044C66E /* chap14_chunks */;
			productType = "com.apple.product-type.tool";
		};
		292D23A01E10F6C30044C66E /* chap15_virtual */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 292D23A51E10F6C30044C66E /* Build configuration list for PBXNativeTarget "chap15_virtual" */;
			buildPhases = (
				292D239D1E10F6C30044C66E /* Sources */,
				292D239E1E10F6C30044C66E /* Frameworks */,
				292D239F1E10F6C30044C66E /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = chap15_virtual;
			productName = chap15_virtual;
			productReference = 292D23A11E10F6C30044C66E /* chap15_virtual */;
			productType = "com.apple.product-type.tool";
		};
		292D23BD1E10F6E70044C66E /* chap16_scanning */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 292D23C21E10F6E70044C66E /* Build configuration list for PBXNativeTarget "chap16_scanning" */;
			buildPhases = (
				292D23BA1E10F6E70044C66E /* Sources */,
				292D23BB1E10F6E70044C66E /* Frameworks */,
				292D23BC1E10F6E70044C66E /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = chap16_scanning;
			productName = chap16_scanning;
			productReference = 292D23BE1E10F6E70044C66E /* chap16_scanning */;
			productType = "com.apple.product-type.tool";
		};
		29815E331C5DCBF7004A67D8 /* clox */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 29815E3B1C5DCBF7004A67D8 /* Build configuration list for PBXNativeTarget "clox" */;
			buildPhases = (
				29815E301C5DCBF7004A67D8 /* Sources */,
				29815E311C5DCBF7004A67D8 /* Frameworks */,
				29815E321C5DCBF7004A67D8 /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = clox;
			productName = clox;
			productReference = 29815E341C5DCBF7004A67D8 /* clox */;
			productType = "com.apple.product-type.tool";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		29815E2C1C5DCBF7004A67D8 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				LastUpgradeCheck = 0830;
				ORGANIZATIONNAME = "Robert Nystrom";
				TargetAttributes = {
					292D23771E10F6590044C66E = {
						CreatedOnToolsVersion = 8.1;
						ProvisioningStyle = Automatic;
					};
					292D23A01E10F6C30044C66E = {
						CreatedOnToolsVersion = 8.1;
						ProvisioningStyle = Automatic;
					};
					292D23BD1E10F6E70044C66E = {
						CreatedOnToolsVersion = 8.1;
						ProvisioningStyle = Automatic;
					};
					29815E331C5DCBF7004A67D8 = {
						CreatedOnToolsVersion = 6.4;
					};
				};
			};
			buildConfigurationList = 29815E2F1C5DCBF7004A67D8 /* Build configuration list for PBXProject "clox" */;
			compatibilityVersion = "Xcode 3.2";
			developmentRegion = English;
			hasScannedForEncodings = 0;
			knownRegions = (
				English,
				en,
			);
			mainGroup = 29815E2B1C5DCBF7004A67D8;
			productRefGroup = 29815E351C5DCBF7004A67D8 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				29815E331C5DCBF7004A67D8 /* clox */,
				292D23771E10F6590044C66E /* chap14_chunks */,
				292D23A01E10F6C30044C66E /* chap15_virtual */,
				292D23BD1E10F6E70044C66E /* chap16_scanning */,
			);
		};
/* End PBXProject section */

/* Begin PBXSourcesBuildPhase section */
		292D23741E10F6590044C66E /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		292D239D1E10F6C30044C66E /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		292D23BA1E10F6E70044C66E /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		29815E301C5DCBF7004A67D8 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				293173A51D03628E0028CBCC /* chunk.c in Sources */,
				29CD6FB01CB6A3430005D92B /* table.c in Sources */,
				2905EA1B1CAC1C3900E258E5 /* memory.c in Sources */,
				2984DBA21C83FD540075BAC3 /* object.c in Sources */,
				296041FF1C5DCCD0007310F9 /* scanner.c in Sources */,
				293173A81D0378530028CBCC /* value.c in Sources */,
				2940770F1C8368CF0067320B /* vm.c in Sources */,
				29C6CA711C85EBE6009617A9 /* debug.c in Sources */,
				294077121C8369BC0067320B /* compiler.c in Sources */,
				29815E3F1C5DCC3A004A67D8 /* main.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin XCBuildConfiguration section */
		292D237D1E10F6590044C66E /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_SUSPICIOUS_MOVES = YES;
				CODE_SIGN_IDENTITY = "-";
				ENABLE_TESTABILITY = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.12;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		292D237E1E10F6590044C66E /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_SUSPICIOUS_MOVES = YES;
				CODE_SIGN_IDENTITY = "-";
				MACOSX_DEPLOYMENT_TARGET = 10.12;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
		292D23A61E10F6C30044C66E /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_SUSPICIOUS_MOVES = YES;
				CODE_SIGN_IDENTITY = "-";
				ENABLE_TESTABILITY = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.12;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		292D23A71E10F6C30044C66E /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_SUSPICIOUS_MOVES = YES;
				CODE_SIGN_IDENTITY = "-";
				MACOSX_DEPLOYMENT_TARGET = 10.12;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
		292D23C31E10F6E70044C66E /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_SUSPICIOUS_MOVES = YES;
				CODE_SIGN_IDENTITY = "-";
				ENABLE_TESTABILITY = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.12;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		292D23C41E10F6E70044C66E /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_SUSPICIOUS_MOVES = YES;
				CODE_SIGN_IDENTITY = "-";
				MACOSX_DEPLOYMENT_TARGET = 10.12;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
		29815E391C5DCBF7004A67D8 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.11;
				MTL_ENABLE_DEBUG_INFO = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = macosx;
				WARNING_CFLAGS = "-Wno-gnu-label-as-value";
			};
			name = Debug;
		};
		29815E3A1C5DCBF7004A67D8 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 3;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.11;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
				WARNING_CFLAGS = "-Wno-gnu-label-as-value";
			};
			name = Release;
		};
		29815E3C1C5DCBF7004A67D8 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES;
				GCC_C_LANGUAGE_STANDARD = c99;
				GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
				GCC_TREAT_INCOMPATIBLE_POINTER_TYPE_WARNINGS_AS_ERRORS = YES;
				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
				GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES;
				GCC_WARN_PEDANTIC = YES;
				GCC_WARN_SHADOW = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.14;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		29815E3D1C5DCBF7004A67D8 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES;
				GCC_C_LANGUAGE_STANDARD = c99;
				GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
				GCC_TREAT_INCOMPATIBLE_POINTER_TYPE_WARNINGS_AS_ERRORS = YES;
				GCC_TREAT_WARNINGS_AS_ERRORS = YES;
				GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES;
				GCC_WARN_PEDANTIC = YES;
				GCC_WARN_SHADOW = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.14;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		292D237C1E10F6590044C66E /* Build configuration list for PBXNativeTarget "chap14_chunks" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				292D237D1E10F6590044C66E /* Debug */,
				292D237E1E10F6590044C66E /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		292D23A51E10F6C30044C66E /* Build configuration list for PBXNativeTarget "chap15_virtual" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				292D23A61E10F6C30044C66E /* Debug */,
				292D23A71E10F6C30044C66E /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		292D23C21E10F6E70044C66E /* Build configuration list for PBXNativeTarget "chap16_scanning" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				292D23C31E10F6E70044C66E /* Debug */,
				292D23C41E10F6E70044C66E /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		29815E2F1C5DCBF7004A67D8 /* Build configuration list for PBXProject "clox" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				29815E391C5DCBF7004A67D8 /* Debug */,
				29815E3A1C5DCBF7004A67D8 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		29815E3B1C5DCBF7004A67D8 /* Build configuration list for PBXNativeTarget "clox" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				29815E3C1C5DCBF7004A67D8 /* Debug */,
				29815E3D1C5DCBF7004A67D8 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 29815E2C1C5DCBF7004A67D8 /* Project object */;
}


================================================
FILE: c/clox.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:clox.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: c/clox.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: c/clox.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>PreviewsEnabled</key>
	<false/>
</dict>
</plist>


================================================
FILE: c/clox.xcodeproj/xcshareddata/xcschemes/clox.xcscheme
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1130"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
      buildImplicitDependencies = "YES">
      <BuildActionEntries>
         <BuildActionEntry
            buildForTesting = "YES"
            buildForRunning = "YES"
            buildForProfiling = "YES"
            buildForArchiving = "YES"
            buildForAnalyzing = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "29815E331C5DCBF7004A67D8"
               BuildableName = "clox"
               BlueprintName = "clox"
               ReferencedContainer = "container:clox.xcodeproj">
            </BuildableReference>
         </BuildActionEntry>
      </BuildActionEntries>
   </BuildAction>
   <TestAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      shouldUseLaunchSchemeArgsEnv = "YES">
      <Testables>
      </Testables>
   </TestAction>
   <LaunchAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
      useCustomWorkingDirectory = "YES"
      customWorkingDirectory = "/Users/bob/Dropbox/Writing/Crafting Interpreters/interpreters"
      ignoresPersistentStateOnLaunch = "NO"
      debugDocumentVersioning = "YES"
      debugServiceExtension = "internal"
      allowLocationSimulation = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "29815E331C5DCBF7004A67D8"
            BuildableName = "clox"
            BlueprintName = "clox"
            ReferencedContainer = "container:clox.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
      <CommandLineArguments>
         <CommandLineArgument
            argument = "test/closure/reuse_closure_slot.lox"
            isEnabled = "YES">
         </CommandLineArgument>
      </CommandLineArguments>
   </LaunchAction>
   <ProfileAction
      buildConfiguration = "Release"
      shouldUseLaunchSchemeArgsEnv = "YES"
      savedToolIdentifier = ""
      useCustomWorkingDirectory = "NO"
      debugDocumentVersioning = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "29815E331C5DCBF7004A67D8"
            BuildableName = "clox"
            BlueprintName = "clox"
            ReferencedContainer = "container:clox.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </ProfileAction>
   <AnalyzeAction
      buildConfiguration = "Debug">
   </AnalyzeAction>
   <ArchiveAction
      buildConfiguration = "Release"
      revealArchiveInOrganizer = "YES">
   </ArchiveAction>
</Scheme>


================================================
FILE: c/common.h
================================================
//> Chunks of Bytecode common-h
#ifndef clox_common_h
#define clox_common_h

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
//> A Virtual Machine define-debug-trace

//> Optimization define-nan-boxing
// NOTE(review): presumably selects a NaN-boxed Value representation; the code
// that tests this flag is outside this header -- confirm there.
#define NAN_BOXING
//< Optimization define-nan-boxing
//> Compiling Expressions define-debug-print-code
// When defined, compiler.c includes "debug.h" (guarded by
// #ifdef DEBUG_PRINT_CODE). Undefined again at the end of this header.
#define DEBUG_PRINT_CODE
//< Compiling Expressions define-debug-print-code
// NOTE(review): presumably enables per-instruction tracing in the VM --
// confirm in vm.c. Undefined again at the end of this header.
#define DEBUG_TRACE_EXECUTION
//< A Virtual Machine define-debug-trace
//> Garbage Collection define-stress-gc

// NOTE(review): presumably forces the collector to run as often as possible
// -- confirm in memory.c. Undefined again at the end of this header.
#define DEBUG_STRESS_GC
//< Garbage Collection define-stress-gc
//> Garbage Collection define-log-gc
// NOTE(review): presumably turns on collector logging -- confirm in memory.c.
// Undefined again at the end of this header.
#define DEBUG_LOG_GC
//< Garbage Collection define-log-gc
//> Local Variables uint8-count

// Number of distinct values a uint8_t can hold (UINT8_MAX + 1 == 256); used
// as the size of fixed arrays that are indexed by a single byte.
#define UINT8_COUNT (UINT8_MAX + 1)
//< Local Variables uint8-count

#endif
//> omit
// In the book, we show them defined, but for working on them locally,
// we don't want them to be.
#undef DEBUG_PRINT_CODE
#undef DEBUG_TRACE_EXECUTION
#undef DEBUG_STRESS_GC
#undef DEBUG_LOG_GC
//< omit


================================================
FILE: c/compiler.c
================================================
//> Scanning on Demand compiler-c
#include <stdio.h>
//> Compiling Expressions compiler-include-stdlib
#include <stdlib.h>
//< Compiling Expressions compiler-include-stdlib
//> Local Variables compiler-include-string
#include <string.h>
//< Local Variables compiler-include-string

#include "common.h"
#include "compiler.h"
//> Garbage Collection compiler-include-memory
#include "memory.h"
//< Garbage Collection compiler-include-memory
#include "scanner.h"
//> Compiling Expressions include-debug

#ifdef DEBUG_PRINT_CODE
#include "debug.h"
#endif
//< Compiling Expressions include-debug
//> Compiling Expressions parser

// Token window and error flags used while compiling.
typedef struct {
  // The token that has been scanned but not yet consumed; check(), match()
  // and consume() all test this one.
  Token current;
  // The token most recently consumed by advance(); emitByte() uses its line.
  Token previous;
//> had-error-field
  // Set to true by errorAt() whenever an error is actually reported.
  bool hadError;
//< had-error-field
//> panic-mode-field
  // Set by errorAt() on the first reported error; while true, errorAt()
  // returns without printing anything.
  bool panicMode;
//< panic-mode-field
} Parser;
//> precedence

// Operator precedence levels. Enumerators are declared in ascending order, so
// a later entry has a numerically larger value than an earlier one; the
// trailing comment on each line lists the operators at that level.
typedef enum {
  PREC_NONE,
  PREC_ASSIGNMENT,  // =
  PREC_OR,          // or
  PREC_AND,         // and
  PREC_EQUALITY,    // == !=
  PREC_COMPARISON,  // < > <= >=
  PREC_TERM,        // + -
  PREC_FACTOR,      // * /
  PREC_UNARY,       // ! -
  PREC_CALL,        // . ()
  PREC_PRIMARY
} Precedence;
//< precedence
//> parse-fn-type

//< parse-fn-type
/* Compiling Expressions parse-fn-type < Global Variables parse-fn-type
typedef void (*ParseFn)();
*/
//> Global Variables parse-fn-type
// Pointer to a parse handler, stored in the prefix/infix slots of ParseRule.
// NOTE(review): `canAssign` presumably tells the handler whether an
// assignment target is permitted at this point -- confirm at the call sites.
typedef void (*ParseFn)(bool canAssign);
//< Global Variables parse-fn-type
//> parse-rule

// One row of parsing information: two optional handlers plus a precedence.
// NOTE(review): presumably there is one rule per token type, looked up by the
// expression parser further down this file -- confirm there.
typedef struct {
  // Handler used when the token begins an expression.
  ParseFn prefix;
  // Handler used when the token appears after a left-hand operand.
  ParseFn infix;
  // Precedence associated with the infix use of the token.
  Precedence precedence;
} ParseRule;
//< parse-rule
//> Local Variables local-struct

// Compile-time record of one local variable; stored in Compiler.locals.
typedef struct {
  // The identifier token that names the variable.
  Token name;
  // NOTE(review): presumably the scope nesting level at which the variable
  // was declared (compare Compiler.scopeDepth) -- confirm at the use sites.
  int depth;
//> Closures is-captured-field
  // NOTE(review): presumably true once a nested function closes over this
  // local -- confirm at the use sites.
  bool isCaptured;
//< Closures is-captured-field
} Local;
//< Local Variables local-struct
//> Closures upvalue-struct
// Compile-time description of one captured variable; stored in
// Compiler.upvalues.
typedef struct {
  // Slot number of the captured variable; fits in a single byte.
  uint8_t index;
  // NOTE(review): presumably true when `index` refers to a local of the
  // immediately enclosing function rather than to one of that function's own
  // upvalues -- confirm at the use sites.
  bool isLocal;
} Upvalue;
//< Closures upvalue-struct
//> Calls and Functions function-type-enum
// The kind of function body a Compiler is working on (see Compiler.type).
// NOTE(review): the per-kind meanings below are inferred from the names --
// confirm where the values are tested.
typedef enum {
  // An ordinary function declaration.
  TYPE_FUNCTION,
//> Methods and Initializers initializer-type-enum
  // A class initializer method.
  TYPE_INITIALIZER,
//< Methods and Initializers initializer-type-enum
//> Methods and Initializers method-type-enum
  // Any other method of a class.
  TYPE_METHOD,
//< Methods and Initializers method-type-enum
  // The implicit function wrapping top-level code.
  TYPE_SCRIPT
} FunctionType;
//< Calls and Functions function-type-enum
//> Local Variables compiler-struct

/* Local Variables compiler-struct < Calls and Functions enclosing-field
typedef struct {
*/
//> Calls and Functions enclosing-field
// State kept for one function while it is being compiled. Compilers link to
// each other through `enclosing`; the active one is the global `current`.
typedef struct Compiler {
  // The Compiler this one is nested inside.
  // NOTE(review): presumably NULL for the outermost compiler -- confirm where
  // compilers are initialized.
  struct Compiler* enclosing;
//< Calls and Functions enclosing-field
//> Calls and Functions function-fields
  // The function object being built; currentChunk() returns its chunk.
  ObjFunction* function;
  // Which kind of function body this is.
  FunctionType type;

//< Calls and Functions function-fields
  // Fixed-size table of local variables, UINT8_COUNT entries long.
  Local locals[UINT8_COUNT];
  // NOTE(review): presumably the number of entries of `locals` in use.
  int localCount;
//> Closures upvalues-array
  // Fixed-size table of captured variables, UINT8_COUNT entries long.
  Upvalue upvalues[UINT8_COUNT];
//< Closures upvalues-array
  // NOTE(review): presumably the current block nesting level, with 0 meaning
  // global scope -- confirm at the use sites.
  int scopeDepth;
} Compiler;
//< Local Variables compiler-struct
//> Methods and Initializers class-compiler-struct

// State kept for one class declaration while it is being compiled. Records
// link to each other through `enclosing`; the active one is the global
// `currentClass`.
typedef struct ClassCompiler {
  // The ClassCompiler of the surrounding class declaration, if any.
  struct ClassCompiler* enclosing;
//> Superclasses has-superclass
  // NOTE(review): presumably true when the class being compiled declares a
  // superclass -- confirm where the flag is set.
  bool hasSuperclass;
//< Superclasses has-superclass
} ClassCompiler;
//< Methods and Initializers class-compiler-struct

// The single parser instance; all the helpers in this file read and update it.
Parser parser;
//< Compiling Expressions parser
//> Local Variables current-compiler
// The Compiler for the function currently being compiled; NULL until one is
// installed. currentChunk() dereferences it.
Compiler* current = NULL;
//< Local Variables current-compiler
//> Methods and Initializers current-class
// The ClassCompiler for the innermost class being compiled; starts as NULL.
ClassCompiler* currentClass = NULL;
//< Methods and Initializers current-class
//< Methods and Initializers current-class
//> Compiling Expressions compiling-chunk
/* Compiling Expressions compiling-chunk < Calls and Functions current-chunk
Chunk* compilingChunk;

static Chunk* currentChunk() {
  return compilingChunk;
}
*/
//> Calls and Functions current-chunk

static Chunk* currentChunk() {
  return &current->function->chunk;
}
//< Calls and Functions current-chunk

//< Compiling Expressions compiling-chunk
//> Compiling Expressions error-at
// Prints a compile error for `token` to stderr and marks the parse as failed.
// Only the first error after entering panic mode is reported; later calls
// return immediately until something clears parser.panicMode.
static void errorAt(Token* token, const char* message) {
//> check-panic-mode
  if (parser.panicMode) return;
//< check-panic-mode
//> set-panic-mode
  parser.panicMode = true;
//< set-panic-mode
  fprintf(stderr, "[line %d] Error", token->line);

  // Describe where the error happened, depending on the kind of token.
  switch (token->type) {
    case TOKEN_EOF:
      fprintf(stderr, " at end");
      break;
    case TOKEN_ERROR:
      // Nothing.
      break;
    default:
      fprintf(stderr, " at '%.*s'", token->length, token->start);
      break;
  }

  fprintf(stderr, ": %s\n", message);
  parser.hadError = true;
}
//< Compiling Expressions error-at
//> Compiling Expressions error
static void error(const char* message) {
  errorAt(&parser.previous, message);
}
//< Compiling Expressions error
//> Compiling Expressions error-at-current
static void errorAtCurrent(const char* message) {
  errorAt(&parser.current, message);
}
//< Compiling Expressions error-at-current
//> Compiling Expressions advance

// Moves the parser one token forward: the old current token becomes
// parser.previous and the next token from the scanner becomes
// parser.current.
//
// Error tokens are never left in parser.current. Each one is reported (its
// `start` field is used as the error message) and scanning continues until a
// non-error token is found.
static void advance() {
  parser.previous = parser.current;

  parser.current = scanToken();
  while (parser.current.type == TOKEN_ERROR) {
    errorAtCurrent(parser.current.start);
    parser.current = scanToken();
  }
}
//< Compiling Expressions advance
//> Compiling Expressions consume
// Requires the current token to be of `type`. When it is, the token is
// consumed; otherwise `message` is reported at the current token and the
// parser does not advance.
static void consume(TokenType type, const char* message) {
  if (parser.current.type != type) {
    errorAtCurrent(message);
    return;
  }

  advance();
}
//< Compiling Expressions consume
//> Global Variables check
// Returns true when the current (not yet consumed) token has the given type.
// Does not consume anything.
static bool check(TokenType type) {
  TokenType upcoming = parser.current.type;
  return upcoming == type;
}
//< Global Variables check
//> Global Variables match
// Consumes the current token if it has the given type.
// Returns true when a token was consumed, false when the parser is left
// untouched.
static bool match(TokenType type) {
  if (check(type)) {
    advance();
    return true;
  }

  return false;
}
//< Global Variables match
//> Compiling Expressions emit-byte
static void emitByte(uint8_t byte) {
  writeChunk(currentChunk(), byte, parser.previous.line);
}
//< Compiling Expressions emit-byte
//> Compiling Expressions emit-bytes
// Appends `byte1` followed by `byte2` to the current chunk. Typically used
// for an opcode and its one-byte operand.
static void emitBytes(uint8_t byte1, uint8_t byte2) {
  const uint8_t pair[2] = {byte1, byte2};
  for (int i = 0; i < 2; i++) {
    emitByte(pair[i]);
  }
}
//< Compiling Expressions emit-bytes
//> Jumping Back and Forth emit-loop
static void emitLoop(int loopStart) {
  emitByte(OP_LOOP);

  int offset = currentChunk()->count - loopStart + 2;
  if (offset > UINT16_MAX) error("Loop body too large.");

  emitByte((offset >> 8) & 0xff);
  emitByte(offset & 0xff);
}
//< Jumping Back and Forth emit-loop
//> Jumping Back and Forth emit-jump
// Emits `instruction` followed by a two-byte placeholder operand (0xff 0xff).
// Returns the chunk offset of the first placeholder byte, which is the value
// to hand to patchJump once the jump target is known.
static int emitJump(uint8_t instruction) {
  emitByte(instruction);
  emitBytes(0xff, 0xff);

  int afterOperand = currentChunk()->count;
  return afterOperand - 2;
}
//< Jumping Back and Forth emit-jump
//> Compiling Expressions emit-return
// Emits the implicit return sequence for the function being compiled.
// An initializer pushes local slot 0 before returning; every other kind of
// function pushes nil.
static void emitReturn() {
/* Calls and Functions return-nil < Methods and Initializers return-this
  emitByte(OP_NIL);
*/
//> Methods and Initializers return-this
  if (current->type != TYPE_INITIALIZER) {
    emitByte(OP_NIL);
  } else {
    emitBytes(OP_GET_LOCAL, 0);
  }

//< Methods and Initializers return-this
  emitByte(OP_RETURN);
}
//< Compiling Expressions emit-return
//> Compiling Expressions make-constant
// Adds `value` to the current chunk's constants and returns its index as a
// one-byte operand.
//
// If the index does not fit in a byte an error is reported and 0 is returned
// instead; the value has already been added to the chunk at that point.
static uint8_t makeConstant(Value value) {
  int index = addConstant(currentChunk(), value);
  if (index <= UINT8_MAX) {
    return (uint8_t)index;
  }

  error("Too many constants in one chunk.");
  return 0;
}
//< Compiling Expressions make-constant
//> Compiling Expressions emit-constant
// Stores `value` as a constant of the current chunk and emits the
// OP_CONSTANT instruction that loads it.
static void emitConstant(Value value) {
  uint8_t index = makeConstant(value);
  emitBytes(OP_CONSTANT, index);
}
//< Compiling Expressions emit-constant
//> Jumping Back and Forth patch-jump
static void patchJump(int offset) {
  // -2 to adjust for the bytecode for the jump offset itself.
  int jump = currentChunk()->count - offset - 2;

  if (jump > UINT16_MAX) {
    error("Too much code to jump over.");
  }

  currentChunk()->code[offset] = (jump >> 8) & 0xff;
  currentChunk()->code[offset + 1] = jump & 0xff;
}
//< Jumping Back and Forth patch-jump
//> Local Variables init-compiler
/* Local Variables init-compiler < Calls and Functions init-compiler
static void initCompiler(Compiler* compiler) {
*/
//> Calls and Functions init-compiler
// Prepares `compiler` for compiling a body of the given `type` and makes it
// the current compiler, remembering the previous one in `enclosing`.
//
// A fresh function object is created. For anything but the top-level script
// its name is copied from the most recently consumed token. Local slot 0 is
// reserved: it is named "this" unless `type` is TYPE_FUNCTION, in which case
// it gets an empty name.
static void initCompiler(Compiler* compiler, FunctionType type) {
//> store-enclosing
  compiler->enclosing = current;
//< store-enclosing
  compiler->function = NULL;
  compiler->type = type;
//< Calls and Functions init-compiler
  compiler->localCount = 0;
  compiler->scopeDepth = 0;
//> Calls and Functions init-function
  compiler->function = newFunction();
//< Calls and Functions init-function
  current = compiler;
//> Calls and Functions init-function-name
  if (type != TYPE_SCRIPT) {
    Token* nameToken = &parser.previous;
    compiler->function->name = copyString(nameToken->start,
                                          nameToken->length);
  }
//< Calls and Functions init-function-name
//> Calls and Functions init-function-slot

  // Claim slot zero before any user-declared local can take it.
  Local* slotZero = &compiler->locals[compiler->localCount++];
  slotZero->depth = 0;
//> Closures init-zero-local-is-captured
  slotZero->isCaptured = false;
//< Closures init-zero-local-is-captured
/* Calls and Functions init-function-slot < Methods and Initializers slot-zero
  local->name.start = "";
  local->name.length = 0;
*/
//> Methods and Initializers slot-zero
  if (type == TYPE_FUNCTION) {
    slotZero->name.start = "";
    slotZero->name.length = 0;
  } else {
    slotZero->name.start = "this";
    slotZero->name.length = 4;
  }
//< Methods and Initializers slot-zero
//< Calls and Functions init-function-slot
}
//< Local Variables init-compiler
//> Compiling Expressions end-compiler
/* Compiling Expressions end-compiler < Calls and Functions end-compiler
static void endCompiler() {
*/
//> Calls and Functions end-compiler
static ObjFunction* endCompiler() {
//< Calls and Functions end-compiler
  emitReturn();
//> Calls and Functions end-function
  ObjFunction* function = current->function;

//< Calls and Functions end-function
//> dump-chunk
#ifdef DEBUG_PRINT_CODE
  if (!parser.hadError) {
/* Compiling Expressions dump-chunk < Calls and Functions disassemble-end
    disassembleChunk(currentChunk(), "code");
*/
//> Calls and Functions disassemble-end
    disassembleChunk(currentChunk(), function->name != NULL
        ? function->name->chars : "<script>");
//< Calls and Functions disassemble-end
  }
#endif
//< dump-chunk
//> Calls and Functions return-function

//> restore-enclosing
  current = current->enclosing;
//< restore-enclosing
  return function;
//< Calls and Functions return-function
}
//< Compiling Expressions end-compiler
//> Local Variables begin-scope
// Enters a new block scope by raising the current compiler's nesting depth.
static void beginScope() {
  current->scopeDepth += 1;
}
//< Local Variables begin-scope
//> Local Variables end-scope
static void endScope() {
  current->scopeDepth--;
//> pop-locals

  while (current->localCount > 0 &&
         current->locals[current->localCount - 1].depth >
            current->scopeDepth) {
/* Local Variables pop-locals < Closures end-scope
    emitByte(OP_POP);
*/
//> Closures end-scope
    if (current->locals[current->localCount - 1].isCaptured) {
      emitByte(OP_CLOSE_UPVALUE);
    } else {
      emitByte(OP_POP);
    }
//< Closures end-scope
    current->localCount--;
  }
//< pop-locals
}
//< Local Variables end-scope
//> Compiling Expressions forward-declarations

// Forward declarations for functions that are called earlier in this file
// than they are defined.
static void expression();
//> Global Variables forward-declarations
static void statement();
static void declaration();
//< Global Variables forward-declarations
// Looks up the rules-table entry for a token type.
static ParseRule* getRule(TokenType type);
// Parses an expression whose operators bind at least as tightly as
// `precedence`.
static void parsePrecedence(Precedence precedence);

//< Compiling Expressions forward-declarations
//> Global Variables identifier-constant
// Stores the lexeme of `name` as a string constant in the current chunk and
// returns the constant's index.
static uint8_t identifierConstant(Token* name) {
  Value nameValue = OBJ_VAL(copyString(name->start, name->length));
  return makeConstant(nameValue);
}
//< Global Variables identifier-constant
//> Local Variables identifiers-equal
// Returns true when the two tokens have lexemes of equal length and
// identical bytes.
static bool identifiersEqual(Token* a, Token* b) {
  return a->length == b->length &&
         memcmp(a->start, b->start, a->length) == 0;
}
//< Local Variables identifiers-equal
//> Local Variables resolve-local
// Looks for a local variable called `name` in `compiler`, searching from the
// most recently declared local backwards so that inner declarations win.
//
// Returns the local's slot index, or -1 when no local has that name. A match
// whose depth is still -1 (declared but not yet marked initialized) is
// reported as an error, and its index is returned all the same.
static int resolveLocal(Compiler* compiler, Token* name) {
  int slot = compiler->localCount;
  while (--slot >= 0) {
    Local* candidate = &compiler->locals[slot];
    if (!identifiersEqual(name, &candidate->name)) continue;

//> own-initializer-error
    if (candidate->depth == -1) {
      error("Can't read local variable in its own initializer.");
    }
//< own-initializer-error
    return slot;
  }

  return -1;
}
//< Local Variables resolve-local
//> Closures add-upvalue
// Registers an upvalue on `compiler`'s function and returns its index.
//
// `index` and `isLocal` are stored as given; resolveUpvalue passes either a
// local slot of the enclosing compiler (isLocal true) or one of its upvalue
// indices (isLocal false). An existing entry with the same pair is reused.
// When all UINT8_COUNT entries are taken an error is reported and 0 is
// returned.
static int addUpvalue(Compiler* compiler, uint8_t index,
                      bool isLocal) {
  int used = compiler->function->upvalueCount;
//> existing-upvalue

  for (int slot = 0; slot < used; slot++) {
    Upvalue* existing = &compiler->upvalues[slot];
    if (existing->index != index) continue;
    if (existing->isLocal == isLocal) return slot;
  }

//< existing-upvalue
//> too-many-upvalues
  if (used == UINT8_COUNT) {
    error("Too many closure variables in function.");
    return 0;
  }

//< too-many-upvalues
  Upvalue* fresh = &compiler->upvalues[used];
  fresh->isLocal = isLocal;
  fresh->index = index;
  compiler->function->upvalueCount = used + 1;
  return used;
}
//< Closures add-upvalue
//> Closures resolve-upvalue
// Resolves `name` as a variable captured from a function enclosing
// `compiler`.
//
// The directly enclosing compiler's locals are tried first; a hit marks that
// local as captured. Otherwise the search recurses outwards, and a hit
// further out is threaded through as an upvalue of the enclosing function.
// Returns the upvalue index in `compiler`, or -1 when there is no enclosing
// compiler or no enclosing function declares the name.
static int resolveUpvalue(Compiler* compiler, Token* name) {
  Compiler* outer = compiler->enclosing;
  if (outer == NULL) return -1;

  int outerLocal = resolveLocal(outer, name);
  if (outerLocal != -1) {
//> mark-local-captured
    outer->locals[outerLocal].isCaptured = true;
//< mark-local-captured
    return addUpvalue(compiler, (uint8_t)outerLocal, true);
  }

//> resolve-upvalue-recurse
  int outerUpvalue = resolveUpvalue(outer, name);
  if (outerUpvalue != -1) {
    return addUpvalue(compiler, (uint8_t)outerUpvalue, false);
  }

//< resolve-upvalue-recurse
  return -1;
}
//< Closures resolve-upvalue
//> Local Variables add-local
// Appends a local variable called `name` to the current compiler.
//
// The local starts with depth -1, which resolveLocal treats as "declared but
// not yet usable"; markInitialized later gives it its real depth. When all
// UINT8_COUNT slots are taken an error is reported and nothing is added.
static void addLocal(Token name) {
//> too-many-locals
  if (current->localCount == UINT8_COUNT) {
    error("Too many local variables in function.");
    return;
  }

//< too-many-locals
  int slot = current->localCount++;
  Local* local = &current->locals[slot];
  local->name = name;
/* Local Variables add-local < Local Variables declare-undefined
  local->depth = current->scopeDepth;
*/
//> declare-undefined
  local->depth = -1;
//< declare-undefined
//> Closures init-is-captured
  local->isCaptured = false;
//< Closures init-is-captured
}
//< Local Variables add-local
//> Local Variables declare-variable
// Declares the most recently consumed token as a local variable of the
// current scope. Does nothing at scope depth 0.
//
// Reports an error when a local with the same name already exists in the
// current scope. The local is added in either case.
static void declareVariable() {
  if (current->scopeDepth == 0) return;

  Token* name = &parser.previous;
//> existing-in-scope
  int slot = current->localCount;
  while (--slot >= 0) {
    Local* local = &current->locals[slot];
    // Locals with depth -1 are not yet initialized and still count as part
    // of the current scope.
    bool inCurrentScope =
        local->depth == -1 || local->depth >= current->scopeDepth;
    if (!inCurrentScope) {
      break; // [negative]
    }

    if (identifiersEqual(name, &local->name)) {
      error("Already a variable with this name in this scope.");
    }
  }

//< existing-in-scope
  addLocal(*name);
}
//< Local Variables declare-variable
//> Global Variables parse-variable
static uint8_t parseVariable(const char* errorMessage) {
  consume(TOKEN_IDENTIFIER, errorMessage);
//> Local Variables parse-local

  declareVariable();
  if (current->scopeDepth > 0) return 0;

//< Local Variables parse-local
  return identifierConstant(&parser.previous);
}
//< Global Variables parse-variable
//> Local Variables mark-initialized
static void markInitialized() {
//> Calls and Functions check-depth
  if (current->scopeDepth == 0) return;
//< Calls and Functions check-depth
  current->locals[current->localCount - 1].depth =
      current->scopeDepth;
}
//< Local Variables mark-initialized
//> Global Variables define-variable
// Completes a variable declaration.
//
// Inside a scope the newest local is marked initialized and `global` is
// ignored. At scope depth 0 an OP_DEFINE_GLOBAL instruction is emitted with
// `global` (the name's constant index) as its operand.
static void defineVariable(uint8_t global) {
//> Local Variables define-variable
  bool isLocal = current->scopeDepth > 0;
  if (isLocal) {
//> define-local
    markInitialized();
//< define-local
    return;
  }

//< Local Variables define-variable
  emitBytes(OP_DEFINE_GLOBAL, global);
}
//< Global Variables define-variable
//> Calls and Functions argument-list
// Compiles a comma-separated argument list up to and including the closing
// ')' and returns how many arguments were compiled.
//
// An error is reported when an argument is compiled while the count already
// stands at 255.
static uint8_t argumentList() {
  uint8_t argCount = 0;

  bool moreArguments = !check(TOKEN_RIGHT_PAREN);
  while (moreArguments) {
    expression();
//> arg-limit
    if (argCount == 255) {
      error("Can't have more than 255 arguments.");
    }
//< arg-limit
    argCount++;
    moreArguments = match(TOKEN_COMMA);
  }

  consume(TOKEN_RIGHT_PAREN, "Expect ')' after arguments.");
  return argCount;
}
//< Calls and Functions argument-list
//> Jumping Back and Forth and
// Infix parser for `and`. The left operand has already been compiled.
//
// Emits an OP_JUMP_IF_FALSE over the right operand; on the fall-through path
// the left value is popped and the right operand is compiled in its place.
static void and_(bool canAssign) {
  int skipRight = emitJump(OP_JUMP_IF_FALSE);

  emitByte(OP_POP);
  parsePrecedence(PREC_AND);

  patchJump(skipRight);
}
//< Jumping Back and Forth and
//> Compiling Expressions binary
/* Compiling Expressions binary < Global Variables binary
static void binary() {
*/
//> Global Variables binary
static void binary(bool canAssign) {
//< Global Variables binary
  TokenType operatorType = parser.previous.type;
  ParseRule* rule = getRule(operatorType);
  parsePrecedence((Precedence)(rule->precedence + 1));

  switch (operatorType) {
//> Types of Values comparison-operators
    case TOKEN_BANG_EQUAL:    emitBytes(OP_EQUAL, OP_NOT); break;
    case TOKEN_EQUAL_EQUAL:   emitByte(OP_EQUAL); break;
    case TOKEN_GREATER:       emitByte(OP_GREATER); break;
    case TOKEN_GREATER_EQUAL: emitBytes(OP_LESS, OP_NOT); break;
    case TOKEN_LESS:          emitByte(OP_LESS); break;
    case TOKEN_LESS_EQUAL:    emitBytes(OP_GREATER, OP_NOT); break;
//< Types of Values comparison-operators
    case TOKEN_PLUS:          emitByte(OP_ADD); break;
    case TOKEN_MINUS:         emitByte(OP_SUBTRACT); break;
    case TOKEN_STAR:          emitByte(OP_MULTIPLY); break;
    case TOKEN_SLASH:         emitByte(OP_DIVIDE); break;
    default: return; // Unreachable.
  }
}
//< Compiling Expressions binary
//> Calls and Functions compile-call
// Infix parser for a call: compiles the argument list and emits OP_CALL with
// the argument count as operand.
static void call(bool canAssign) {
  emitBytes(OP_CALL, argumentList());
}
//< Calls and Functions compile-call
//> Classes and Instances compile-dot
// Infix parser for `.name` after an expression.
//
// Depending on what follows the property name this emits:
//   - OP_SET_PROPERTY for `= value`, only when `canAssign` is true,
//   - OP_INVOKE plus an argument count for `(args)`,
//   - OP_GET_PROPERTY otherwise.
// The operand of each instruction is the property name's constant index.
static void dot(bool canAssign) {
  consume(TOKEN_IDENTIFIER, "Expect property name after '.'.");
  uint8_t name = identifierConstant(&parser.previous);

  if (canAssign && match(TOKEN_EQUAL)) {
    expression();
    emitBytes(OP_SET_PROPERTY, name);
    return;
  }

//> Methods and Initializers parse-call
  if (match(TOKEN_LEFT_PAREN)) {
    uint8_t argCount = argumentList();
    emitBytes(OP_INVOKE, name);
    emitByte(argCount);
    return;
  }

//< Methods and Initializers parse-call
  emitBytes(OP_GET_PROPERTY, name);
}
//< Classes and Instances compile-dot
//> Types of Values parse-literal
/* Types of Values parse-literal < Global Variables parse-literal
static void literal() {
*/
//> Global Variables parse-literal
static void literal(bool canAssign) {
//< Global Variables parse-literal
  switch (parser.previous.type) {
    case TOKEN_FALSE: emitByte(OP_FALSE); break;
    case TOKEN_NIL: emitByte(OP_NIL); break;
    case TOKEN_TRUE: emitByte(OP_TRUE); break;
    default: return; // Unreachable.
  }
}
//< Types of Values parse-literal
//> Compiling Expressions grouping
/* Compiling Expressions grouping < Global Variables grouping
static void grouping() {
*/
//> Global Variables grouping
// Prefix parser for a parenthesized expression. parsePrecedence has already
// consumed the '(' before calling this. Compiles the inner expression and
// requires the closing ')'; no instruction of its own is emitted.
static void grouping(bool canAssign) {
//< Global Variables grouping
  expression();
  consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
}
//< Compiling Expressions grouping
/* Compiling Expressions number < Global Variables number
static void number() {
*/
//> Compiling Expressions number
//> Global Variables number
static void number(bool canAssign) {
//< Global Variables number
  double value = strtod(parser.previous.start, NULL);
/* Compiling Expressions number < Types of Values const-number-val
  emitConstant(value);
*/
//> Types of Values const-number-val
  emitConstant(NUMBER_VAL(value));
//< Types of Values const-number-val
}
//< Compiling Expressions number
//> Jumping Back and Forth or
// Infix parser for `or`. The left operand has already been compiled.
//
// OP_JUMP_IF_FALSE skips over an unconditional OP_JUMP. When the jump-if-false
// is not taken, the OP_JUMP skips the right operand, leaving the left value
// as the result. When it is taken, the left value is popped and the right
// operand is compiled in its place.
static void or_(bool canAssign) {
  int toRightOperand = emitJump(OP_JUMP_IF_FALSE);
  int pastRightOperand = emitJump(OP_JUMP);

  patchJump(toRightOperand);
  emitByte(OP_POP);

  parsePrecedence(PREC_OR);
  patchJump(pastRightOperand);
}
//< Jumping Back and Forth or
/* Strings parse-string < Global Variables string
static void string() {
*/
//> Strings parse-string
//> Global Variables string
// Prefix parser for a string literal: copies the token's lexeme without its
// first and last character (presumably the surrounding quotes) and emits the
// resulting string object as a constant.
static void string(bool canAssign) {
//< Global Variables string
  const char* contents = parser.previous.start + 1;
  int contentLength = parser.previous.length - 2;
  emitConstant(OBJ_VAL(copyString(contents, contentLength)));
}
//< Strings parse-string
/* Global Variables read-named-variable < Global Variables named-variable-signature
static void namedVariable(Token name) {
*/
//> Global Variables named-variable-signature
// Emits the instruction that reads or assigns the variable called `name`.
//
// The name is resolved in order as a local of the current function, then as
// an upvalue captured from an enclosing function, and finally as a global.
// That choice selects the get/set opcode pair and the operand (local slot,
// upvalue index, or index of the name constant).
//
// When `canAssign` is true and an '=' follows, the right-hand side is
// compiled and the set instruction is emitted; otherwise the get instruction
// is emitted.
static void namedVariable(Token name, bool canAssign) {
//< Global Variables named-variable-signature
/* Global Variables read-named-variable < Local Variables named-local
  uint8_t arg = identifierConstant(&name);
*/
//> Global Variables read-named-variable
//> Local Variables named-local
  uint8_t getOp, setOp;
  // resolveLocal / resolveUpvalue return -1 for "not found", so `arg` is an
  // int here and is only narrowed to a byte when emitted.
  int arg = resolveLocal(current, &name);
  if (arg != -1) {
    getOp = OP_GET_LOCAL;
    setOp = OP_SET_LOCAL;
//> Closures named-variable-upvalue
  } else if ((arg = resolveUpvalue(current, &name)) != -1) {
    getOp = OP_GET_UPVALUE;
    setOp = OP_SET_UPVALUE;
//< Closures named-variable-upvalue
  } else {
    // Neither a local nor an upvalue: fall back to a global looked up by
    // name at run time.
    arg = identifierConstant(&name);
    getOp = OP_GET_GLOBAL;
    setOp = OP_SET_GLOBAL;
  }
//< Local Variables named-local
/* Global Variables read-named-variable < Global Variables named-variable
  emitBytes(OP_GET_GLOBAL, arg);
*/
//> named-variable

/* Global Variables named-variable < Global Variables named-variable-can-assign
  if (match(TOKEN_EQUAL)) {
*/
//> named-variable-can-assign
  if (canAssign && match(TOKEN_EQUAL)) {
//< named-variable-can-assign
    expression();
/* Global Variables named-variable < Local Variables emit-set
    emitBytes(OP_SET_GLOBAL, arg);
*/
//> Local Variables emit-set
    emitBytes(setOp, (uint8_t)arg);
//< Local Variables emit-set
  } else {
/* Global Variables named-variable < Local Variables emit-get
    emitBytes(OP_GET_GLOBAL, arg);
*/
//> Local Variables emit-get
    emitBytes(getOp, (uint8_t)arg);
//< Local Variables emit-get
  }
//< named-variable
}
//< Global Variables read-named-variable
/* Global Variables variable-without-assign < Global Variables variable
static void variable() {
  namedVariable(parser.previous);
}
*/
//> Global Variables variable
static void variable(bool canAssign) {
  namedVariable(parser.previous, canAssign);
}
//< Global Variables variable
//> Superclasses synthetic-token
// Builds a token that does not come from the source text, so that helpers
// taking a Token (such as namedVariable and addLocal) can be used with a
// fixed name like "this" or "super".
//
// `text` must be NUL-terminated and must outlive the returned token: the
// token stores the pointer and does not copy the characters.
//
// Every member gets a defined value. Members other than `start` and `length`
// are zero-initialized, so returning, copying or storing the token never
// reads indeterminate memory.
static Token syntheticToken(const char* text) {
  Token token = {0};
  token.start = text;
  token.length = (int)strlen(text);
  return token;
}
//< Superclasses synthetic-token
//> Superclasses super
static void super_(bool canAssign) {
//> super-errors
  if (currentClass == NULL) {
    error("Can't use 'super' outside of a class.");
  } else if (!currentClass->hasSuperclass) {
    error("Can't use 'super' in a class with no superclass.");
  }

//< super-errors
  consume(TOKEN_DOT, "Expect '.' after 'super'.");
  consume(TOKEN_IDENTIFIER, "Expect superclass method name.");
  uint8_t name = identifierConstant(&parser.previous);
//> super-get
  
  namedVariable(syntheticToken("this"), false);
/* Superclasses super-get < Superclasses super-invoke
  namedVariable(syntheticToken("super"), false);
  emitBytes(OP_GET_SUPER, name);
*/
//< super-get
//> super-invoke
  if (match(TOKEN_LEFT_PAREN)) {
    uint8_t argCount = argumentList();
    namedVariable(syntheticToken("super"), false);
    emitBytes(OP_SUPER_INVOKE, name);
    emitByte(argCount);
  } else {
    namedVariable(syntheticToken("super"), false);
    emitBytes(OP_GET_SUPER, name);
  }
//< super-invoke
}
//< Superclasses super
//> Methods and Initializers this
// Prefix parser for `this`. Outside a class it reports an error and emits
// nothing; inside one it is compiled as a read of the variable named by the
// token just consumed, with assignment disallowed.
static void this_(bool canAssign) {
//> this-outside-class
  bool insideClass = currentClass != NULL;
  if (!insideClass) {
    error("Can't use 'this' outside of a class.");
    return;
  }

//< this-outside-class
  variable(false);
} // [this]
//< Methods and Initializers this
//> Compiling Expressions unary
/* Compiling Expressions unary < Global Variables unary
static void unary() {
*/
//> Global Variables unary
// Prefix parser for the unary operators `!` and `-`. The operator token is
// parser.previous; the operand is compiled first and the operator's
// instruction is emitted after it.
static void unary(bool canAssign) {
//< Global Variables unary
  TokenType operatorType = parser.previous.type;

  // Compile the operand.
/* Compiling Expressions unary < Compiling Expressions unary-operand
  expression();
*/
//> unary-operand
  parsePrecedence(PREC_UNARY);
//< unary-operand

  // Emit the operator instruction.
//> Types of Values compile-not
  if (operatorType == TOKEN_BANG) {
    emitByte(OP_NOT);
    return;
  }
//< Types of Values compile-not
  if (operatorType == TOKEN_MINUS) {
    emitByte(OP_NEGATE);
  }
  // Any other token type is unreachable and emits nothing.
}
//< Compiling Expressions unary
//> Compiling Expressions rules
// Parse table for the expression parser, indexed by token type (see
// getRule). The three columns of each entry appear to be, in order: the
// function to call when the token starts an expression (prefix), the
// function to call when it follows a left operand (infix), and the
// precedence used by parsePrecedence to decide whether to continue with the
// infix rule. NULL means the token has no rule for that position.
//
// The commented-out rows are earlier versions of entries and are kept as
// they are.
ParseRule rules[] = {
/* Compiling Expressions rules < Calls and Functions infix-left-paren
  [TOKEN_LEFT_PAREN]    = {grouping, NULL,   PREC_NONE},
*/
//> Calls and Functions infix-left-paren
  [TOKEN_LEFT_PAREN]    = {grouping, call,   PREC_CALL},
//< Calls and Functions infix-left-paren
  [TOKEN_RIGHT_PAREN]   = {NULL,     NULL,   PREC_NONE},
  [TOKEN_LEFT_BRACE]    = {NULL,     NULL,   PREC_NONE}, // [big]
  [TOKEN_RIGHT_BRACE]   = {NULL,     NULL,   PREC_NONE},
  [TOKEN_COMMA]         = {NULL,     NULL,   PREC_NONE},
/* Compiling Expressions rules < Classes and Instances table-dot
  [TOKEN_DOT]           = {NULL,     NULL,   PREC_NONE},
*/
//> Classes and Instances table-dot
  [TOKEN_DOT]           = {NULL,     dot,    PREC_CALL},
//< Classes and Instances table-dot
  [TOKEN_MINUS]         = {unary,    binary, PREC_TERM},
  [TOKEN_PLUS]          = {NULL,     binary, PREC_TERM},
  [TOKEN_SEMICOLON]     = {NULL,     NULL,   PREC_NONE},
  [TOKEN_SLASH]         = {NULL,     binary, PREC_FACTOR},
  [TOKEN_STAR]          = {NULL,     binary, PREC_FACTOR},
/* Compiling Expressions rules < Types of Values table-not
  [TOKEN_BANG]          = {NULL,     NULL,   PREC_NONE},
*/
//> Types of Values table-not
  [TOKEN_BANG]          = {unary,    NULL,   PREC_NONE},
//< Types of Values table-not
/* Compiling Expressions rules < Types of Values table-equal
  [TOKEN_BANG_EQUAL]    = {NULL,     NULL,   PREC_NONE},
*/
//> Types of Values table-equal
  [TOKEN_BANG_EQUAL]    = {NULL,     binary, PREC_EQUALITY},
//< Types of Values table-equal
  [TOKEN_EQUAL]         = {NULL,     NULL,   PREC_NONE},
/* Compiling Expressions rules < Types of Values table-comparisons
  [TOKEN_EQUAL_EQUAL]   = {NULL,     NULL,   PREC_NONE},
  [TOKEN_GREATER]       = {NULL,     NULL,   PREC_NONE},
  [TOKEN_GREATER_EQUAL] = {NULL,     NULL,   PREC_NONE},
  [TOKEN_LESS]          = {NULL,     NULL,   PREC_NONE},
  [TOKEN_LESS_EQUAL]    = {NULL,     NULL,   PREC_NONE},
*/
//> Types of Values table-comparisons
  [TOKEN_EQUAL_EQUAL]   = {NULL,     binary, PREC_EQUALITY},
  [TOKEN_GREATER]       = {NULL,     binary, PREC_COMPARISON},
  [TOKEN_GREATER_EQUAL] = {NULL,     binary, PREC_COMPARISON},
  [TOKEN_LESS]          = {NULL,     binary, PREC_COMPARISON},
  [TOKEN_LESS_EQUAL]    = {NULL,     binary, PREC_COMPARISON},
//< Types of Values table-comparisons
/* Compiling Expressions rules < Global Variables table-identifier
  [TOKEN_IDENTIFIER]    = {NULL,     NULL,   PREC_NONE},
*/
//> Global Variables table-identifier
  [TOKEN_IDENTIFIER]    = {variable, NULL,   PREC_NONE},
//< Global Variables table-identifier
/* Compiling Expressions rules < Strings table-string
  [TOKEN_STRING]        = {NULL,     NULL,   PREC_NONE},
*/
//> Strings table-string
  [TOKEN_STRING]        = {string,   NULL,   PREC_NONE},
//< Strings table-string
  [TOKEN_NUMBER]        = {number,   NULL,   PREC_NONE},
/* Compiling Expressions rules < Jumping Back and Forth table-and
  [TOKEN_AND]           = {NULL,     NULL,   PREC_NONE},
*/
//> Jumping Back and Forth table-and
  [TOKEN_AND]           = {NULL,     and_,   PREC_AND},
//< Jumping Back and Forth table-and
  [TOKEN_CLASS]         = {NULL,     NULL,   PREC_NONE},
  [TOKEN_ELSE]          = {NULL,     NULL,   PREC_NONE},
/* Compiling Expressions rules < Types of Values table-false
  [TOKEN_FALSE]         = {NULL,     NULL,   PREC_NONE},
*/
//> Types of Values table-false
  [TOKEN_FALSE]         = {literal,  NULL,   PREC_NONE},
//< Types of Values table-false
  [TOKEN_FOR]           = {NULL,     NULL,   PREC_NONE},
  [TOKEN_FUN]           = {NULL,     NULL,   PREC_NONE},
  [TOKEN_IF]            = {NULL,     NULL,   PREC_NONE},
/* Compiling Expressions rules < Types of Values table-nil
  [TOKEN_NIL]           = {NULL,     NULL,   PREC_NONE},
*/
//> Types of Values table-nil
  [TOKEN_NIL]           = {literal,  NULL,   PREC_NONE},
//< Types of Values table-nil
/* Compiling Expressions rules < Jumping Back and Forth table-or
  [TOKEN_OR]            = {NULL,     NULL,   PREC_NONE},
*/
//> Jumping Back and Forth table-or
  [TOKEN_OR]            = {NULL,     or_,    PREC_OR},
//< Jumping Back and Forth table-or
  [TOKEN_PRINT]         = {NULL,     NULL,   PREC_NONE},
  [TOKEN_RETURN]        = {NULL,     NULL,   PREC_NONE},
/* Compiling Expressions rules < Superclasses table-super
  [TOKEN_SUPER]         = {NULL,     NULL,   PREC_NONE},
*/
//> Superclasses table-super
  [TOKEN_SUPER]         = {super_,   NULL,   PREC_NONE},
//< Superclasses table-super
/* Compiling Expressions rules < Methods and Initializers table-this
  [TOKEN_THIS]          = {NULL,     NULL,   PREC_NONE},
*/
//> Methods and Initializers table-this
  [TOKEN_THIS]          = {this_,    NULL,   PREC_NONE},
//< Methods and Initializers table-this
/* Compiling Expressions rules < Types of Values table-true
  [TOKEN_TRUE]          = {NULL,     NULL,   PREC_NONE},
*/
//> Types of Values table-true
  [TOKEN_TRUE]          = {literal,  NULL,   PREC_NONE},
//< Types of Values table-true
  [TOKEN_VAR]           = {NULL,     NULL,   PREC_NONE},
  [TOKEN_WHILE]         = {NULL,     NULL,   PREC_NONE},
  [TOKEN_ERROR]         = {NULL,     NULL,   PREC_NONE},
  [TOKEN_EOF]           = {NULL,     NULL,   PREC_NONE},
};
//< Compiling Expressions rules
//> Compiling Expressions parse-precedence
// Core of the expression parser: parses an expression whose operators all
// have at least the given `precedence`.
//
// The next token is consumed and must have a prefix rule, otherwise
// "Expect expression." is reported. After the prefix rule has run, infix
// rules are applied for as long as the upcoming token's precedence is not
// lower than `precedence`. Assignment is only allowed when parsing at
// PREC_ASSIGNMENT or lower; a leftover '=' in that case means the left-hand
// side was not a valid target.
static void parsePrecedence(Precedence precedence) {
/* Compiling Expressions parse-precedence < Compiling Expressions precedence-body
  // What goes here?
*/
//> precedence-body
  advance();
  ParseRule* startRule = getRule(parser.previous.type);
  if (startRule->prefix == NULL) {
    error("Expect expression.");
    return;
  }

/* Compiling Expressions precedence-body < Global Variables prefix-rule
  prefixRule();
*/
//> Global Variables prefix-rule
  bool canAssign = precedence <= PREC_ASSIGNMENT;
  startRule->prefix(canAssign);
//< Global Variables prefix-rule
//> infix

  for (;;) {
    ParseRule* upcoming = getRule(parser.current.type);
    if (upcoming->precedence < precedence) break;

    advance();
    ParseFn infixRule = getRule(parser.previous.type)->infix;
/* Compiling Expressions infix < Global Variables infix-rule
    infixRule();
*/
//> Global Variables infix-rule
    infixRule(canAssign);
//< Global Variables infix-rule
  }
//> Global Variables invalid-assign

  if (canAssign && match(TOKEN_EQUAL)) {
    error("Invalid assignment target.");
  }
//< Global Variables invalid-assign
//< infix
//< precedence-body
}
//< Compiling Expressions parse-precedence
//> Compiling Expressions get-rule
// Returns the entry of the `rules` table for the given token type.
static ParseRule* getRule(TokenType type) {
  return rules + type;
}
//< Compiling Expressions get-rule
//> Compiling Expressions expression
static void expression() {
/* Compiling Expressions expression < Compiling Expressions expression-body
  // What goes here?
*/
//> expression-body
  parsePrecedence(PREC_ASSIGNMENT);
//< expression-body
}
//< Compiling Expressions expression
//> Local Variables block
// Compiles declarations until the closing '}' (or end of input), then
// requires the '}'. The caller is responsible for the surrounding scope.
static void block() {
  for (;;) {
    if (check(TOKEN_RIGHT_BRACE) || check(TOKEN_EOF)) break;
    declaration();
  }

  consume(TOKEN_RIGHT_BRACE, "Expect '}' after block.");
}
//< Local Variables block
//> Calls and Functions compile-function
// Compiles a function's parameter list and body with a fresh Compiler, then
// emits the code that creates the closure.
//
// Each parameter is declared as a local, and arity is counted as parameters
// are parsed, with an error past 255. After endCompiler the enclosing
// compiler is current again, so OP_CLOSURE and the (isLocal, index) byte
// pair for every captured upvalue are emitted into the enclosing function's
// chunk.
static void function(FunctionType type) {
  Compiler compiler;
  initCompiler(&compiler, type);
  beginScope(); // [no-end-scope]

  consume(TOKEN_LEFT_PAREN, "Expect '(' after function name.");
//> parameters
  bool moreParameters = !check(TOKEN_RIGHT_PAREN);
  while (moreParameters) {
    current->function->arity++;
    if (current->function->arity > 255) {
      errorAtCurrent("Can't have more than 255 parameters.");
    }
    defineVariable(parseVariable("Expect parameter name."));
    moreParameters = match(TOKEN_COMMA);
  }
//< parameters
  consume(TOKEN_RIGHT_PAREN, "Expect ')' after parameters.");
  consume(TOKEN_LEFT_BRACE, "Expect '{' before function body.");
  block();

  ObjFunction* function = endCompiler();
/* Calls and Functions compile-function < Closures emit-closure
  emitBytes(OP_CONSTANT, makeConstant(OBJ_VAL(function)));
*/
//> Closures emit-closure
  emitBytes(OP_CLOSURE, makeConstant(OBJ_VAL(function)));
//< Closures emit-closure
//> Closures capture-upvalues

  for (int i = 0; i < function->upvalueCount; i++) {
    Upvalue* captured = &compiler.upvalues[i];
    emitByte(captured->isLocal ? 1 : 0);
    emitByte(captured->index);
  }
//< Closures capture-upvalues
}
//< Calls and Functions compile-function
//> Methods and Initializers method
static void method() {
  consume(TOKEN_IDENTIFIER, "Expect method name.");
  uint8_t constant = identifierConstant(&parser.previous);
//> method-body

//< method-body
/* Methods and Initializers method-body < Methods and Initializers method-type
  FunctionType type = TYPE_FUNCTION;
*/
//> method-type
  FunctionType type = TYPE_METHOD;
//< method-type
//> initializer-name
  if (parser.previous.length == 4 &&
      memcmp(parser.previous.start, "init", 4) == 0) {
    type = TYPE_INITIALIZER;
  }
  
//< initializer-name
//> method-body
  function(type);
//< method-body
  emitBytes(OP_METHOD, constant);
}
//< Methods and Initializers method
//> Classes and Instances class-declaration
// Compiles a class declaration, starting at the class name: the name, an
// optional `< Superclass` clause, and the brace-delimited list of methods.
//
// A ClassCompiler is pushed onto the currentClass chain for the duration of
// the declaration so that this_ and super_ can tell they are inside a class.
static void classDeclaration() {
  consume(TOKEN_IDENTIFIER, "Expect class name.");
//> Methods and Initializers class-name
  // Keep a copy of the name token; parser.previous changes as parsing goes
  // on and the name is needed again below.
  Token className = parser.previous;
//< Methods and Initializers class-name
  uint8_t nameConstant = identifierConstant(&parser.previous);
  declareVariable();

  emitBytes(OP_CLASS, nameConstant);
  // The class variable is defined before the body is compiled.
  defineVariable(nameConstant);

//> Methods and Initializers create-class-compiler
  ClassCompiler classCompiler;
//> Superclasses init-has-superclass
  classCompiler.hasSuperclass = false;
//< Superclasses init-has-superclass
  classCompiler.enclosing = currentClass;
  currentClass = &classCompiler;

//< Methods and Initializers create-class-compiler
//> Superclasses compile-superclass
  if (match(TOKEN_LESS)) {
    consume(TOKEN_IDENTIFIER, "Expect superclass name.");
    // Emit a read of the superclass variable (assignment not allowed).
    variable(false);
//> inherit-self

    if (identifiersEqual(&className, &parser.previous)) {
      error("A class can't inherit from itself.");
    }

//< inherit-self
//> superclass-variable
    // Open a scope holding a local named "super". Inside a scope
    // defineVariable only marks the newest local initialized, so its
    // argument is not used here.
    beginScope();
    addLocal(syntheticToken("super"));
    defineVariable(0);
    
//< superclass-variable
    // Emit a read of the class being declared, then OP_INHERIT.
    namedVariable(className, false);
    emitByte(OP_INHERIT);
//> set-has-superclass
    classCompiler.hasSuperclass = true;
//< set-has-superclass
  }
  
//< Superclasses compile-superclass
//> Methods and Initializers load-class
  // Emit a read of the class before its methods are compiled.
  namedVariable(className, false);
//< Methods and Initializers load-class
  consume(TOKEN_LEFT_BRACE, "Expect '{' before class body.");
//> Methods and Initializers class-body
  while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) {
    method();
  }
//< Methods and Initializers class-body
  consume(TOKEN_RIGHT_BRACE, "Expect '}' after class body.");
//> Methods and Initializers pop-class
  // NOTE(review): presumably discards the class value loaded before the
  // body; confirm against the VM's OP_METHOD handling.
  emitByte(OP_POP);
//< Methods and Initializers pop-class
//> Superclasses end-superclass-scope

  // Close the scope that was opened for the "super" local, if any.
  if (classCompiler.hasSuperclass) {
    endScope();
  }
//< Superclasses end-superclass-scope
//> Methods and Initializers pop-enclosing

  currentClass = currentClass->enclosing;
//< Methods and Initializers pop-enclosing
}
//< Classes and Instances class-declaration
//> Calls and Functions fun-declaration
// Compiles a function declaration: the name, then the function itself, then
// the variable definition that binds the name.
//
// The name is marked initialized before the body is compiled, so the body
// can refer to the function's own name.
static void funDeclaration() {
  uint8_t nameConstant = parseVariable("Expect function name.");
  markInitialized();
  function(TYPE_FUNCTION);
  defineVariable(nameConstant);
}
//< Calls and Functions fun-declaration
//> Global Variables var-declaration
// Compiles a 'var' declaration: the variable name, an optional '='
// initializer expression, and the terminating ';'.
static void varDeclaration() {
  uint8_t variable = parseVariable("Expect variable name.");

  if (!match(TOKEN_EQUAL)) {
    // No initializer was written, so OP_NIL supplies the value instead.
    emitByte(OP_NIL);
  } else {
    expression();
  }

  consume(TOKEN_SEMICOLON,
          "Expect ';' after variable declaration.");
  defineVariable(variable);
}
//< Global Variables var-declaration
//> Global Variables expression-statement
// Compiles an expression used as a statement: the expression, a required
// ';', and then an OP_POP (presumably discarding the expression's value).
static void expressionStatement() {
  expression();
  consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
  emitByte(OP_POP);
}
//< Global Variables expression-statement
//> Jumping Back and Forth for-statement
// Compiles a 'for' statement. All three clauses (initializer, condition,
// increment) are optional. The increment clause is compiled before the body
// in the bytecode, so jumps are used to run it after the body instead.
static void forStatement() {
//> for-begin-scope
  // Bracket the whole statement in a scope; the matching endScope() is at
  // the bottom of this function.
  beginScope();
//< for-begin-scope
  consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
/* Jumping Back and Forth for-statement < Jumping Back and Forth for-initializer
  consume(TOKEN_SEMICOLON, "Expect ';'.");
*/
//> for-initializer
  // Initializer clause: empty, a 'var' declaration, or an expression
  // statement. The latter two consume the ';' themselves.
  if (match(TOKEN_SEMICOLON)) {
    // No initializer.
  } else if (match(TOKEN_VAR)) {
    varDeclaration();
  } else {
    expressionStatement();
  }
//< for-initializer

  // Offset the loop jumps back to; initially the start of the condition.
  int loopStart = currentChunk()->count;
/* Jumping Back and Forth for-statement < Jumping Back and Forth for-exit
  consume(TOKEN_SEMICOLON, "Expect ';'.");
*/
//> for-exit
  // -1 means "no condition clause", so there is no exit jump to patch later.
  int exitJump = -1;
  if (!match(TOKEN_SEMICOLON)) {
    expression();
    consume(TOKEN_SEMICOLON, "Expect ';' after loop condition.");

    // Jump out of the loop if the condition is false.
    exitJump = emitJump(OP_JUMP_IF_FALSE);
    emitByte(OP_POP); // Condition.
  }

//< for-exit
/* Jumping Back and Forth for-statement < Jumping Back and Forth for-increment
  consume(TOKEN_RIGHT_PAREN, "Expect ')' after for clauses.");
*/
//> for-increment
  if (!match(TOKEN_RIGHT_PAREN)) {
    // Skip over the increment code on the way into the body.
    int bodyJump = emitJump(OP_JUMP);
    int incrementStart = currentChunk()->count;
    expression();
    emitByte(OP_POP);
    consume(TOKEN_RIGHT_PAREN, "Expect ')' after for clauses.");

    // After the increment, loop back to the condition. From here on the
    // body's closing loop targets the increment instead.
    emitLoop(loopStart);
    loopStart = incrementStart;
    patchJump(bodyJump);
  }
//< for-increment

  statement();
  emitLoop(loopStart);
//> exit-jump

  // Only a loop with a condition clause has an exit jump to patch.
  if (exitJump != -1) {
    patchJump(exitJump);
    emitByte(OP_POP); // Condition.
  }

//< exit-jump
//> for-end-scope
  endScope();
//< for-end-scope
}
//< Jumping Back and Forth for-statement
//> Jumping Back and Forth if-statement
// Compiles an 'if' statement with an optional 'else' branch. An OP_POP is
// emitted at the start of each path, right after the conditional jump and
// right after its patched target (presumably removing the condition value
// on both paths -- confirm against the VM's OP_JUMP_IF_FALSE).
static void ifStatement() {
  consume(TOKEN_LEFT_PAREN, "Expect '(' after 'if'.");
  expression();
  consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition."); // [paren]

  // Placeholder jump over the then branch; patched once its end is known.
  int thenJump = emitJump(OP_JUMP_IF_FALSE);
//> pop-then
  emitByte(OP_POP);
//< pop-then
  statement();

//> jump-over-else
  // At the end of the then branch, jump past the else branch.
  int elseJump = emitJump(OP_JUMP);

//< jump-over-else
  patchJump(thenJump);
//> pop-end
  emitByte(OP_POP);
//< pop-end
//> compile-else

  if (match(TOKEN_ELSE)) statement();
//< compile-else
//> patch-else
  patchJump(elseJump);
//< patch-else
}
//< Jumping Back and Forth if-statement
//> Global Variables print-statement
// Compiles a 'print' statement: the operand expression, a required ';',
// and then OP_PRINT.
static void printStatement() {
  expression();
  consume(TOKEN_SEMICOLON, "Expect ';' after value.");
  emitByte(OP_PRINT);
}
//< Global Variables print-statement
//> Calls and Functions return-statement
// Compiles a 'return' statement, with or without a value. Reports an error
// for a return in top-level code and for returning a value from an
// initializer; compilation continues after either error.
static void returnStatement() {
//> return-from-script
  if (current->type == TYPE_SCRIPT) {
    error("Can't return from top-level code.");
  }

//< return-from-script
  if (match(TOKEN_SEMICOLON)) {
    // Bare 'return;' -- emitReturn() decides what is returned.
    emitReturn();
  } else {
//> Methods and Initializers return-from-init
    if (current->type == TYPE_INITIALIZER) {
      error("Can't return a value from an initializer.");
    }

//< Methods and Initializers return-from-init
    expression();
    consume(TOKEN_SEMICOLON, "Expect ';' after return value.");
    emitByte(OP_RETURN);
  }
}
//< Calls and Functions return-statement
//> Jumping Back and Forth while-statement
// Compiles a 'while' statement: condition, conditional exit jump, body,
// and a loop instruction back to the condition.
static void whileStatement() {
//> loop-start
  // Recorded before the condition so each iteration re-evaluates it.
  int loopStart = currentChunk()->count;
//< loop-start
  consume(TOKEN_LEFT_PAREN, "Expect '(' after 'while'.");
  expression();
  consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition.");

  // Placeholder exit jump; patched after the body has been compiled.
  int exitJump = emitJump(OP_JUMP_IF_FALSE);
  emitByte(OP_POP);
  statement();
//> loop
  emitLoop(loopStart);
//< loop

  patchJump(exitJump);
  // Counterpart of the OP_POP above, emitted at the exit jump's target.
  emitByte(OP_POP);
}
//< Jumping Back and Forth while-statement
//> Global Variables synchronize
// Error recovery: clears panic mode, then skips tokens until a likely
// statement boundary. That is either just after a ';' or just before a
// keyword that starts a declaration or statement. Stops at end of file.
static void synchronize() {
  parser.panicMode = false;

  for (; parser.current.type != TOKEN_EOF; advance()) {
    // The previous token ended a statement, so resume here.
    if (parser.previous.type == TOKEN_SEMICOLON) return;

    switch (parser.current.type) {
      // Any of these begins a new declaration or statement.
      case TOKEN_CLASS:
      case TOKEN_FUN:
      case TOKEN_VAR:
      case TOKEN_FOR:
      case TOKEN_IF:
      case TOKEN_WHILE:
      case TOKEN_PRINT:
      case TOKEN_RETURN:
        return;

      default:
        break; // Not a boundary; keep skipping.
    }
  }
}
//< Global Variables synchronize
//> Global Variables declaration
// Compiles one declaration: a class, function or variable declaration, or
// otherwise a statement. If the parser is in panic mode afterwards,
// synchronize() is called before returning.
static void declaration() {
//> Classes and Instances match-class
  if (match(TOKEN_CLASS)) {
    classDeclaration();
/* Calls and Functions match-fun < Classes and Instances match-class
  if (match(TOKEN_FUN)) {
*/
  } else if (match(TOKEN_FUN)) {
//< Classes and Instances match-class
//> Calls and Functions match-fun
    funDeclaration();
/* Global Variables match-var < Calls and Functions match-fun
  if (match(TOKEN_VAR)) {
*/
  } else if (match(TOKEN_VAR)) {
//< Calls and Functions match-fun
//> match-var
    varDeclaration();
  } else {
    statement();
  }
//< match-var
/* Global Variables declaration < Global Variables match-var
  statement();
*/
//> call-synchronize

  // Recover from a compile error at declaration granularity.
  if (parser.panicMode) synchronize();
//< call-synchronize
}
//< Global Variables declaration
//> Global Variables statement
// Compiles one statement, dispatching on its leading token. Anything that
// does not start with a statement keyword or '{' is compiled as an
// expression statement.
static void statement() {
  if (match(TOKEN_PRINT)) {
    printStatement();
//> Jumping Back and Forth parse-for
  } else if (match(TOKEN_FOR)) {
    forStatement();
//< Jumping Back and Forth parse-for
//> Jumping Back and Forth parse-if
  } else if (match(TOKEN_IF)) {
    ifStatement();
//< Jumping Back and Forth parse-if
//> Calls and Functions match-return
  } else if (match(TOKEN_RETURN)) {
    returnStatement();
//< Calls and Functions match-return
//> Jumping Back and Forth parse-while
  } else if (match(TOKEN_WHILE)) {
    whileStatement();
//< Jumping Back and Forth parse-while
//> Local Variables parse-block
  } else if (match(TOKEN_LEFT_BRACE)) {
    // A braced block is compiled inside its own scope.
    beginScope();
    block();
    endScope();
//< Local Variables parse-block
//> parse-expressions-statement
  } else {
    expressionStatement();
//< parse-expressions-statement
  }
}
//< Global Variables statement

/* Scanning on Demand compiler-c < Compiling Expressions compile-signature
void compile(const char* source) {
*/
/* Compiling Expressions compile-signature < Calls and Functions compile-signature
bool compile(const char* source, Chunk* chunk) {
*/
//> Calls and Functions compile-signature
// Compiles `source` as a top-level script (TYPE_SCRIPT).
// Returns the function produced by endCompiler(), or NULL if any compile
// error was reported while parsing.
ObjFunction* compile(const char* source) {
//< Calls and Functions compile-signature
  initScanner(source);
/* Scanning on Demand dump-tokens < Compiling Expressions compile-chunk
  int line = -1;
  for (;;) {
    Token token = scanToken();
    if (token.line != line) {
      printf("%4d ", token.line);
      line = token.line;
    } else {
      printf("   | ");
    }
    printf("%2d '%.*s'\n", token.type, token.length, token.start); // [format]

    if (token.type == TOKEN_EOF) break;
  }
*/
//> Local Variables compiler
  Compiler compiler;
//< Local Variables compiler
/* Local Variables compiler < Calls and Functions call-init-compiler
  initCompiler(&compiler);
*/
//> Calls and Functions call-init-compiler
  initCompiler(&compiler, TYPE_SCRIPT);
//< Calls and Functions call-init-compiler
/* Compiling Expressions init-compile-chunk < Calls and Functions call-init-compiler
  compilingChunk = chunk;
*/
//> Compiling Expressions compile-chunk
//> init-parser-error

  // Reset the error state left over from any previous compile() call.
  parser.hadError = false;
  parser.panicMode = false;

//< init-parser-error
  // Prime the parser with the first token.
  advance();
//< Compiling Expressions compile-chunk
/* Compiling Expressions compile-chunk < Global Variables compile
  expression();
  consume(TOKEN_EOF, "Expect end of expression.");
*/
//> Global Variables compile

  // A program is a sequence of declarations up to end of file.
  while (!match(TOKEN_EOF)) {
    declaration();
  }

//< Global Variables compile
/* Compiling Expressions finish-compile < Calls and Functions call-end-compiler
  endCompiler();
*/
/* Compiling Expressions return-had-error < Calls and Functions call-end-compiler
  return !parser.hadError;
*/
//> Calls and Functions call-end-compiler
  // endCompiler() runs even after an error; the result is then withheld.
  ObjFunction* function = endCompiler();
  return parser.hadError ? NULL : function;
//< Calls and Functions call-end-compiler
}
//> Garbage Collection mark-compiler-roots
// Marks the function object of every compiler in the chain that starts at
// `current` and follows the `enclosing` links, so the GC keeps them alive.
void markCompilerRoots() {
  for (Compiler* active = current; active != NULL;
       active = active->enclosing) {
    markObject((Obj*)active->function);
  }
}
//< Garbage Collection mark-compiler-roots


================================================
FILE: c/compiler.h
================================================
//> Scanning on Demand compiler-h
#ifndef clox_compiler_h
#define clox_compiler_h

//> Strings compiler-include-object
#include "object.h"
//< Strings compiler-include-object
//> Compiling Expressions compile-h
#include "vm.h"

//< Compiling Expressions compile-h
/* Scanning on Demand compiler-h < Compiling Expressions compile-h
void compile(const char* source);
*/
/* Compiling Expressions compile-h < Calls and Functions compile-h
bool compile(const char* source, Chunk* chunk);
*/
//> Calls and Functions compile-h
// Compiles `source` as a top-level script. Returns the compiled function,
// or NULL if a compile error was reported.
ObjFunction* compile(const char* source);
//< Calls and Functions compile-h
//> Garbage Collection mark-compiler-roots-h
// Marks the function objects held by the chain of active compilers so the
// garbage collector treats them as roots.
void markCompilerRoots();
//< Garbage Collection mark-compiler-roots-h

#endif


================================================
FILE: c/debug.c
================================================
//> Chunks of Bytecode debug-c
#include <stdio.h>

#include "debug.h"
//> Closures debug-include-object
#include "object.h"
//< Closures debug-include-object
//> debug-include-value
#include "value.h"
//< debug-include-value

// Prints a "== name ==" header followed by a disassembly of every
// instruction in `chunk`.
void disassembleChunk(Chunk* chunk, const char* name) {
  printf("== %s ==\n", name);

  // Instructions vary in length, so each call reports where the next one
  // begins rather than the loop stepping by a fixed amount.
  int position = 0;
  while (position < chunk->count) {
    position = disassembleInstruction(chunk, position);
  }
}
//> constant-instruction
// Prints an instruction whose one-byte operand is an index into the chunk's
// constant table: the name, the index, and the constant's value in quotes.
// Returns the offset of the next instruction (opcode + 1 operand byte).
static int constantInstruction(const char* name, Chunk* chunk,
                               int offset) {
  uint8_t constant = chunk->code[offset + 1];
  printf("%-16s %4d '", name, constant);
  printValue(chunk->constants.values[constant]);
  printf("'\n");
//> return-after-operand
  return offset + 2;
//< return-after-operand
}
//< constant-instruction
//> Methods and Initializers invoke-instruction
// Prints an invoke-style instruction, which carries two one-byte operands:
// a constant-table index followed by an argument count. Returns the offset
// of the next instruction (opcode + 2 operand bytes).
static int invokeInstruction(const char* name, Chunk* chunk,
                                int offset) {
  uint8_t nameIndex = chunk->code[offset + 1];
  uint8_t arity = chunk->code[offset + 2];

  printf("%-16s (%d args) %4d '", name, arity, nameIndex);
  printValue(chunk->constants.values[nameIndex]);
  printf("'\n");

  int next = offset + 3;
  return next;
}
//< Methods and Initializers invoke-instruction
//> simple-instruction
// Prints an instruction that has no operands (just its name on a line) and
// returns the offset of the instruction that follows it.
static int simpleInstruction(const char* name, int offset) {
  int next = offset + 1;
  puts(name);  // Writes the name followed by a newline.
  return next;
}
//< simple-instruction
//> Local Variables byte-instruction
// Prints an instruction with a single one-byte operand, shown as a plain
// number. Returns the offset of the next instruction.
static int byteInstruction(const char* name, Chunk* chunk,
                           int offset) {
  // The operand byte immediately follows the opcode.
  uint8_t operand = chunk->code[offset + 1];

  printf("%-16s %4d\n", name, operand);
  return offset + 2; // [debug]
}
//< Local Variables byte-instruction
//> Jumping Back and Forth jump-instruction
// Prints a jump instruction along with the offset it lands on. The operand
// is a 16-bit distance stored high byte first. `sign` is +1 or -1 at the
// call sites and selects a forward or backward jump, measured from the end
// of this 3-byte instruction. Returns the offset of the next instruction.
static int jumpInstruction(const char* name, int sign,
                           Chunk* chunk, int offset) {
  // Reassemble the big-endian 16-bit operand.
  uint16_t distance = (uint16_t)(chunk->code[offset + 1] << 8);
  distance = (uint16_t)(distance | chunk->code[offset + 2]);

  int next = offset + 3;
  int target = next + sign * distance;
  printf("%-16s %4d -> %d\n", name, offset, target);
  return next;
}
//< Jumping Back and Forth jump-instruction
//> disassemble-instruction
// Prints a one-line disassembly of the instruction at `offset` in `chunk`
// (OP_CLOSURE additionally prints one line per captured upvalue).
// Returns the offset of the next instruction. An unrecognized opcode is
// reported and treated as a single-byte instruction.
int disassembleInstruction(Chunk* chunk, int offset) {
  printf("%04d ", offset);
//> show-location
  // Show the source line only when it differs from the previous byte's
  // line; otherwise print a '|' continuation marker.
  if (offset > 0 &&
      chunk->lines[offset] == chunk->lines[offset - 1]) {
    printf("   | ");
  } else {
    printf("%4d ", chunk->lines[offset]);
  }
//< show-location
  
  // Each case delegates to the helper matching the opcode's operand layout.
  uint8_t instruction = chunk->code[offset];
  switch (instruction) {
//> disassemble-constant
    case OP_CONSTANT:
      return constantInstruction("OP_CONSTANT", chunk, offset);
//< disassemble-constant
//> Types of Values disassemble-literals
    case OP_NIL:
      return simpleInstruction("OP_NIL", offset);
    case OP_TRUE:
      return simpleInstruction("OP_TRUE", offset);
    case OP_FALSE:
      return simpleInstruction("OP_FALSE", offset);
//< Types of Values disassemble-literals
//> Global Variables disassemble-pop
    case OP_POP:
      return simpleInstruction("OP_POP", offset);
//< Global Variables disassemble-pop
//> Local Variables disassemble-local
    case OP_GET_LOCAL:
      return byteInstruction("OP_GET_LOCAL", chunk, offset);
    case OP_SET_LOCAL:
      return byteInstruction("OP_SET_LOCAL", chunk, offset);
//< Local Variables disassemble-local
//> Global Variables disassemble-get-global
    case OP_GET_GLOBAL:
      return constantInstruction("OP_GET_GLOBAL", chunk, offset);
//< Global Variables disassemble-get-global
//> Global Variables disassemble-define-global
    case OP_DEFINE_GLOBAL:
      return constantInstruction("OP_DEFINE_GLOBAL", chunk,
                                 offset);
//< Global Variables disassemble-define-global
//> Global Variables disassemble-set-global
    case OP_SET_GLOBAL:
      return constantInstruction("OP_SET_GLOBAL", chunk, offset);
//< Global Variables disassemble-set-global
//> Closures disassemble-upvalue-ops
    case OP_GET_UPVALUE:
      return byteInstruction("OP_GET_UPVALUE", chunk, offset);
    case OP_SET_UPVALUE:
      return byteInstruction("OP_SET_UPVALUE", chunk, offset);
//< Closures disassemble-upvalue-ops
//> Classes and Instances disassemble-property-ops
    case OP_GET_PROPERTY:
      return constantInstruction("OP_GET_PROPERTY", chunk, offset);
    case OP_SET_PROPERTY:
      return constantInstruction("OP_SET_PROPERTY", chunk, offset);
//< Classes and Instances disassemble-property-ops
//> Superclasses disassemble-get-super
    case OP_GET_SUPER:
      return constantInstruction("OP_GET_SUPER", chunk, offset);
//< Superclasses disassemble-get-super
//> Types of Values disassemble-comparison
    case OP_EQUAL:
      return simpleInstruction("OP_EQUAL", offset);
    case OP_GREATER:
      return simpleInstruction("OP_GREATER", offset);
    case OP_LESS:
      return simpleInstruction("OP_LESS", offset);
//< Types of Values disassemble-comparison
//> A Virtual Machine disassemble-binary
    case OP_ADD:
      return simpleInstruction("OP_ADD", offset);
    case OP_SUBTRACT:
      return simpleInstruction("OP_SUBTRACT", offset);
    case OP_MULTIPLY:
      return simpleInstruction("OP_MULTIPLY", offset);
    case OP_DIVIDE:
      return simpleInstruction("OP_DIVIDE", offset);
//> Types of Values disassemble-not
    case OP_NOT:
      return simpleInstruction("OP_NOT", offset);
//< Types of Values disassemble-not
//< A Virtual Machine disassemble-binary
//> A Virtual Machine disassemble-negate
    case OP_NEGATE:
      return simpleInstruction("OP_NEGATE", offset);
//< A Virtual Machine disassemble-negate
//> Global Variables disassemble-print
    case OP_PRINT:
      return simpleInstruction("OP_PRINT", offset);
//< Global Variables disassemble-print
//> Jumping Back and Forth disassemble-jump
    // Sign +1: the jump distance is added to the following offset.
    case OP_JUMP:
      return jumpInstruction("OP_JUMP", 1, chunk, offset);
    case OP_JUMP_IF_FALSE:
      return jumpInstruction("OP_JUMP_IF_FALSE", 1, chunk, offset);
//< Jumping Back and Forth disassemble-jump
//> Jumping Back and Forth disassemble-loop
    // Sign -1: the jump distance is subtracted (a backward jump).
    case OP_LOOP:
      return jumpInstruction("OP_LOOP", -1, chunk, offset);
//< Jumping Back and Forth disassemble-loop
//> Calls and Functions disassemble-call
    case OP_CALL:
      return byteInstruction("OP_CALL", chunk, offset);
//< Calls and Functions disassemble-call
//> Methods and Initializers disassemble-invoke
    case OP_INVOKE:
      return invokeInstruction("OP_INVOKE", chunk, offset);
//< Methods and Initializers disassemble-invoke
//> Superclasses disassemble-super-invoke
    case OP_SUPER_INVOKE:
      return invokeInstruction("OP_SUPER_INVOKE", chunk, offset);
//< Superclasses disassemble-super-invoke
//> Closures disassemble-closure
    case OP_CLOSURE: {
      // Variable-length instruction: opcode, constant index of the
      // function, then an (isLocal, index) byte pair per upvalue.
      offset++;
      uint8_t constant = chunk->code[offset++];
      printf("%-16s %4d ", "OP_CLOSURE", constant);
      printValue(chunk->constants.values[constant]);
      printf("\n");
//> disassemble-upvalues

      // The number of operand pairs comes from the function constant itself.
      ObjFunction* function = AS_FUNCTION(
          chunk->constants.values[constant]);
      for (int j = 0; j < function->upvalueCount; j++) {
        int isLocal = chunk->code[offset++];
        int index = chunk->code[offset++];
        // offset has already moved past the pair, hence the "- 2".
        printf("%04d      |                     %s %d\n",
               offset - 2, isLocal ? "local" : "upvalue", index);
      }
      
//< disassemble-upvalues
      return offset;
    }
//< Closures disassemble-closure
//> Closures disassemble-close-upvalue
    case OP_CLOSE_UPVALUE:
      return simpleInstruction("OP_CLOSE_UPVALUE", offset);
//< Closures disassemble-close-upvalue
    case OP_RETURN:
      return simpleInstruction("OP_RETURN", offset);
//> Classes and Instances disassemble-class
    case OP_CLASS:
      return constantInstruction("OP_CLASS", chunk, offset);
//< Classes and Instances disassemble-class
//> Superclasses disassemble-inherit
    case OP_INHERIT:
      return simpleInstruction("OP_INHERIT", offset);
//< Superclasses disassemble-inherit
//> Methods and Initializers disassemble-method
    case OP_METHOD:
      return constantInstruction("OP_METHOD", chunk, offset);
//< Methods and Initializers disassemble-method
    default:
      // Skip a single byte so the caller's loop still makes progress.
      printf("Unknown opcode %d\n", instruction);
      return offset + 1;
  }
}
//< disassemble-instruction


================================================
FILE: c/debug.h
================================================
//> Chunks of Bytecode debug-h
#ifndef clox_debug_h
#define clox_debug_h

#include "chunk.h"

// Prints a "== name ==" header and then every instruction in `chunk`.
void disassembleChunk(Chunk* chunk, const char* name);
// Prints the instruction at `offset` and returns the offset of the next one.
int disassembleInstruction(Chunk* chunk, int offset);

#endif


================================================
FILE: c/main.c
================================================
//> Chunks of Bytecode main-c
//> Scanning on Demand main-includes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//< Scanning on Demand main-includes
#include "common.h"
//> main-include-chunk
#include "chunk.h"
//< main-include-chunk
//> main-include-debug
#include "debug.h"
//< main-include-debug
//> A Virtual Machine main-include-vm
#include "vm.h"
//< A Virtual Machine main-include-vm
//> Scanning on Demand repl

// Interactive prompt: shows "> ", reads one line from stdin and interprets
// it, repeating until input ends. A final newline is printed on exit.
// Lines are read into a fixed 1024-byte buffer.
static void repl() {
  char input[1024];

  printf("> ");
  while (fgets(input, sizeof(input), stdin) != NULL) {
    interpret(input);
    printf("> ");
  }

  // End of input (or a read error): finish the dangling prompt line.
  printf("\n");
}
//< Scanning on Demand repl
//> Scanning on Demand read-file
// Reads the whole file at `path` into a freshly malloc'd, NUL-terminated
// buffer and returns it; the caller owns the buffer and must free() it.
// On any failure (cannot open, cannot determine the size, out of memory,
// short read) a message is written to stderr and the process exits with
// status 74.
static char* readFile(const char* path) {
  FILE* file = fopen(path, "rb");
//> no-file
  if (file == NULL) {
    fprintf(stderr, "Could not open file \"%s\".\n", path);
    exit(74);
  }
//< no-file

  // Find the size by seeking to the end. Both calls can fail (for example
  // on a non-seekable stream), and ftell() reports failure as -1L, which
  // must be caught before the value is converted to the unsigned size_t.
  if (fseek(file, 0L, SEEK_END) != 0) {
    fprintf(stderr, "Could not read file \"%s\".\n", path);
    exit(74);
  }
  long endPosition = ftell(file);
  if (endPosition < 0) {
    fprintf(stderr, "Could not read file \"%s\".\n", path);
    exit(74);
  }
  size_t fileSize = (size_t)endPosition;
  rewind(file);

  // One extra byte for the terminating NUL.
  char* buffer = (char*)malloc(fileSize + 1);
//> no-buffer
  if (buffer == NULL) {
    fprintf(stderr, "Not enough memory to read \"%s\".\n", path);
    exit(74);
  }
  
//< no-buffer
  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
//> no-read
  if (bytesRead < fileSize) {
    fprintf(stderr, "Could not read file \"%s\".\n", path);
    exit(74);
  }
  
//< no-read
  buffer[bytesRead] = '\0';

  fclose(file);
  return buffer;
}
//< Scanning on Demand read-file
//> Scanning on Demand run-file
// Loads the script at `path`, interprets it, and exits the process with
// status 65 on a compile error or 70 on a runtime error. Returns normally
// for any other result.
static void runFile(const char* path) {
  char* source = readFile(path);
  InterpretResult outcome = interpret(source);
  free(source); // [owner]

  switch (outcome) {
    case INTERPRET_COMPILE_ERROR:
      exit(65);
    case INTERPRET_RUNTIME_ERROR:
      exit(70);
    default:
      break;
  }
}
//< Scanning on Demand run-file

// Entry point. With no arguments an interactive REPL is started; with one
// argument that file is run as a script; anything else prints a usage
// message and exits with status 64. The VM is initialized first and freed
// before returning 0.
int main(int argc, const char* argv[]) {
//> A Virtual Machine main-init-vm
  initVM();

//< A Virtual Machine main-init-vm
/* Chunks of Bytecode main-chunk < Scanning on Demand args
  Chunk chunk;
  initChunk(&chunk);
*/
/* Chunks of Bytecode main-constant < Scanning on Demand args

  int constant = addConstant(&chunk, 1.2);
*/
/* Chunks of Bytecode main-constant < Chunks of Bytecode main-chunk-line
  writeChunk(&chunk, OP_CONSTANT);
  writeChunk(&chunk, constant);

*/
/* Chunks of Bytecode main-chunk-line < Scanning on Demand args
  writeChunk(&chunk, OP_CONSTANT, 123);
  writeChunk(&chunk, constant, 123);
*/
/* A Virtual Machine main-chunk < Scanning on Demand args

  constant = addConstant(&chunk, 3.4);
  writeChunk(&chunk, OP_CONSTANT, 123);
  writeChunk(&chunk, constant, 123);

  writeChunk(&chunk, OP_ADD, 123);

  constant = addConstant(&chunk, 5.6);
  writeChunk(&chunk, OP_CONSTANT, 123);
  writeChunk(&chunk, constant, 123);

  writeChunk(&chunk, OP_DIVIDE, 123);
*/
/* A Virtual Machine main-negate < Scanning on Demand args
  writeChunk(&chunk, OP_NEGATE, 123);
*/
/* Chunks of Bytecode main-chunk < Chunks of Bytecode main-chunk-line
  writeChunk(&chunk, OP_RETURN);
*/
/* Chunks of Bytecode main-chunk-line < Scanning on Demand args

  writeChunk(&chunk, OP_RETURN, 123);
*/
/* Chunks of Bytecode main-disassemble-chunk < Scanning on Demand args

  disassembleChunk(&chunk, "test chunk");
*/
/* A Virtual Machine main-interpret < Scanning on Demand args
  interpret(&chunk);
*/
//> Scanning on Demand args
  // argc counts the program name, so 1 means "no arguments".
  if (argc == 1) {
    repl();
  } else if (argc == 2) {
    runFile(argv[1]);
  } else {
    fprintf(stderr, "Usage: clox [path]\n");
    exit(64);
  }
  
  freeVM();
//< Scanning on Demand args
/* A Virtual Machine main-free-vm < Scanning on Demand args
  freeVM();
*/
/* Chunks of Bytecode main-chunk < Scanning on Demand args
  freeChunk(&chunk);
*/
  return 0;
}


================================================
FILE: c/memory.c
================================================
//> Chunks of Bytecode memory-c
#include <stdlib.h>

//> Garbage Collection memory-include-compiler
#include "compiler.h"
//< Garbage Collection memory-include-compiler
#include "memory.h"
//> Strings memory-include-vm
#include "vm.h"
//< Strings memory-include-vm
//> Garbage Collection debug-log-includes

#ifdef DEBUG_LOG_GC
#include <stdio.h>
#include "debug.h"
#endif
//< Garbage Collection debug-log-includes
//> Garbage Collection heap-grow-factor

// After each collection, the next GC threshold (vm.nextGC) is set to the
// surviving byte count multiplied by this factor.
#define GC_HEAP_GROW_FACTOR 2
//< Garbage Collection heap-grow-factor

// Single entry point for the VM's dynamic memory: allocates, grows, shrinks
// or frees `pointer` depending on `oldSize` and `newSize`.
//   newSize == 0      frees `pointer` and returns NULL
//   otherwise         returns realloc(pointer, newSize)
// Exits the process with status 1 if realloc() fails. Requests that grow a
// block may trigger a garbage collection before the memory is obtained.
void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
//> Garbage Collection updated-bytes-allocated
  // Unsigned arithmetic: a shrink wraps around and nets out as a decrease.
  vm.bytesAllocated += newSize - oldSize;
//< Garbage Collection updated-bytes-allocated
//> Garbage Collection call-collect
  // Only growth can trigger a collection; frees and shrinks never do.
  if (newSize > oldSize) {
#ifdef DEBUG_STRESS_GC
    // Stress mode: collect on every growing allocation.
    collectGarbage();
#endif
//> collect-on-next

    if (vm.bytesAllocated > vm.nextGC) {
      collectGarbage();
    }
//< collect-on-next
  }

//< Garbage Collection call-collect
  if (newSize == 0) {
    free(pointer);
    return NULL;
  }

  void* result = realloc(pointer, newSize);
//> out-of-memory
  if (result == NULL) exit(1);
//< out-of-memory
  return result;
}
//> Garbage Collection mark-object
// Marks `object` as reachable and pushes it on the gray stack so its own
// references get traced later. NULL and already-marked objects are ignored.
// Exits the process with status 1 if the gray stack cannot be grown.
void markObject(Obj* object) {
  if (object == NULL) return;
//> check-is-marked
  // Already visited; returning here also stops cycles from looping forever.
  if (object->isMarked) return;

//< check-is-marked
//> log-mark-object
#ifdef DEBUG_LOG_GC
  printf("%p mark ", (void*)object);
  printValue(OBJ_VAL(object));
  printf("\n");
#endif

//< log-mark-object
  object->isMarked = true;
//> add-to-gray-stack

  if (vm.grayCapacity < vm.grayCount + 1) {
    vm.grayCapacity = GROW_CAPACITY(vm.grayCapacity);
    // Uses plain realloc() rather than reallocate(), so growing the gray
    // stack neither counts toward vm.bytesAllocated nor starts another GC.
    vm.grayStack = (Obj**)realloc(vm.grayStack,
                                  sizeof(Obj*) * vm.grayCapacity);
//> exit-gray-stack

    if (vm.grayStack == NULL) exit(1);
//< exit-gray-stack
  }

  vm.grayStack[vm.grayCount++] = object;
//< add-to-gray-stack
}
//< Garbage Collection mark-object
//> Garbage Collection mark-value
// Marks the object held by `value`, if it holds one. Values that are not
// objects are ignored.
void markValue(Value value) {
  if (!IS_OBJ(value)) return;

  markObject(AS_OBJ(value));
}
//< Garbage Collection mark-value
//> Garbage Collection mark-array
// Marks every value stored in `array`, in index order.
static void markArray(ValueArray* array) {
  int index = 0;
  while (index < array->count) {
    markValue(array->values[index]);
    index++;
  }
}
//< Garbage Collection mark-array
//> Garbage Collection blacken-object
// Traces one gray object: marks everything `object` refers to directly,
// according to its type. Natives and strings are treated as having no
// outgoing references.
static void blackenObject(Obj* object) {
//> log-blacken-object
#ifdef DEBUG_LOG_GC
  printf("%p blacken ", (void*)object);
  printValue(OBJ_VAL(object));
  printf("\n");
#endif

//< log-blacken-object
  switch (object->type) {
//> Methods and Initializers blacken-bound-method
    case OBJ_BOUND_METHOD: {
      ObjBoundMethod* bound = (ObjBoundMethod*)object;
      markValue(bound->receiver);
      markObject((Obj*)bound->method);
      break;
    }
//< Methods and Initializers blacken-bound-method
//> Classes and Instances blacken-class
    case OBJ_CLASS: {
      ObjClass* klass = (ObjClass*)object;
      markObject((Obj*)klass->name);
//> Methods and Initializers mark-methods
      markTable(&klass->methods);
//< Methods and Initializers mark-methods
      break;
    }
//< Classes and Instances blacken-class
//> blacken-closure
    case OBJ_CLOSURE: {
      ObjClosure* closure = (ObjClosure*)object;
      markObject((Obj*)closure->function);
      for (int i = 0; i < closure->upvalueCount; i++) {
        markObject((Obj*)closure->upvalues[i]);
      }
      break;
    }
//< blacken-closure
//> blacken-function
    case OBJ_FUNCTION: {
      ObjFunction* function = (ObjFunction*)object;
      markObject((Obj*)function->name);
      // Constants in the function's chunk may themselves be objects.
      markArray(&function->chunk.constants);
      break;
    }
//< blacken-function
//> Classes and Instances blacken-instance
    case OBJ_INSTANCE: {
      ObjInstance* instance = (ObjInstance*)object;
      markObject((Obj*)instance->klass);
      markTable(&instance->fields);
      break;
    }
//< Classes and Instances blacken-instance
//> blacken-upvalue
    case OBJ_UPVALUE:
      markValue(((ObjUpvalue*)object)->closed);
      break;
//< blacken-upvalue
    case OBJ_NATIVE:
    case OBJ_STRING:
      // Nothing further to mark for these types.
      break;
  }
}
//< Garbage Collection blacken-object
//> Strings free-object
// Releases `object` together with the memory it exclusively owns (method
// and field tables, upvalue pointer arrays, chunks, character buffers).
// Other objects it merely points at are not freed here.
static void freeObject(Obj* object) {
//> Garbage Collection log-free-object
#ifdef DEBUG_LOG_GC
  printf("%p free type %d\n", (void*)object, object->type);
#endif

//< Garbage Collection log-free-object
  switch (object->type) {
//> Methods and Initializers free-bound-method
    case OBJ_BOUND_METHOD:
      FREE(ObjBoundMethod, object);
      break;
//< Methods and Initializers free-bound-method
//> Classes and Instances free-class
    case OBJ_CLASS: {
//> Methods and Initializers free-methods
      ObjClass* klass = (ObjClass*)object;
      freeTable(&klass->methods);
//< Methods and Initializers free-methods
      FREE(ObjClass, object);
      break;
    } // [braces]
//< Classes and Instances free-class
//> Closures free-closure
    case OBJ_CLOSURE: {
//> free-upvalues
      ObjClosure* closure = (ObjClosure*)object;
      // Only the array of upvalue pointers is freed, not the ObjUpvalues.
      FREE_ARRAY(ObjUpvalue*, closure->upvalues,
                 closure->upvalueCount);
//< free-upvalues
      FREE(ObjClosure, object);
      break;
    }
//< Closures free-closure
//> Calls and Functions free-function
    case OBJ_FUNCTION: {
      ObjFunction* function = (ObjFunction*)object;
      freeChunk(&function->chunk);
      FREE(ObjFunction, object);
      break;
    }
//< Calls and Functions free-function
//> Classes and Instances free-instance
    case OBJ_INSTANCE: {
      ObjInstance* instance = (ObjInstance*)object;
      freeTable(&instance->fields);
      FREE(ObjInstance, object);
      break;
    }
//< Classes and Instances free-instance
//> Calls and Functions free-native
    case OBJ_NATIVE:
      FREE(ObjNative, object);
      break;
//< Calls and Functions free-native
    case OBJ_STRING: {
      ObjString* string = (ObjString*)object;
      // length + 1 accounts for one byte beyond `length` (presumably the
      // terminating NUL -- confirm against the string allocation code).
      FREE_ARRAY(char, string->chars, string->length + 1);
      FREE(ObjString, object);
      break;
    }
//> Closures free-upvalue
    case OBJ_UPVALUE:
      FREE(ObjUpvalue, object);
      break;
//< Closures free-upvalue
  }
}
//< Strings free-object
//> Garbage Collection mark-roots
// Marks every root: values on the VM stack, the closure of each call
// frame, the open upvalue list, the globals table, the compiler's function
// objects, and vm.initString.
static void markRoots() {
  for (Value* slot = vm.stack; slot < vm.stackTop; slot++) {
    markValue(*slot);
  }
//> mark-closures

  for (int i = 0; i < vm.frameCount; i++) {
    markObject((Obj*)vm.frames[i].closure);
  }
//< mark-closures
//> mark-open-upvalues

  for (ObjUpvalue* upvalue = vm.openUpvalues;
       upvalue != NULL;
       upvalue = upvalue->next) {
    markObject((Obj*)upvalue);
  }
//< mark-open-upvalues
//> mark-globals

  markTable(&vm.globals);
//< mark-globals
//> call-mark-compiler-roots
  // A collection can be triggered while compiling, so the compiler's own
  // objects are roots too.
  markCompilerRoots();
//< call-mark-compiler-roots
//> Methods and Initializers mark-init-string
  markObject((Obj*)vm.initString);
//< Methods and Initializers mark-init-string
}
//< Garbage Collection mark-roots
//> Garbage Collection trace-references
// Drains the gray stack: pops objects one at a time and blackens each.
// Blackening may push more objects, so this runs until the stack is empty.
static void traceReferences() {
  while (vm.grayCount > 0) {
    vm.grayCount--;
    blackenObject(vm.grayStack[vm.grayCount]);
  }
}
//< Garbage Collection trace-references
//> Garbage Collection sweep
// Walks the VM's linked list of all objects. Unmarked objects are unlinked
// and freed; marked ones have their mark cleared for the next collection.
static void sweep() {
  // `previous` is the last surviving node, needed to unlink its successor.
  Obj* previous = NULL;
  Obj* object = vm.objects;
  while (object != NULL) {
    if (object->isMarked) {
//> unmark
      object->isMarked = false;
//< unmark
      previous = object;
      object = object->next;
    } else {
      Obj* unreached = object;
      // Step past the node first so it can be freed safely afterwards.
      object = object->next;
      if (previous != NULL) {
        previous->next = object;
      } else {
        // The node being removed was the head of the list.
        vm.objects = object;
      }

      freeObject(unreached);
    }
  }
}
//< Garbage Collection sweep
//> Garbage Collection collect-garbage
// Runs one full collection: mark the roots, trace references, remove
// unmarked entries from the string table, sweep unreached objects, then
// set the threshold for the next collection. With DEBUG_LOG_GC defined it
// also logs the start, end and number of bytes reclaimed.
void collectGarbage() {
//> log-before-collect
#ifdef DEBUG_LOG_GC
  printf("-- gc begin\n");
//> log-before-size
  size_t before = vm.bytesAllocated;
//< log-before-size
#endif
//< log-before-collect
//> call-mark-roots

  markRoots();
//< call-mark-roots
//> call-trace-references
  traceReferences();
//< call-trace-references
//> sweep-strings
  // Must run after tracing (marks are final) and before sweep() frees the
  // unmarked strings, so the table never holds dangling pointers.
  tableRemoveWhite(&vm.strings);
//< sweep-strings
//> call-sweep
  sweep();
//< call-sweep
//> update-next-gc

  vm.nextGC = vm.bytesAllocated * GC_HEAP_GROW_FACTOR;
//< update-next-gc
//> log-after-collect

#ifdef DEBUG_LOG_GC
  printf("-- gc end\n");
//> log-collected-amount
  printf("   collected %zu bytes (from %zu to %zu) next at %zu\n",
         before - vm.bytesAllocated, before, vm.bytesAllocated,
         vm.nextGC);
//< log-collected-amount
#endif
//< log-after-collect
}
//< Garbage Collection collect-garbage
//> Strings free-objects
// Frees every object on the VM's object list regardless of mark state, and
// then releases the collector's gray stack.
void freeObjects() {
  Obj* object = vm.objects;
  while (object != NULL) {
    // Read the link before the node is freed.
    Obj* next = object->next;
    freeObject(object);
    object = next;
  }
//> Garbage Collection free-gray-stack

  // The gray stack is grown with plain realloc(), so plain free() matches.
  free(vm.grayStack);
//< Garbage Collection free-gray-stack
}
//< Strings free-objects


================================================
FILE: c/memory.h
================================================
//> Chunks of Bytecode memory-h
#ifndef clox_memory_h
#define clox_memory_h

#include "common.h"
//> Strings memory-include-object
#include "object.h"
//< Strings memory-include-object

//> Strings allocate
// Allocates a new array of `count` elements of `type`.
#define ALLOCATE(type, count) \
    (type*)reallocate(NULL, 0, sizeof(type) * (count))
//> free

// Frees a single `type` previously obtained through reallocate().
#define FREE(type, pointer) reallocate(pointer, sizeof(type), 0)
//< free

//< Strings allocate
// Next capacity for a growing array: 8 at minimum, otherwise double.
#define GROW_CAPACITY(capacity) \
    ((capacity) < 8 ? 8 : (capacity) * 2)
//> grow-array

// Resizes an array of `type` from `oldCount` to `newCount` elements and
// yields the (possibly moved) array pointer.
#define GROW_ARRAY(type, pointer, oldCount, newCount) \
    (type*)reallocate(pointer, sizeof(type) * (oldCount), \
        sizeof(type) * (newCount))
//> free-array

// Frees an array of `oldCount` elements of `type`.
#define FREE_ARRAY(type, pointer, oldCount) \
    reallocate(pointer, sizeof(type) * (oldCount), 0)
//< free-array

// Allocates, resizes or frees a block; a `newSize` of 0 frees `pointer`
// and returns NULL. All the macros above route through this function.
void* reallocate(void* pointer, size_t oldSize, size_t newSize);
//< grow-array
//> Garbage Collection mark-object-h
// Marks `object` as reachable for the current collection; NULL is ignored.
void markObject(Obj* object);
//< Garbage Collection mark-object-h
//> Garbage Collection mark-value-h
// Marks the object held by `value`, if it holds one.
void markValue(Value value);
//< Garbage Collection mark-value-h
//> Garbage Collection collect-garbage-h
// Runs a full mark-and-sweep collection.
void collectGarbage();
//< Garbage Collection collect-garbage-h
//> Strings free-objects-h
// Frees every object the VM has allocated.
void freeObjects();
//< Strings free-objects-h

#endif


================================================
FILE: c/object.c
================================================
//> Strings object-c
#include <stdio.h>
#include <string.h>

#include "memory.h"
#include "object.h"
//> Hash Tables object-include-table
#include "table.h"
//< Hash Tables object-include-table
#include "value.h"
#include "vm.h"
//> allocate-obj

// Allocates a heap object of struct `type`, tagged with `objectType`, and
// yields it as a `type*`. The expansion is fully parenthesized so the cast
// applies to the call result even when the macro use is followed by a postfix
// operator such as ->.
#define ALLOCATE_OBJ(type, objectType) \
    ((type*)allocateObject(sizeof(type), objectType))
//< allocate-obj
//> allocate-object

// Allocates `size` bytes for a new heap object, tags it with `type`, and
// links it in at the front of the VM's object list.
static Obj* allocateObject(size_t size, ObjType type) {
  Obj* fresh = (Obj*)reallocate(NULL, 0, size);
  fresh->type = type;
//> Garbage Collection init-is-marked
  // Every object starts out unmarked.
  fresh->isMarked = false;
//< Garbage Collection init-is-marked
//> add-to-list
  
  // Push onto the head of the list rooted at vm.objects.
  fresh->next = vm.objects;
  vm.objects = fresh;
//< add-to-list
//> Garbage Collection debug-log-allocate

#ifdef DEBUG_LOG_GC
  printf("%p allocate %zu for %d\n", (void*)fresh, size, type);
#endif

//< Garbage Collection debug-log-allocate
  return fresh;
}
//< allocate-object
//> Methods and Initializers new-bound-method
// Creates a bound-method object pairing `receiver` with the closure `method`.
ObjBoundMethod* newBoundMethod(Value receiver,
                               ObjClosure* method) {
  ObjBoundMethod* result = ALLOCATE_OBJ(ObjBoundMethod,
                                        OBJ_BOUND_METHOD);
  result->method = method;
  result->receiver = receiver;
  return result;
}
//< Methods and Initializers new-bound-method
//> Classes and Instances new-class
// Creates a class object named `name` whose method table starts out empty.
ObjClass* newClass(ObjString* name) {
  ObjClass* created = ALLOCATE_OBJ(ObjClass, OBJ_CLASS);
  created->name = name; // [klass]
//> Methods and Initializers init-methods
  initTable(&created->methods);
//< Methods and Initializers init-methods
  return created;
}
//< Classes and Instances new-class
//> Closures new-closure
// Wraps `function` in a new closure. The closure gets an upvalue array with
// one slot per upvalue of the function, every slot initially NULL.
ObjClosure* newClosure(ObjFunction* function) {
//> allocate-upvalue-array
  // The slot array is allocated and cleared before the closure itself.
  ObjUpvalue** slots = ALLOCATE(ObjUpvalue*,
                                function->upvalueCount);
  int index = 0;
  while (index < function->upvalueCount) {
    slots[index] = NULL;
    index++;
  }

//< allocate-upvalue-array
  ObjClosure* result = ALLOCATE_OBJ(ObjClosure, OBJ_CLOSURE);
  result->function = function;
//> init-upvalue-fields
  result->upvalues = slots;
  result->upvalueCount = function->upvalueCount;
//< init-upvalue-fields
  return result;
}
//< Closures new-closure
//> Calls and Functions new-function
// Creates a blank function object: no name, arity 0, no upvalues, and a
// freshly initialized chunk.
ObjFunction* newFunction() {
  ObjFunction* blank = ALLOCATE_OBJ(ObjFunction, OBJ_FUNCTION);
  blank->name = NULL;
  blank->arity = 0;
//> Closures init-upvalue-count
  blank->upvalueCount = 0;
//< Closures init-upvalue-count
  initChunk(&blank->chunk);
  return blank;
}
//< Calls and Functions new-function
//> Classes and Instances new-instance
// Creates an instance of `klass` with an empty field table.
ObjInstance* newInstance(ObjClass* klass) {
  ObjInstance* created = ALLOCATE_OBJ(ObjInstance, OBJ_INSTANCE);
  initTable(&created->fields);
  created->klass = klass;
  return created;
}
//< Classes and Instances new-instance
//> Calls and Functions new-native
// Wraps the C function pointer `function` in a native-function object.
ObjNative* newNative(NativeFn function) {
  ObjNative* wrapper = ALLOCATE_OBJ(ObjNative, OBJ_NATIVE);
  wrapper->function = function;
  return wrapper;
}
//< Calls and Functions new-native

/* Strings allocate-string < Hash Tables allocate-string
static ObjString* allocateString(char* chars, int length) {
*/
//> allocate-string
//> Hash Tables allocate-string
static ObjString* allocateString(char* chars, int length,
                                 uint32_t hash) {
//< Hash Tables allocate-string
  ObjString* string = ALLOCATE_OBJ(ObjString, OBJ_STRING);
  string->length = length;
  string->chars = chars;
//> Hash Tables allocate-store-hash
  string->hash = hash;
//< Hash Tables allocate-store-hash
//> Hash Tables allocate-store-string
//> Garbage Collection push-string

  push(OBJ_VAL(string));
//< Garbage Collection push-string
  tableSet(&vm.strings, string, NIL_VAL);
//> Garbage Collection pop-string
  pop();

//< Garbage Collection pop-string
//< Hash Tables allocate-store-string
  return string;
}
//< allocate-string
//> Hash Tables hash-string
// Returns the 32-bit FNV-1a hash of the first `length` bytes of `key`:
// start from the offset basis, then for each byte XOR it in and multiply by
// the FNV prime.
static uint32_t hashString(const char* key, int length) {
  uint32_t digest = 2166136261u;
  int position = 0;
  while (position < length) {
    digest = (digest ^ (uint8_t)key[position]) * 16777619;
    position++;
  }
  return digest;
}
//< Hash Tables hash-string
//> take-string
// Returns the interned string for the `length` characters in `chars`, taking
// over the buffer `chars`. If an equal string is already interned, the buffer
// is released and the existing string is returned instead.
ObjString* takeString(char* chars, int length) {
/* Strings take-string < Hash Tables take-string-hash
  return allocateString(chars, length);
*/
//> Hash Tables take-string-hash
  uint32_t digest = hashString(chars, length);
//> take-string-intern
  ObjString* existing = tableFindString(&vm.strings, chars, length,
                                        digest);
  if (existing != NULL) {
    // Duplicate: drop the caller's buffer (length plus the terminator).
    FREE_ARRAY(char, chars, length + 1);
    return existing;
  }

//< take-string-intern
  return allocateString(chars, length, digest);
//< Hash Tables take-string-hash
}
//< take-string
// Returns the interned string for the `length` characters at `chars` without
// taking ownership of them. If no equal string is interned yet, the
// characters are copied into a new NUL-terminated heap buffer first.
ObjString* copyString(const char* chars, int length) {
//> Hash Tables copy-string-hash
  uint32_t digest = hashString(chars, length);
//> copy-string-intern
  ObjString* existing = tableFindString(&vm.strings, chars, length,
                                        digest);
  if (existing != NULL) {
    return existing;
  }

//< copy-string-intern
//< Hash Tables copy-string-hash
  char* buffer = ALLOCATE(char, length + 1);
  memcpy(buffer, chars, length);
  buffer[length] = '\0';
/* Strings object-c < Hash Tables copy-string-allocate
  return allocateString(heapChars, length);
*/
//> Hash Tables copy-string-allocate
  return allocateString(buffer, length, digest);
//< Hash Tables copy-string-allocate
}
//> Closures new-upvalue
// Creates an upvalue object pointing at `slot`, with a nil `closed` value and
// no successor in its list.
ObjUpvalue* newUpvalue(Value* slot) {
  ObjUpvalue* created = ALLOCATE_OBJ(ObjUpvalue, OBJ_UPVALUE);
  created->location = slot;
//> init-closed
  created->closed = NIL_VAL;
//< init-closed
//> init-next
  created->next = NULL;
//< init-next
  return created;
}
//< Closures new-upvalue
//> Calls and Functions print-function-helper
// Prints `function` to stdout: "<script>" when it has no name, otherwise
// "<fn NAME>".
static void printFunction(ObjFunction* function) {
  const ObjString* name = function->name;
//> print-script
  if (name == NULL) {
    printf("<script>");
    return;
  }
//< print-script
  printf("<fn %s>", name->chars);
}
//< Calls and Functions print-function-helper
//> print-object
// Prints a textual form of the heap object held in `value` to stdout; the
// format is chosen by the object's type tag.
void printObject(Value value) {
  switch (OBJ_TYPE(value)) {
//> Methods and Initializers print-bound-method
    case OBJ_BOUND_METHOD:
      // Printed as the function of the underlying closure.
      printFunction(AS_BOUND_METHOD(value)->method->function);
      break;
//< Methods and Initializers print-bound-method
//> Classes and Instances print-class
    case OBJ_CLASS:
      // Just the class name.
      printf("%s", AS_CLASS(value)->name->chars);
      break;
//< Classes and Instances print-class
//> Closures print-closure
    case OBJ_CLOSURE:
      printFunction(AS_CLOSURE(value)->function);
      break;
//< Closures print-closure
//> Calls and Functions print-function
    case OBJ_FUNCTION:
      printFunction(AS_FUNCTION(value));
      break;
//< Calls and Functions print-function
//> Classes and Instances print-instance
    case OBJ_INSTANCE:
      // The name of the instance's class followed by " instance".
      printf("%s instance",
             AS_INSTANCE(value)->klass->name->chars);
      break;
//< Classes and Instances print-instance
//> Calls and Functions print-native
    case OBJ_NATIVE:
      printf("<native fn>");
      break;
//< Calls and Functions print-native
    case OBJ_STRING:
      // The raw characters, without quotes.
      printf("%s", AS_CSTRING(value));
      break;
//> Closures print-upvalue
    case OBJ_UPVALUE:
      printf("upvalue");
      break;
//< Closures print-upvalue
  }
}
//< print-object


================================================
FILE: c/object.h
================================================
//> Strings object-h
#ifndef clox_object_h
#define clox_object_h

#include "common.h"
//> Calls and Functions object-include-chunk
#include "chunk.h"
//< Calls and Functions object-include-chunk
//> Classes and Instances object-include-table
#include "table.h"
//< Classes and Instances object-include-table
#include "value.h"
//> obj-type-macro

// Type tag of the heap object held in `value`. Performs no IS_OBJ() check of
// its own.
#define OBJ_TYPE(value)        (AS_OBJ(value)->type)
//< obj-type-macro
//> is-string

//> Methods and Initializers is-bound-method
// Type tests: each is true when `value` holds a heap object carrying the
// corresponding ObjType tag. All of them go through isObjType().
#define IS_BOUND_METHOD(value) isObjType(value, OBJ_BOUND_METHOD)
//< Methods and Initializers is-bound-method
//> Classes and Instances is-class
#define IS_CLASS(value)        isObjType(value, OBJ_CLASS)
//< Classes and Instances is-class
//> Closures is-closure
#define IS_CLOSURE(value)      isObjType(value, OBJ_CLOSURE)
//< Closures is-closure
//> Calls and Functions is-function
#define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
//< Calls and Functions is-function
//> Classes and Instances is-instance
#define IS_INSTANCE(value)     isObjType(value, OBJ_INSTANCE)
//< Classes and Instances is-instance
//> Calls and Functions is-native
#define IS_NATIVE(value)       isObjType(value, OBJ_NATIVE)
//< Calls and Functions is-native
#define IS_STRING(value)       isObjType(value, OBJ_STRING)
//< is-string
//> as-string

//> Methods and Initializers as-bound-method
// Unchecked downcasts from a Value to a pointer to the concrete object
// struct. None of these verify the type tag. Two of them go one step further
// than a cast: AS_NATIVE yields the wrapped C function pointer and AS_CSTRING
// yields the string's character buffer.
#define AS_BOUND_METHOD(value) ((ObjBoundMethod*)AS_OBJ(value))
//< Methods and Initializers as-bound-method
//> Classes and Instances as-class
#define AS_CLASS(value)        ((ObjClass*)AS_OBJ(value))
//< Classes and Instances as-class
//> Closures as-closure
#define AS_CLOSURE(value)      ((ObjClosure*)AS_OBJ(value))
//< Closures as-closure
//> Calls and Functions as-function
#define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
//< Calls and Functions as-function
//> Classes and Instances as-instance
#define AS_INSTANCE(value)     ((ObjInstance*)AS_OBJ(value))
//< Classes and Instances as-instance
//> Calls and Functions as-native
#define AS_NATIVE(value) \
    (((ObjNative*)AS_OBJ(value))->function)
//< Calls and Functions as-native
#define AS_STRING(value)       ((ObjString*)AS_OBJ(value))
#define AS_CSTRING(value)      (((ObjString*)AS_OBJ(value))->chars)
//< as-string
//> obj-type

// Tag stored in Obj.type, with one enumerator per kind of heap object.
typedef enum {
//> Methods and Initializers obj-type-bound-method
  OBJ_BOUND_METHOD,
//< Methods and Initializers obj-type-bound-method
//> Classes and Instances obj-type-class
  OBJ_CLASS,
//< Classes and Instances obj-type-class
//> Closures obj-type-closure
  OBJ_CLOSURE,
//< Closures obj-type-closure
//> Calls and Functions obj-type-function
  OBJ_FUNCTION,
//< Calls and Functions obj-type-function
//> Classes and Instances obj-type-instance
  OBJ_INSTANCE,
//< Classes and Instances obj-type-instance
//> Calls and Functions obj-type-native
  OBJ_NATIVE,
//< Calls and Functions obj-type-native
  OBJ_STRING,
//> Closures obj-type-upvalue
  OBJ_UPVALUE
//< Closures obj-type-upvalue
} ObjType;
//< obj-type

// Header shared by all heap objects; every concrete object struct in this
// file has an `Obj obj` as its first member.
struct Obj {
  ObjType type;  // Which concrete object struct this header belongs to.
//> Garbage Collection is-marked-field
  bool isMarked;  // Collector mark flag; false on a newly allocated object.
//< Garbage Collection is-marked-field
//> next-field
  struct Obj* next;  // Next object in the list rooted at vm.objects.
//< next-field
};
//> Calls and Functions obj-function

// A function object. newFunction() creates one with arity 0, no upvalues,
// a NULL name and an initialized chunk.
typedef struct {
  Obj obj;
  int arity;
//> Closures upvalue-count
  int upvalueCount;  // Sizes the upvalue array built by newClosure().
//< Closures upvalue-count
  Chunk chunk;
  ObjString* name;  // NULL is printed as "<script>" by printFunction().
} ObjFunction;
//< Calls and Functions obj-function
//> Calls and Functions obj-native

// Signature of a native function: receives an argument count and a pointer
// to the arguments, and returns a Value.
typedef Value (*NativeFn)(int argCount, Value* args);

// Heap object wrapping a native function pointer.
typedef struct {
  Obj obj;
  NativeFn function;
} ObjNative;
//< Calls and Functions obj-native
//> obj-string

// A string object. Strings are created through copyString()/takeString(),
// which intern them in vm.strings.
struct ObjString {
  Obj obj;
  int length;   // Character count, excluding the terminating NUL.
  char* chars;  // Character buffer; copyString() NUL-terminates it.
//> Hash Tables obj-string-hash
  uint32_t hash;  // Hash of the characters, as computed by hashString().
//< Hash Tables obj-string-hash
};
//< obj-string
//> Closures obj-upvalue
// An upvalue object, as created by newUpvalue().
typedef struct ObjUpvalue {
  Obj obj;
  Value* location;  // Set to the slot passed to newUpvalue().
//> closed-field
  Value closed;  // Starts as nil.
//< closed-field
//> next-field
  struct ObjUpvalue* next;  // Link to another upvalue; starts as NULL.
//< next-field
} ObjUpvalue;
//< Closures obj-upvalue
//> Closures obj-closure
// A closure: a function plus an array of upvalue pointers.
typedef struct {
  Obj obj;
  ObjFunction* function;
//> upvalue-fields
  ObjUpvalue** upvalues;  // upvalueCount entries, all NULL after newClosure().
  int upvalueCount;       // Copied from function->upvalueCount at creation.
//< upvalue-fields
} ObjClosure;
//< Closures obj-closure
//> Classes and Instances obj-class

// A class object: its name and a table of methods keyed by string.
typedef struct {
  Obj obj;
  ObjString* name;
//> Methods and Initializers class-methods
  Table methods;  // Empty after newClass().
//< Methods and Initializers class-methods
} ObjClass;
//< Classes and Instances obj-class
//> Classes and Instances obj-instance

// An instance object: the class it was created from plus its own field table.
typedef struct {
  Obj obj;
  ObjClass* klass;
  Table fields; // [fields]
} ObjInstance;
//< Classes and Instances obj-instance

//> Methods and Initializers obj-bound-method
// A bound method: a receiver value paired with the closure of a method.
typedef struct {
  Obj obj;
  Value receiver;
  ObjClosure* method;
} ObjBoundMethod;

//< Methods and Initializers obj-bound-method
//> Methods and Initializers new-bound-method-h
ObjBoundMethod* newBoundMethod(Value receiver,
                               ObjClosure* method);
//< Methods and Initializers new-bound-method-h
//> Classes and Instances new-class-h
ObjClass* newClass(ObjString* name);
//< Classes and Instances new-class-h
//> Closures new-closure-h
ObjClosure* newClosure(ObjFunction* function);
//< Closures new-closure-h
//> Calls and Functions new-function-h
ObjFunction* newFunction();
//< Calls and Functions new-function-h
//> Classes and Instances new-instance-h
ObjInstance* newInstance(ObjClass* klass);
//< Classes and Instances new-instance-h
//> Calls and Functions new-native-h
ObjNative* newNative(NativeFn function);
//< Calls and Functions new-native-h
//> take-string-h
ObjString* takeString(char* chars, int length);
//< take-string-h
//> copy-string-h
ObjString* copyString(const char* chars, int length);
//> Closures new-upvalue-h
ObjUpvalue* newUpvalue(Value* slot);
//< Closures new-upvalue-h
//> print-object-h
void printObject(Value value);
//< print-object-h

//< copy-string-h
//> is-obj-type
// True when `value` holds a heap object whose tag equals `type`. This is a
// function, not a macro, so the argument expression is evaluated only once
// even though the body reads `value` twice.
static inline bool isObjType(Value value, ObjType type) {
  if (!IS_OBJ(value)) return false;
  return AS_OBJ(value)->type == type;
}

//< is-obj-type
#endif


================================================
FILE: c/scanner.c
================================================
//> Scanning on Demand scanner-c
#include <stdio.h>
#include <string.h>

#include "common.h"
#include "scanner.h"

// Scanner state; all three fields are set by initScanner().
typedef struct {
  const char* start;    // First character of the token being scanned.
  const char* current;  // Next character to be consumed.
  int line;             // Current source line, starting at 1.
} Scanner;

// The one scanner instance that every function in this file operates on.
Scanner scanner;
//> init-scanner
// Points the scanner at the beginning of `source` and resets the line
// counter to 1.
void initScanner(const char* source) {
  scanner.line = 1;
  scanner.current = source;
  scanner.start = source;
}
//< init-scanner
//> is-alpha
// True for ASCII letters and the underscore.
static bool isAlpha(char c) {
  if (c == '_') return true;
  if ('a' <= c && c <= 'z') return true;
  return 'A' <= c && c <= 'Z';
}
//< is-alpha
//> is-digit
// True for the ASCII digits '0' through '9'.
static bool isDigit(char c) {
  return !(c < '0' || c > '9');
}
//< is-digit
//> is-at-end
// True once the scanner is positioned on the source's terminating NUL.
static bool isAtEnd() {
  return scanner.current[0] == '\0';
}
//< is-at-end
//> advance
// Consumes one character and returns it.
static char advance() {
  char consumed = *scanner.current;
  scanner.current++;
  return consumed;
}
//< advance
//> peek
// Returns the next character without consuming it.
static char peek() {
  return scanner.current[0];
}
//< peek
//> peek-next
// Returns the character after the next one without consuming anything, or
// '\0' when the scanner is already at the end (so it never reads past the
// terminator).
static char peekNext() {
  return isAtEnd() ? '\0' : scanner.current[1];
}
//< peek-next
//> match
// Consumes the next character only if it equals `expected`; reports whether
// it did. Never consumes at end of input.
static bool match(char expected) {
  if (isAtEnd() || *scanner.current != expected) {
    return false;
  }
  scanner.current++;
  return true;
}
//< match
//> make-token
// Builds a token of `type` whose lexeme is the source text between
// scanner.start and scanner.current, stamped with the current line.
static Token makeToken(TokenType type) {
  Token token = {
    .type = type,
    .start = scanner.start,
    .length = (int)(scanner.current - scanner.start),
    .line = scanner.line,
  };
  return token;
}
//< make-token
//> error-token
static Token errorToken(const char* message) {
  Token token;
  token.type = TOKEN_ERROR;
  token.start = message;
  token.length = (int)strlen(message);
  token.line = scanner.line;
  return token;
}
//< error-token
//> skip-whitespace
// Advances past spaces, tabs, carriage returns, newlines and `//` line
// comments, bumping scanner.line for each newline consumed. Returns as soon
// as the next character is anything else.
static void skipWhitespace() {
  for (;;) {
    char c = peek();
    switch (c) {
      case ' ':
      case '\r':
      case '\t':
        advance();
        break;
//> newline
      case '\n':
        scanner.line++;
        advance();
        break;
//< newline
//> comment
      case '/':
        if (peekNext() == '/') {
          // A comment goes until the end of the line.
          // The newline itself is left for the next loop iteration, which
          // counts it.
          while (peek() != '\n' && !isAtEnd()) advance();
        } else {
          // A lone '/' is a token, not whitespace.
          return;
        }
        break;
//< comment
      default:
        return;
    }
  }
}
//< skip-whitespace
//> check-keyword
// Decides whether the current lexeme is the keyword `type`. The lexeme must
// be exactly `start + length` characters long and its characters from offset
// `start` onward must equal the `length` bytes of `rest`; otherwise it is a
// plain identifier.
static TokenType checkKeyword(int start, int length,
    const char* rest, TokenType type) {
  if (scanner.current - scanner.start != start + length) {
    return TOKEN_IDENTIFIER;
  }
  if (memcmp(scanner.start + start, rest, length) != 0) {
    return TOKEN_IDENTIFIER;
  }

  return type;
}
//< check-keyword
//> identifier-type
// Classifies the current lexeme as a keyword or TOKEN_IDENTIFIER. It
// branches on the first character (and the second, for 'f' and 't'), then
// lets checkKeyword() compare the remaining characters.
static TokenType identifierType() {
//> keywords
  switch (scanner.start[0]) {
    case 'a': return checkKeyword(1, 2, "nd", TOKEN_AND);
    case 'c': return checkKeyword(1, 4, "lass", TOKEN_CLASS);
    case 'e': return checkKeyword(1, 3, "lse", TOKEN_ELSE);
//> keyword-f
    case 'f':
      // Only look at the second character if the lexeme has one.
      if (scanner.current - scanner.start > 1) {
        switch (scanner.start[1]) {
          case 'a': return checkKeyword(2, 3, "lse", TOKEN_FALSE);
          case 'o': return checkKeyword(2, 1, "r", TOKEN_FOR);
          case 'u': return checkKeyword(2, 1, "n", TOKEN_FUN);
        }
      }
      break;
//< keyword-f
    case 'i': return checkKeyword(1, 1, "f", TOKEN_IF);
    case 'n': return checkKeyword(1, 2, "il", TOKEN_NIL);
    case 'o': return checkKeyword(1, 1, "r", TOKEN_OR);
    case 'p': return checkKeyword(1, 4, "rint", TOKEN_PRINT);
    case 'r': return checkKeyword(1, 5, "eturn", TOKEN_RETURN);
    case 's': return checkKeyword(1, 4, "uper", TOKEN_SUPER);
//> keyword-t
    case 't':
      // Only look at the second character if the lexeme has one.
      if (scanner.current - scanner.start > 1) {
        switch (scanner.start[1]) {
          case 'h': return checkKeyword(2, 2, "is", TOKEN_THIS);
          case 'r': return checkKeyword(2, 2, "ue", TOKEN_TRUE);
        }
      }
      break;
//< keyword-t
    case 'v': return checkKeyword(1, 2, "ar", TOKEN_VAR);
    case 'w': return checkKeyword(1, 4, "hile", TOKEN_WHILE);
  }

//< keywords
  // No keyword starts with this prefix.
  return TOKEN_IDENTIFIER;
}
//< identifier-type
//> identifier
// Scans the rest of an identifier or keyword (letters, digits, underscores)
// and returns its token. The first character has already been consumed.
static Token identifier() {
  for (;;) {
    char next = peek();
    if (!isAlpha(next) && !isDigit(next)) break;
    advance();
  }
  return makeToken(identifierType());
}
//< identifier
//> number
// Scans the rest of a number literal: digits, optionally followed by a '.'
// and more digits. A '.' not followed by a digit is left unconsumed.
static Token number() {
  while (isDigit(peek())) {
    advance();
  }

  // Fractional part: only when the dot is directly followed by a digit.
  if (peek() == '.' && isDigit(peekNext())) {
    // The first pass consumes the "."; each later pass consumes one digit.
    do {
      advance();
    } while (isDigit(peek()));
  }

  return makeToken(TOKEN_NUMBER);
}
//< number
//> string
// Scans the rest of a string literal after its opening quote. Newlines inside
// the literal are allowed and counted. Returns an error token if the input
// ends before the closing quote.
static Token string() {
  for (;;) {
    if (isAtEnd()) return errorToken("Unterminated string.");

    char c = advance();
    // The closing quote has just been consumed.
    if (c == '"') break;
    if (c == '\n') scanner.line++;
  }

  return makeToken(TOKEN_STRING);
}
//< string
//> scan-token
// Scans and returns the next token. Leading whitespace and comments are
// skipped first. At end of input it returns TOKEN_EOF; for a character that
// starts no token it returns an error token.
Token scanToken() {
//> call-skip-whitespace
  skipWhitespace();
//< call-skip-whitespace
  // The new token begins where the previous one (plus whitespace) ended.
  scanner.start = scanner.current;

  if (isAtEnd()) return makeToken(TOKEN_EOF);
//> scan-char
  
  char c = advance();
//> scan-identifier
  if (isAlpha(c)) return identifier();
//< scan-identifier
//> scan-number
  if (isDigit(c)) return number();
//< scan-number

  switch (c) {
    case '(': return makeToken(TOKEN_LEFT_PAREN);
    case ')': return makeToken(TOKEN_RIGHT_PAREN);
    case '{': return makeToken(TOKEN_LEFT_BRACE);
    case '}': return makeToken(TOKEN_RIGHT_BRACE);
    case ';': return makeToken(TOKEN_SEMICOLON);
    case ',': return makeToken(TOKEN_COMMA);
    case '.': return makeToken(TOKEN_DOT);
    case '-': return makeToken(TOKEN_MINUS);
    case '+': return makeToken(TOKEN_PLUS);
    case '/': return makeToken(TOKEN_SLASH);
    case '*': return makeToken(TOKEN_STAR);
//> two-char
    // These four become a two-character token when followed by '='.
    case '!':
      return makeToken(
          match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
    case '=':
      return makeToken(
          match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
    case '<':
      return makeToken(
          match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
    case '>':
      return makeToken(
          match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
//< two-char
//> scan-string
    case '"': return string();
//< scan-string
  }
//< scan-char

  return errorToken("Unexpected character.");
}
//< scan-token


================================================
FILE: c/scanner.h
================================================
//> Scanning on Demand scanner-h
#ifndef clox_scanner_h
#define clox_scanner_h
//> token-type

// Every kind of token the scanner can produce, including the two synthetic
// kinds TOKEN_ERROR and TOKEN_EOF.
typedef enum {
  // Single-character tokens.
  TOKEN_LEFT_PAREN, TOKEN_RIGHT_PAREN,
  TOKEN_LEFT_BRACE, TOKEN_RIGHT_BRACE,
  TOKEN_COMMA, TOKEN_DOT, TOKEN_MINUS, TOKEN_PLUS,
  TOKEN_SEMICOLON, TOKEN_SLASH, TOKEN_STAR,
  // One or two character tokens.
  TOKEN_BANG, TOKEN_BANG_EQUAL,
  TOKEN_EQUAL, TOKEN_EQUAL_EQUAL,
  TOKEN_GREATER, TOKEN_GREATER_EQUAL,
  TOKEN_LESS, TOKEN_LESS_EQUAL,
  // Literals.
  TOKEN_IDENTIFIER, TOKEN_STRING, TOKEN_NUMBER,
  // Keywords.
  TOKEN_AND, TOKEN_CLASS, TOKEN_ELSE, TOKEN_FALSE,
  TOKEN_FOR, TOKEN_FUN, TOKEN_IF, TOKEN_NIL, TOKEN_OR,
  TOKEN_PRINT, TOKEN_RETURN, TOKEN_SUPER, TOKEN_THIS,
  TOKEN_TRUE, TOKEN_VAR, TOKEN_WHILE,

  // Scanner-generated: an error report and the end-of-input marker.
  TOKEN_ERROR, TOKEN_EOF
} TokenType;
//< token-type
//> token-struct

// A scanned token. The lexeme is a (start, length) slice and is not
// NUL-terminated at `length`.
typedef struct {
  TokenType type;
  const char* start;  // Into the source text, or the message for TOKEN_ERROR.
  int length;         // Number of characters in the lexeme or message.
  int line;           // Scanner line at the time the token was made.
} Token;
//< token-struct

void initScanner(const char* source);
//> scan-token-h
Token scanToken();
//< scan-token-h

#endif


================================================
FILE: c/table.c
================================================
//> Hash Tables table-c
#include <stdlib.h>
#include <string.h>

#include "memory.h"
#include "object.h"
#include "table.h"
#include "value.h"

//> max-load
// tableSet() grows the entry array before an insertion would push the count
// above this fraction of the capacity.
#define TABLE_MAX_LOAD 0.75

//< max-load
// Resets `table` to the empty state: no entry array, zero capacity and zero
// count. Does not free anything.
void initTable(Table* table) {
  table->entries = NULL;
  table->capacity = 0;
  table->count = 0;
}
//> free-table
// Releases the table's entry array and resets the table to the empty state.
// The keys and values themselves are not freed here.
void freeTable(Table* table) {
  FREE_ARRAY(Entry, table->entries, table->capacity);
  initTable(table);
}
//< free-table
//> find-entry
//> omit
// NOTE: The "Optimization" chapter has a manual copy of this function.
// If you change it here, make sure to update that copy.
//< omit
// Linear-probes `entries` (which has `capacity` slots) for `key` and returns
// either the entry holding that key or the slot where it should be inserted:
// the first tombstone seen on the probe path if there was one, else the empty
// slot that ended the probe. Keys are compared by pointer. The caller must
// ensure capacity > 0 and that at least one slot is truly empty, or the loop
// does not terminate.
static Entry* findEntry(Entry* entries, int capacity,
                        ObjString* key) {
/* Hash Tables find-entry < Optimization initial-index
  uint32_t index = key->hash % capacity;
*/
//> Optimization initial-index
  // Masking matches `% capacity` only when capacity is a power of two.
  uint32_t index = key->hash & (capacity - 1);
//< Optimization initial-index
//> find-entry-tombstone
  Entry* tombstone = NULL;
  
//< find-entry-tombstone
  for (;;) {
    Entry* entry = &entries[index];
/* Hash Tables find-entry < Hash Tables find-tombstone
    if (entry->key == key || entry->key == NULL) {
      return entry;
    }
*/
//> find-tombstone
    if (entry->key == NULL) {
      if (IS_NIL(entry->value)) {
        // Empty entry.
        return tombstone != NULL ? tombstone : entry;
      } else {
        // We found a tombstone.
        // Remember only the first one and keep probing: the key may still
        // live further along.
        if (tombstone == NULL) tombstone = entry;
      }
    } else if (entry->key == key) {
      // We found the key.
      return entry;
    }
//< find-tombstone

/* Hash Tables find-entry < Optimization next-index
    index = (index + 1) % capacity;
*/
//> Optimization next-index
    // Step to the next slot, wrapping around at the end of the array.
    index = (index + 1) & (capacity - 1);
//< Optimization next-index
  }
}
//< find-entry
//> table-get
// Looks up `key`. On a hit, stores the associated value in `*value` and
// returns true; on a miss, returns false and leaves `*value` untouched.
bool tableGet(Table* table, ObjString* key, Value* value) {
  // An empty table may have no entry array at all, so do not probe it.
  if (table->count != 0) {
    Entry* slot = findEntry(table->entries, table->capacity, key);
    if (slot->key != NULL) {
      *value = slot->value;
      return true;
    }
  }

  return false;
}
//< table-get
//> table-adjust-capacity
// Replaces the table's entry array with a new one of `capacity` slots and
// re-inserts every live entry into it. Tombstones are not carried over, so
// the count is rebuilt from the live entries only.
static void adjustCapacity(Table* table, int capacity) {
  Entry* fresh = ALLOCATE(Entry, capacity);
  for (int slot = 0; slot < capacity; slot++) {
    fresh[slot].value = NIL_VAL;
    fresh[slot].key = NULL;
  }
//> re-hash

//> resize-init-count
  table->count = 0;
//< resize-init-count
  for (int i = 0; i < table->capacity; i++) {
    Entry* old = &table->entries[i];
    // Skips empty slots and tombstones alike.
    if (old->key != NULL) {
      Entry* home = findEntry(fresh, capacity, old->key);
      *home = *old;
//> resize-increment-count
      table->count++;
//< resize-increment-count
    }
  }
//< re-hash

//> Hash Tables free-old-array
  FREE_ARRAY(Entry, table->entries, table->capacity);
//< Hash Tables free-old-array
  table->capacity = capacity;
  table->entries = fresh;
}
//< table-adjust-capacity
//> table-set
// Stores `value` under `key`, growing the table first if the insertion would
// exceed the maximum load. Returns true when `key` was not present before.
bool tableSet(Table* table, ObjString* key, Value value) {
//> table-set-grow
  if (table->count + 1 > table->capacity * TABLE_MAX_LOAD) {
    adjustCapacity(table, GROW_CAPACITY(table->capacity));
  }

//< table-set-grow
  Entry* slot = findEntry(table->entries, table->capacity, key);
  bool isNewKey = (slot->key == NULL);
/* Hash Tables table-set < Hash Tables set-increment-count
  if (isNewKey) table->count++;
*/
//> set-increment-count
  // A reused tombstone was already counted, so only truly empty slots bump
  // the count.
  if (isNewKey && IS_NIL(slot->value)) table->count++;
//< set-increment-count

  slot->value = value;
  slot->key = key;
  return isNewKey;
}
//< table-set
//> table-delete
// Removes `key` from the table by turning its entry into a tombstone (NULL
// key, `true` value). Returns whether the key was present. The count is left
// unchanged, so tombstones keep counting towards the load.
bool tableDelete(Table* table, ObjString* key) {
  // An empty table may have no entry array at all, so do not probe it.
  Entry* victim = (table->count == 0)
      ? NULL
      : findEntry(table->entries, table->capacity, key);
  if (victim == NULL || victim->key == NULL) return false;

  victim->value = BOOL_VAL(true);
  victim->key = NULL;
  return true;
}
//< table-delete
//> table-add-all
// Copies every live entry of `from` into `to`, overwriting values for keys
// that `to` already contains.
void tableAddAll(Table* from, Table* to) {
  for (int i = 0; i < from->capacity; i++) {
    Entry* source = &from->entries[i];
    if (source->key == NULL) continue;
    tableSet(to, source->key, source->value);
  }
}
//< table-add-all
//> table-find-string
// Searches `table` for a key whose contents equal the `length` characters at
// `chars` and whose hash equals `hash`. Unlike findEntry(), keys are compared
// by content rather than by pointer. Returns the matching key, or NULL if
// there is none.
ObjString* tableFindString(Table* table, const char* chars,
                           int length, uint32_t hash) {
  // An empty table may have no entry array at all, so do not probe it.
  if (table->count == 0) return NULL;

/* Hash Tables table-find-string < Optimization find-string-index
  uint32_t index = hash % table->capacity;
*/
//> Optimization find-string-index
  // Masking matches `% capacity` only when capacity is a power of two.
  uint32_t index = hash & (table->capacity - 1);
//< Optimization find-string-index
  for (;;) {
    Entry* entry = &table->entries[index];
    if (entry->key == NULL) {
      // Stop if we find an empty non-tombstone entry.
      if (IS_NIL(entry->value)) return NULL;
    } else if (entry->key->length == length &&
        entry->key->hash == hash &&
        memcmp(entry->key->chars, chars, length) == 0) {
      // We found it.
      return entry->key;
    }

/* Hash Tables table-find-string < Optimization find-string-next
    index = (index + 1) % table->capacity;
*/
//> Optimization find-string-next
    // Step to the next slot, wrapping around at the end of the array.
    index = (index + 1) & (table->capacity - 1);
//< Optimization find-string-next
  }
}
//< table-find-string
//> Garbage Collection table-remove-white
// Deletes every entry whose key object is not marked.
void tableRemoveWhite(Table* table) {
  for (int i = 0; i < table->capacity; i++) {
    ObjString* key = table->entries[i].key;
    if (key == NULL || key->obj.isMarked) continue;
    tableDelete(table, key);
  }
}
//< Garbage Collection table-remove-white
//> Garbage Collection mark-table
// Marks the key and the value of every slot in the table. Empty slots are
// passed along too, as a NULL key and whatever value the slot holds.
void markTable(Table* table) {
  int slot = 0;
  while (slot < table->capacity) {
    Entry* current = &table->entries[slot];
    markObject((Obj*)current->key);
    markValue(current->value);
    slot++;
  }
}
//< Garbage Collection mark-table


================================================
FILE: c/table.h
================================================
//> Hash Tables table-h
#ifndef clox_table_h
#define clox_table_h

#include "common.h"
#include "value.h"
//> entry

// One slot of a Table. A NULL key with a nil value is an empty slot; a NULL
// key with a non-nil value is a tombstone left behind by tableDelete().
typedef struct {
  ObjString* key;
  Value value;
} Entry;
//< entry

// An open-addressing hash table keyed by ObjString pointers.
typedef struct {
  int count;       // Live entries plus tombstones; rebuilt on resize.
  int capacity;    // Number of slots in `entries`.
  Entry* entries;  // Slot array; NULL while the table is empty.
} Table;

//> init-table-h
void initTable(Table* table);
//> free-table-h
void freeTable(Table* table);
//< free-table-h
//> table-get-h
bool tableGet(Table* table, ObjString* key, Value* value);
//< table-get-h
//> table-set-h
bool tableSet(Table* table, ObjString* key, Value value);
//< table-set-h
//> table-delete-h
bool tableDelete(Table* table, ObjString* key);
//< table-delete-h
//> table-add-all-h
void tableAddAll(Table* from, Table* to);
//< table-add-all-h
//> table-find-string-h
ObjString* tableFindString(Table* table, const char* chars,
                           int length, uint32_t hash);
//< table-find-string-h
//> Garbage Collection table-remove-white-h

void tableRemoveWhite(Table* table);
//< Garbage Collection table-remove-white-h
//> Garbage Collection mark-table-h
void markTable(Table* table);
//< Garbage Collection mark-table-h

//< init-table-h
#endif


================================================
FILE: c/value.c
================================================
//> Chunks of Bytecode value-c
#include <stdio.h>
//> Strings value-include-string
#include <string.h>
//< Strings value-include-string

//> Strings value-include-object
#include "object.h"
//< Strings value-include-object
#include "memory.h"
#include "value.h"

// Resets `array` to the empty state: no storage, zero count and zero
// capacity. Does not free anything.
void initValueArray(ValueArray* array) {
  array->count = 0;
  array->capacity = 0;
  array->values = NULL;
}
//> write-value-array
// Appends `value` to `array`, growing the storage first when it is full.
void writeValueArray(ValueArray* array, Value value) {
  if (array->count + 1 > array->capacity) {
    int previous = array->capacity;
    array->capacity = GROW_CAPACITY(previous);
    array->values = GROW_ARRAY(Value, array->values,
                               previous, array->capacity);
  }

  array->values[array->count++] = value;
}
//< write-value-array
//> free-value-array
// Releases the array's storage and resets the array to the empty state.
void freeValueArray(ValueArray* array) {
  FREE_ARRAY(Value, array->values, array->capacity);
  initValueArray(array);
}
//< free-value-array
//> print-value
// Prints `value` to stdout: "true"/"false" for booleans, "nil" for nil, "%g"
// format for numbers, and printObject()'s output for heap objects. There is
// one implementation per value representation, chosen by NAN_BOXING.
void printValue(Value value) {
//> Optimization print-value
#ifdef NAN_BOXING
  // NaN-boxed values carry no type field, so the type is tested with the
  // IS_* macros.
  if (IS_BOOL(value)) {
    printf(AS_BOOL(value) ? "true" : "false");
  } else if (IS_NIL(value)) {
    printf("nil");
  } else if (IS_NUMBER(value)) {
    printf("%g", AS_NUMBER(value));
  } else if (IS_OBJ(value)) {
    printObject(value);
  }
#else
//< Optimization print-value
/* Chunks of Bytecode print-value < Types of Values print-number-value
  printf("%g", value);
*/
/* Types of Values print-number-value < Types of Values print-value
 printf("%g", AS_NUMBER(value));
 */
//> Types of Values print-value
  // Tagged-union representation: dispatch on the explicit type tag.
  switch (value.type) {
    case VAL_BOOL:
      printf(AS_BOOL(value) ? "true" : "false");
      break;
    case VAL_NIL: printf("nil"); break;
    case VAL_NUMBER: printf("%g", AS_NUMBER(value)); break;
//> Strings call-print-object
    case VAL_OBJ: printObject(value); break;
//< Strings call-print-object
  }
//< Types of Values print-value
//> Optimization end-print-value
#endif
//< Optimization end-print-value
}
//< print-value
//> Types of Values values-equal
// Reports whether `a` and `b` are equal. Numbers compare with `==` on
// doubles; heap objects compare by pointer identity.
bool valuesEqual(Value a, Value b) {
//> Optimization values-equal
#ifdef NAN_BOXING
//> nan-equality
  // Numbers are compared as doubles rather than as raw bits.
  if (IS_NUMBER(a) && IS_NUMBER(b)) {
    return AS_NUMBER(a) == AS_NUMBER(b);
  }
//< nan-equality
  // Everything else is equal exactly when the 64-bit patterns match.
  return a == b;
#else
//< Optimization values-equal
  // Values of different types are never equal.
  if (a.type != b.type) return false;
  switch (a.type) {
    case VAL_BOOL:   return AS_BOOL(a) == AS_BOOL(b);
    case VAL_NIL:    return true;
    case VAL_NUMBER: return AS_NUMBER(a) == AS_NUMBER(b);
/* Strings strings-equal < Hash Tables equal
    case VAL_OBJ: {
      ObjString* aString = AS_STRING(a);
      ObjString* bString = AS_STRING(b);
      return aString->length == bString->length &&
          memcmp(aString->chars, bString->chars,
                 aString->length) == 0;
    }
 */
//> Hash Tables equal
    // Pointer comparison only.
    case VAL_OBJ:    return AS_OBJ(a) == AS_OBJ(b);
//< Hash Tables equal
    default:         return false; // Unreachable.
  }
//> Optimization end-values-equal
#endif
//< Optimization end-values-equal
}
//< Types of Values values-equal


================================================
FILE: c/value.h
================================================
//> Chunks of Bytecode value-h
#ifndef clox_value_h
#define clox_value_h
//> Optimization include-string

#include <string.h>
//< Optimization include-string

#include "common.h"

//> Strings forward-declare-obj
typedef struct Obj Obj;
//> forward-declare-obj-string
typedef struct ObjString ObjString;
//< forward-declare-obj-string

//< Strings forward-declare-obj
//> Optimization nan-boxing
#ifdef NAN_BOXING
//> qnan

//> sign-bit
// Bit 63 of the 64-bit encoding. Set together with all QNAN bits it marks
// the value as an object pointer (see IS_OBJ / OBJ_VAL).
#define SIGN_BIT ((uint64_t)0x8000000000000000)
//< sign-bit
// Bits 50..62: every exponent bit of a double plus its two highest
// mantissa bits. A value with all of these bits set is treated as a
// non-number (see IS_NUMBER).
#define QNAN     ((uint64_t)0x7ffc000000000000)
//< qnan
//> tags

// Low-bit tags that, OR'ed with QNAN, encode the three singleton values.
#define TAG_NIL   1 // 01.
#define TAG_FALSE 2 // 10.
#define TAG_TRUE  3 // 11.
//< tags

// With NaN boxing every Lox value is a single 64-bit word.
typedef uint64_t Value;
//> is-number

//> is-bool
// FALSE_VAL and TRUE_VAL differ only in the lowest bit, so forcing that
// bit on maps both of them (and nothing else) onto TRUE_VAL.
#define IS_BOOL(value)      (((value) | 1) == TRUE_VAL)
//< is-bool
//> is-nil
#define IS_NIL(value)       ((value) == NIL_VAL)
//< is-nil
// A value is a number unless all of the QNAN bits are set.
#define IS_NUMBER(value)    (((value) & QNAN) != QNAN)
//< is-number
//> is-obj
// An object has the sign bit and all of the QNAN bits set.
#define IS_OBJ(value) \
    (((value) & (QNAN | SIGN_BIT)) == (QNAN | SIGN_BIT))
//< is-obj
//> as-number

//> as-bool
// Only meaningful after IS_BOOL(); anything other than TRUE_VAL reads as
// false.
#define AS_BOOL(value)      ((value) == TRUE_VAL)
//< as-bool
#define AS_NUMBER(value)    valueToNum(value)
//< as-number
//> as-obj
// Clears the sign and QNAN bits, leaving the bits that OBJ_VAL() stored
// from the original pointer.
#define AS_OBJ(value) \
    ((Obj*)(uintptr_t)((value) & ~(SIGN_BIT | QNAN)))
//< as-obj
//> number-val

//> bool-val
#define BOOL_VAL(b)     ((b) ? TRUE_VAL : FALSE_VAL)
//< bool-val
//> false-true-vals
#define FALSE_VAL       ((Value)(uint64_t)(QNAN | TAG_FALSE))
#define TRUE_VAL        ((Value)(uint64_t)(QNAN | TAG_TRUE))
//< false-true-vals
//> nil-val
#define NIL_VAL         ((Value)(uint64_t)(QNAN | TAG_NIL))
//< nil-val
#define NUMBER_VAL(num) numToValue(num)
//< number-val
//> obj-val
// Stores the pointer bits alongside the sign and QNAN marker bits.
// NOTE(review): assumes the pointer never uses bits 50..63 -- confirm for
// the target platforms.
#define OBJ_VAL(obj) \
    (Value)(SIGN_BIT | QNAN | (uint64_t)(uintptr_t)(obj))
//< obj-val
//> value-to-num

// Reinterprets the bits of a NaN-boxed Value as a double. The bytes are
// copied with memcpy() instead of being read through a cast pointer.
static inline double valueToNum(Value value) {
  double asDouble;
  memcpy(&asDouble, &value, sizeof(Value));
  return asDouble;
}
//< value-to-num
//> num-to-value

// Reinterprets the bits of a double as a NaN-boxed Value; the inverse of
// valueToNum(). The bytes are copied with memcpy().
static inline Value numToValue(double num) {
  Value boxed;
  memcpy(&boxed, &num, sizeof(double));
  return boxed;
}
//< num-to-value

#else

//< Optimization nan-boxing
//> Types of Values value-type
// Discriminator stored in the `type` field of the tagged-union Value
// (used when NAN_BOXING is not defined).
typedef enum {
  VAL_BOOL,
  VAL_NIL, // [user-types]
  VAL_NUMBER,
//> Strings val-obj
  // The payload is a pointer to an Obj.
  VAL_OBJ
//< Strings val-obj
} ValueType;

//< Types of Values value-type
/* Chunks of Bytecode value-h < Types of Values value
typedef double Value;
*/
//> Types of Values value
// Tagged-union representation of a Lox value: `type` says which member of
// `as` is currently meaningful.
typedef struct {
  ValueType type;
  union {
    bool boolean;   // Valid when type == VAL_BOOL.
    double number;  // Valid when type == VAL_NUMBER.
//> Strings union-object
    Obj* obj;       // Valid when type == VAL_OBJ.
//< Strings union-object
  } as; // [as]
} Value;
//< Types of Values value
//> Types of Values is-macros

// Type tests: each checks the tag stored in the Value.
#define IS_BOOL(value)    ((value).type == VAL_BOOL)
#define IS_NIL(value)     ((value).type == VAL_NIL)
#define IS_NUMBER(value)  ((value).type == VAL_NUMBER)
//> Strings is-obj
#define IS_OBJ(value)     ((value).type == VAL_OBJ)
//< Strings is-obj
//< Types of Values is-macros
//> Types of Values as-macros

// Accessors: each reads one union member without checking the tag, so the
// caller must have tested the type first.
//> Strings as-obj
#define AS_OBJ(value)     ((value).as.obj)
//< Strings as-obj
#define AS_BOOL(value)    ((value).as.boolean)
#define AS_NUMBER(value)  ((value).as.number)
//< Types of Values as-macros
//> Types of Values value-macros

// Constructors that wrap a C value in a tagged Value. Each macro argument
// is parenthesized so an expression argument is used as a single unit; in
// particular OBJ_VAL(ptr + 1) casts the whole expression to Obj* instead
// of casting only `ptr` and then offsetting in units of Obj.
#define BOOL_VAL(value)   ((Value){VAL_BOOL, {.boolean = (value)}})
#define NIL_VAL           ((Value){VAL_NIL, {.number = 0}})
#define NUMBER_VAL(value) ((Value){VAL_NUMBER, {.number = (value)}})
//> Strings obj-val
#define OBJ_VAL(object)   ((Value){VAL_OBJ, {.obj = (Obj*)(object)}})
//< Strings obj-val
//< Types of Values value-macros
//> Optimization end-if-nan-boxing

#endif
//< Optimization end-if-nan-boxing
//> value-array

// An array of Values managed through initValueArray(), writeValueArray()
// and freeValueArray().
// NOTE(review): presumably `capacity` is the number of allocated slots and
// `count` the number in use -- confirm against value.c.
typedef struct {
  int capacity;
  int count;
  Value* values;
} ValueArray;
//< value-array
//> array-fns-h

//> Types of Values values-equal-h
// Returns true when `a` and `b` are equal Lox values.
bool valuesEqual(Value a, Value b);
//< Types of Values values-equal-h
// ValueArray lifecycle: initialize, append one value, release.
void initValueArray(ValueArray* array);
void writeValueArray(ValueArray* array, Value value);
void freeValueArray(ValueArray* array);
//< array-fns-h
//> print-value-h
// Prints a textual form of `value` with printf (no trailing newline is
// added by the callers' convention; see OP_PRINT in vm.c).
void printValue(Value value);
//< print-value-h

#endif


================================================
FILE: c/vm.c
================================================
//> A Virtual Machine vm-c
//> Types of Values include-stdarg
#include <stdarg.h>
//< Types of Values include-stdarg
//> vm-include-stdio
#include <stdio.h>
//> Strings vm-include-string
#include <string.h>
//< Strings vm-include-string
//> Calls and Functions vm-include-time
#include <time.h>
//< Calls and Functions vm-include-time

//< vm-include-stdio
#include "common.h"
//> Scanning on Demand vm-include-compiler
#include "compiler.h"
//< Scanning on Demand vm-include-compiler
//> vm-include-debug
#include "debug.h"
//< vm-include-debug
//> Strings vm-include-object-memory
#include "object.h"
#include "memory.h"
//< Strings vm-include-object-memory
#include "vm.h"

// The single global VM instance that every function in this file operates
// on.
VM vm; // [one]
//> Calls and Functions clock-native
// Native function bound to the global `clock`: returns the value of the C
// clock() function converted to seconds. Both parameters are ignored.
static Value clockNative(int argCount, Value* args) {
  double seconds = (double)clock() / CLOCKS_PER_SEC;
  return NUMBER_VAL(seconds);
}
//< Calls and Functions clock-native
//> reset-stack
// Puts the VM's execution state back to "nothing running": empties the
// value stack, discards all call frames and forgets any open upvalues.
static void resetStack() {
  // Pointing stackTop at the first slot makes the stack empty.
  vm.stackTop = vm.stack;
//> Calls and Functions reset-frame-count
  vm.frameCount = 0;
//< Calls and Functions reset-frame-count
//> Closures init-open-upvalues
  vm.openUpvalues = NULL;
//< Closures init-open-upvalues
}
//< reset-stack
//> Types of Values runtime-error
// Reports a runtime error on stderr and resets the VM stack.
//
// `format` and the variadic arguments are printf-style. After the message,
// one "[line N] in <function>" line is printed for every active call
// frame, innermost first.
static void runtimeError(const char* format, ...) {
  va_list args;
  va_start(args, format);
  vfprintf(stderr, format, args);
  va_end(args);
  fputs("\n", stderr);

/* Types of Values runtime-error < Calls and Functions runtime-error-temp
  size_t instruction = vm.ip - vm.chunk->code - 1;
  int line = vm.chunk->lines[instruction];
*/
/* Calls and Functions runtime-error-temp < Calls and Functions runtime-error-stack
  CallFrame* frame = &vm.frames[vm.frameCount - 1];
  size_t instruction = frame->ip - frame->function->chunk.code - 1;
  int line = frame->function->chunk.lines[instruction];
*/
/* Types of Values runtime-error < Calls and Functions runtime-error-stack
  fprintf(stderr, "[line %d] in script\n", line);
*/
//> Calls and Functions runtime-error-stack
  // Walk the call stack from the most recent frame down to the script.
  for (int i = vm.frameCount - 1; i >= 0; i--) {
    CallFrame* frame = &vm.frames[i];
/* Calls and Functions runtime-error-stack < Closures runtime-error-function
    ObjFunction* function = frame->function;
*/
//> Closures runtime-error-function
    ObjFunction* function = frame->closure->function;
//< Closures runtime-error-function
    // The frame's ip has already been advanced past the byte it last read
    // (see READ_BYTE in run()), hence the "- 1".
    size_t instruction = frame->ip - function->chunk.code - 1;
    fprintf(stderr, "[line %d] in ", // [minus]
            function->chunk.lines[instruction]);
    // A function without a name is reported as the top-level script.
    if (function->name == NULL) {
      fprintf(stderr, "script\n");
    } else {
      fprintf(stderr, "%s()\n", function->name->chars);
    }
  }

//< Calls and Functions runtime-error-stack
  resetStack();
}
//< Types of Values runtime-error
//> Calls and Functions define-native
// Registers a native (C) function as a global variable called `name`.
//
// Both the name string and the native object are pushed on the VM stack
// before tableSet() runs and popped afterwards -- presumably so that a
// garbage collection triggered by either allocation can still reach them
// (TODO confirm against memory.c). The two temporaries are addressed
// relative to stackTop rather than as vm.stack[0]/vm.stack[1], so the
// function is also correct when the stack is not empty on entry.
static void defineNative(const char* name, NativeFn function) {
  push(OBJ_VAL(copyString(name, (int)strlen(name))));
  push(OBJ_VAL(newNative(function)));
  // stackTop[-2] is the name pushed first, stackTop[-1] the native object.
  tableSet(&vm.globals, AS_STRING(vm.stackTop[-2]), vm.stackTop[-1]);
  pop();
  pop();
}
//< Calls and Functions define-native

// Initializes the global VM: empty stack, no objects, fresh GC counters,
// empty global/string tables, the cached "init" string and the built-in
// `clock` native.
void initVM() {
//> call-reset-stack
  resetStack();
//< call-reset-stack
//> Strings init-objects-root
  vm.objects = NULL;
//< Strings init-objects-root
//> Garbage Collection init-gc-fields
  vm.bytesAllocated = 0;
  // NOTE(review): presumably the allocation threshold (1 MiB) for the
  // first collection -- confirm against memory.c.
  vm.nextGC = 1024 * 1024;
//< Garbage Collection init-gc-fields
//> Garbage Collection init-gray-stack

  vm.grayCount = 0;
  vm.grayCapacity = 0;
  vm.grayStack = NULL;
//< Garbage Collection init-gray-stack
//> Global Variables init-globals

  initTable(&vm.globals);
//< Global Variables init-globals
//> Hash Tables init-strings
  initTable(&vm.strings);
//< Hash Tables init-strings
//> Methods and Initializers init-init-string

//> null-init-string
  // Clear the field before copyString() runs so it never holds an
  // uninitialized pointer while that call executes.
  // NOTE(review): presumably because the allocation can trigger a GC that
  // reads vm.initString -- confirm.
  vm.initString = NULL;
//< null-init-string
  vm.initString = copyString("init", 4);
//< Methods and Initializers init-init-string
//> Calls and Functions define-native-clock

  defineNative("clock", clockNative);
//< Calls and Functions define-native-clock
}

// Releases the resources owned by the global VM: both hash tables, the
// cached "init" string reference, and then the objects via freeObjects().
void freeVM() {
//> Global Variables free-globals
  freeTable(&vm.globals);
//< Global Variables free-globals
//> Hash Tables free-strings
  freeTable(&vm.strings);
//< Hash Tables free-strings
//> Methods and Initializers clear-init-string
  // Only the reference is dropped here; the string object itself is not
  // freed by this statement.
  vm.initString = NULL;
//< Methods and Initializers clear-init-string
//> Strings call-free-objects
  freeObjects();
//< Strings call-free-objects
}
//> push
// Stores `value` in the slot stackTop points at, then advances stackTop.
// No check against the stack's capacity is made here.
void push(Value value) {
  *vm.stackTop++ = value;
}
//< push
//> pop
// Moves stackTop back one slot and returns the value stored there. No
// check for an empty stack is made here.
Value pop() {
  return *--vm.stackTop;
}
//< pop
//> Types of Values peek
// Returns the value `distance` slots below the top of the stack without
// removing it; peek(0) is the most recently pushed value.
static Value peek(int distance) {
  Value* slot = vm.stackTop - 1 - distance;
  return *slot;
}
//< Types of Values peek
/* Calls and Functions call < Closures call-signature
static bool call(ObjFunction* function, int argCount) {
*/
//> Calls and Functions call
//> Closures call-signature
// Begins a call to `closure` with `argCount` arguments that are already on
// the stack. Pushes a new CallFrame on success and returns true; reports a
// runtime error and returns false on an arity mismatch or when FRAMES_MAX
// frames are already active.
static bool call(ObjClosure* closure, int argCount) {
//< Closures call-signature
/* Calls and Functions check-arity < Closures check-arity
  if (argCount != function->arity) {
    runtimeError("Expected %d arguments but got %d.",
        function->arity, argCount);
*/
//> Closures check-arity
  if (argCount != closure->function->arity) {
    runtimeError("Expected %d arguments but got %d.",
        closure->function->arity, argCount);
//< Closures check-arity
//> check-arity
    return false;
  }

//< check-arity
//> check-overflow
  if (vm.frameCount == FRAMES_MAX) {
    runtimeError("Stack overflow.");
    return false;
  }

//< check-overflow
  CallFrame* frame = &vm.frames[vm.frameCount++];
/* Calls and Functions call < Closures call-init-closure
  frame->function = function;
  frame->ip = function->chunk.code;
*/
//> Closures call-init-closure
  frame->closure = closure;
  // Execution starts at the first byte of the function's bytecode.
  frame->ip = closure->function->chunk.code;
//< Closures call-init-closure
  // Slot 0 of the frame is the stack slot just below the arguments (the
  // callee/receiver slot), followed by the arguments themselves.
  frame->slots = vm.stackTop - argCount - 1;
  return true;
}
//< Calls and Functions call
//> Calls and Functions call-value
// Calls `callee` with the `argCount` arguments on top of the stack,
// dispatching on the kind of object it is. Returns false after reporting a
// runtime error when the call cannot be made (including when `callee` is
// not a callable object).
static bool callValue(Value callee, int argCount) {
  if (IS_OBJ(callee)) {
    switch (OBJ_TYPE(callee)) {
//> Methods and Initializers call-bound-method
      case OBJ_BOUND_METHOD: {
        ObjBoundMethod* bound = AS_BOUND_METHOD(callee);
//> store-receiver
        // Replace the callee slot (frame slot 0) with the bound receiver.
        vm.stackTop[-argCount - 1] = bound->receiver;
//< store-receiver
        return call(bound->method, argCount);
      }
//< Methods and Initializers call-bound-method
//> Classes and Instances call-class
      case OBJ_CLASS: {
        // Calling a class creates an instance, stored in the callee slot.
        ObjClass* klass = AS_CLASS(callee);
        vm.stackTop[-argCount - 1] = OBJ_VAL(newInstance(klass));
//> Methods and Initializers call-init
        Value initializer;
        // Run the class's "init" method, if it has one, with the
        // arguments that were passed to the class.
        if (tableGet(&klass->methods, vm.initString,
                     &initializer)) {
          return call(AS_CLOSURE(initializer), argCount);
//> no-init-arity-error
        } else if (argCount != 0) {
          // Without an initializer no arguments are accepted.
          runtimeError("Expected 0 arguments but got %d.",
                       argCount);
          return false;
//< no-init-arity-error
        }
//< Methods and Initializers call-init
        return true;
      }
//< Classes and Instances call-class
//> Closures call-value-closure
      case OBJ_CLOSURE:
        return call(AS_CLOSURE(callee), argCount);
//< Closures call-value-closure
/* Calls and Functions call-value < Closures call-value-closure
      case OBJ_FUNCTION: // [switch]
        return call(AS_FUNCTION(callee), argCount);
*/
//> call-native
      case OBJ_NATIVE: {
        // Natives run immediately: no CallFrame is pushed. The callee and
        // its arguments are removed and replaced by the result.
        NativeFn native = AS_NATIVE(callee);
        Value result = native(argCount, vm.stackTop - argCount);
        vm.stackTop -= argCount + 1;
        push(result);
        return true;
      }
//< call-native
      default:
        break; // Non-callable object type.
    }
  }
  runtimeError("Can only call functions and classes.");
  return false;
}
//< Calls and Functions call-value
//> Methods and Initializers invoke-from-class
// Looks `name` up in the method table of `klass` and calls the method
// found there with `argCount` arguments. Reports a runtime error and
// returns false when the class has no such method.
static bool invokeFromClass(ObjClass* klass, ObjString* name,
                            int argCount) {
  Value found;
  if (tableGet(&klass->methods, name, &found)) {
    return call(AS_CLOSURE(found), argCount);
  }

  runtimeError("Undefined property '%s'.", name->chars);
  return false;
}
//< Methods and Initializers invoke-from-class
//> Methods and Initializers invoke
// Performs a combined "get property and call it" on the receiver that sits
// `argCount` slots below the top of the stack. Returns false after
// reporting a runtime error if the receiver is not an instance or the call
// fails.
static bool invoke(ObjString* name, int argCount) {
  Value receiver = peek(argCount);
//> invoke-check-type

  if (!IS_INSTANCE(receiver)) {
    runtimeError("Only instances have methods.");
    return false;
  }

//< invoke-check-type
  ObjInstance* instance = AS_INSTANCE(receiver);
//> invoke-field

  // A field with this name takes precedence over a method: put the
  // field's value in the callee slot and call it like any other value.
  Value value;
  if (tableGet(&instance->fields, name, &value)) {
    vm.stackTop[-argCount - 1] = value;
    return callValue(value, argCount);
  }

//< invoke-field
  return invokeFromClass(instance->klass, name, argCount);
}
//< Methods and Initializers invoke
//> Methods and Initializers bind-method
// Replaces the receiver on top of the stack with a bound method that pairs
// it with the method `name` of `klass`. Reports a runtime error and
// returns false when the class has no such method.
static bool bindMethod(ObjClass* klass, ObjString* name) {
  Value found;
  if (!tableGet(&klass->methods, name, &found)) {
    runtimeError("Undefined property '%s'.", name->chars);
    return false;
  }

  // The receiver remains in its stack slot while the bound method is
  // allocated; the slot is overwritten only afterwards.
  Value receiver = peek(0);
  ObjBoundMethod* boundMethod =
      newBoundMethod(receiver, AS_CLOSURE(found));
  vm.stackTop[-1] = OBJ_VAL(boundMethod);
  return true;
}
//< Methods and Initializers bind-method
//> Closures capture-upvalue
// Returns the open upvalue that refers to the stack slot `local`, creating
// one and linking it into vm.openUpvalues if none exists yet. The list is
// kept ordered by descending slot address.
static ObjUpvalue* captureUpvalue(Value* local) {
//> look-for-existing-upvalue
  // Skip every upvalue whose slot lies above `local`.
  ObjUpvalue* prevUpvalue = NULL;
  ObjUpvalue* upvalue = vm.openUpvalues;
  while (upvalue != NULL && upvalue->location > local) {
    prevUpvalue = upvalue;
    upvalue = upvalue->next;
  }

  // Reuse an existing upvalue for this exact slot.
  if (upvalue != NULL && upvalue->location == local) {
    return upvalue;
  }

//< look-for-existing-upvalue
  ObjUpvalue* createdUpvalue = newUpvalue(local);
//> insert-upvalue-in-list
  // Insert between prevUpvalue and upvalue, which keeps the ordering.
  createdUpvalue->next = upvalue;

  if (prevUpvalue == NULL) {
    vm.openUpvalues = createdUpvalue;
  } else {
    prevUpvalue->next = createdUpvalue;
  }

//< insert-upvalue-in-list
  return createdUpvalue;
}
//< Closures capture-upvalue
//> Closures close-upvalues
// Closes every open upvalue that refers to the stack slot `last` or a slot
// above it: the slot's current value is copied into the upvalue itself,
// the upvalue is re-pointed at that copy, and it is unlinked from
// vm.openUpvalues.
static void closeUpvalues(Value* last) {
  for (ObjUpvalue* open = vm.openUpvalues;
       open != NULL && open->location >= last;
       open = vm.openUpvalues) {
    open->closed = *open->location;
    open->location = &open->closed;
    vm.openUpvalues = open->next;
  }
}
//< Closures close-upvalues
//> Methods and Initializers define-method
// Adds the value on top of the stack as method `name` of the class that
// sits directly beneath it, then pops the method. The class stays on the
// stack.
static void defineMethod(ObjString* name) {
  ObjClass* owner = AS_CLASS(peek(1));
  tableSet(&owner->methods, name, peek(0));
  pop();
}
//< Methods and Initializers define-method
//> Types of Values is-falsey
// Lox truthiness: only nil and the boolean false count as false.
static bool isFalsey(Value value) {
  if (IS_NIL(value)) return true;
  if (!IS_BOOL(value)) return false;
  return !AS_BOOL(value);
}
//< Types of Values is-falsey
//> Strings concatenate
// Replaces the two strings on top of the stack with a new string holding
// their concatenation (lower operand first).
static void concatenate() {
/* Strings concatenate < Garbage Collection concatenate-peek
  ObjString* b = AS_STRING(pop());
  ObjString* a = AS_STRING(pop());
*/
//> Garbage Collection concatenate-peek
  // The operands are only peeked at, so they stay on the stack while the
  // result is being allocated.
  // NOTE(review): presumably to keep them reachable by the GC -- confirm.
  ObjString* b = AS_STRING(peek(0));
  ObjString* a = AS_STRING(peek(1));
//< Garbage Collection concatenate-peek

  int length = a->length + b->length;
  // One extra byte for the terminating NUL.
  char* chars = ALLOCATE(char, length + 1);
  memcpy(chars, a->chars, a->length);
  memcpy(chars + a->length, b->chars, b->length);
  chars[length] = '\0';

  ObjString* result = takeString(chars, length);
//> Garbage Collection concatenate-pop
  pop();
  pop();
//< Garbage Collection concatenate-pop
  push(OBJ_VAL(result));
}
//< Strings concatenate
//> run
// The bytecode interpreter loop. Executes instructions of the topmost
// CallFrame in vm.frames until the outermost frame returns (INTERPRET_OK)
// or a runtime error has been reported (INTERPRET_RUNTIME_ERROR).
static InterpretResult run() {
//> Calls and Functions run
  // Cached pointer to the frame being executed; it is refreshed after
  // every instruction that can push or pop a frame.
  CallFrame* frame = &vm.frames[vm.frameCount - 1];

/* A Virtual Machine run < Calls and Functions run
#define READ_BYTE() (*vm.ip++)
*/
// Reads the byte at the frame's ip and advances the ip past it.
#define READ_BYTE() (*frame->ip++)
/* A Virtual Machine read-constant < Calls and Functions run
#define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
*/

/* Jumping Back and Forth read-short < Calls and Functions run
#define READ_SHORT() \
    (vm.ip += 2, (uint16_t)((vm.ip[-2] << 8) | vm.ip[-1]))
*/
// Reads a 16-bit operand stored high byte first and advances the ip by
// two.
#define READ_SHORT() \
    (frame->ip += 2, \
    (uint16_t)((frame->ip[-2] << 8) | frame->ip[-1]))

/* Calls and Functions run < Closures read-constant
#define READ_CONSTANT() \
    (frame->function->chunk.constants.values[READ_BYTE()])
*/
//> Closures read-constant
// Reads a one-byte index and yields that entry of the current function's
// constant table.
#define READ_CONSTANT() \
    (frame->closure->function->chunk.constants.values[READ_BYTE()])
//< Closures read-constant

//< Calls and Functions run
//> Global Variables read-string
#define READ_STRING() AS_STRING(READ_CONSTANT())
//< Global Variables read-string
/* A Virtual Machine binary-op < Types of Values binary-op
#define BINARY_OP(op) \
    do { \
      double b = pop(); \
      double a = pop(); \
      push(a op b); \
    } while (false)
*/
//> Types of Values binary-op
// Pops two numeric operands, applies `op` to them and pushes the result
// wrapped with `valueType`. If either operand is not a number a runtime
// error is reported and run() returns.
#define BINARY_OP(valueType, op) \
    do { \
      if (!IS_NUMBER(peek(0)) || !IS_NUMBER(peek(1))) { \
        runtimeError("Operands must be numbers."); \
        return INTERPRET_RUNTIME_ERROR; \
      } \
      double b = AS_NUMBER(pop()); \
      double a = AS_NUMBER(pop()); \
      push(valueType(a op b)); \
    } while (false)
//< Types of Values binary-op

  for (;;) {
//> trace-execution
#ifdef DEBUG_TRACE_EXECUTION
//> trace-stack
    // Dump the whole value stack, bottom first, then disassemble the
    // instruction that is about to run.
    printf("          ");
    for (Value* slot = vm.stack; slot < vm.stackTop; slot++) {
      printf("[ ");
      printValue(*slot);
      printf(" ]");
    }
    printf("\n");
//< trace-stack
/* A Virtual Machine trace-execution < Calls and Functions trace-execution
    disassembleInstruction(vm.chunk,
                           (int)(vm.ip - vm.chunk->code));
*/
/* Calls and Functions trace-execution < Closures disassemble-instruction
    disassembleInstruction(&frame->function->chunk,
        (int)(frame->ip - frame->function->chunk.code));
*/
//> Closures disassemble-instruction
    disassembleInstruction(&frame->closure->function->chunk,
        (int)(frame->ip - frame->closure->function->chunk.code));
//< Closures disassemble-instruction
#endif

//< trace-execution
    uint8_t instruction;
    switch (instruction = READ_BYTE()) {
//> op-constant
      case OP_CONSTANT: {
        Value constant = READ_CONSTANT();
/* A Virtual Machine op-constant < A Virtual Machine push-constant
        printValue(constant);
        printf("\n");
*/
//> push-constant
        push(constant);
//< push-constant
        break;
      }
//< op-constant
//> Types of Values interpret-literals
      case OP_NIL: push(NIL_VAL); break;
      case OP_TRUE: push(BOOL_VAL(true)); break;
      case OP_FALSE: push(BOOL_VAL(false)); break;
//< Types of Values interpret-literals
//> Global Variables interpret-pop
      case OP_POP: pop(); break;
//< Global Variables interpret-pop
//> Local Variables interpret-get-local
      case OP_GET_LOCAL: {
        // Locals are addressed relative to the current frame's slots.
        uint8_t slot = READ_BYTE();
/* Local Variables interpret-get-local < Calls and Functions push-local
        push(vm.stack[slot]); // [slot]
*/
//> Calls and Functions push-local
        push(frame->slots[slot]);
//< Calls and Functions push-local
        break;
      }
//< Local Variables interpret-get-local
//> Local Variables interpret-set-local
      case OP_SET_LOCAL: {
        uint8_t slot = READ_BYTE();
/* Local Variables interpret-set-local < Calls and Functions set-local
        vm.stack[slot] = peek(0);
*/
//> Calls and Functions set-local
        // The assigned value is left on the stack.
        frame->slots[slot] = peek(0);
//< Calls and Functions set-local
        break;
      }
//< Local Variables interpret-set-local
//> Global Variables interpret-get-global
      case OP_GET_GLOBAL: {
        ObjString* name = READ_STRING();
        Value value;
        if (!tableGet(&vm.globals, name, &value)) {
          runtimeError("Undefined variable '%s'.", name->chars);
          return INTERPRET_RUNTIME_ERROR;
        }
        push(value);
        break;
      }
//< Global Variables interpret-get-global
//> Global Variables interpret-define-global
      case OP_DEFINE_GLOBAL: {
        ObjString* name = READ_STRING();
        // The value is popped only after it has been stored in the table.
        tableSet(&vm.globals, name, peek(0));
        pop();
        break;
      }
//< Global Variables interpret-define-global
//> Global Variables interpret-set-global
      case OP_SET_GLOBAL: {
        ObjString* name = READ_STRING();
        // NOTE(review): tableSet() presumably returns true when the key
        // was newly added. Assignment must not define a variable, so the
        // entry is removed again and an error is reported -- confirm
        // against table.c.
        if (tableSet(&vm.globals, name, peek(0))) {
          tableDelete(&vm.globals, name); // [delete]
          runtimeError("Undefined variable '%s'.", name->chars);
          return INTERPRET_RUNTIME_ERROR;
        }
        break;
      }
//< Global Variables interpret-set-global
//> Closures interpret-get-upvalue
      case OP_GET_UPVALUE: {
        uint8_t slot = READ_BYTE();
        push(*frame->closure->upvalues[slot]->location);
        break;
      }
//< Closures interpret-get-upvalue
//> Closures interpret-set-upvalue
      case OP_SET_UPVALUE: {
        uint8_t slot = READ_BYTE();
        *frame->closure->upvalues[slot]->location = peek(0);
        break;
      }
//< Closures interpret-set-upvalue
//> Classes and Instances interpret-get-property
      case OP_GET_PROPERTY: {
//> get-not-instance
        if (!IS_INSTANCE(peek(0))) {
          runtimeError("Only instances have properties.");
          return INTERPRET_RUNTIME_ERROR;
        }

//< get-not-instance
        ObjInstance* instance = AS_INSTANCE(peek(0));
        ObjString* name = READ_STRING();
        
        // Fields are looked up first; methods only if no field matches.
        Value value;
        if (tableGet(&instance->fields, name, &value)) {
          pop(); // Instance.
          push(value);
          break;
        }
//> get-undefined

//< get-undefined
/* Classes and Instances get-undefined < Methods and Initializers get-method
        runtimeError("Undefined property '%s'.", name->chars);
        return INTERPRET_RUNTIME_ERROR;
*/
//> Methods and Initializers get-method
        if (!bindMethod(instance->klass, name)) {
          return INTERPRET_RUNTIME_ERROR;
        }
        break;
//< Methods and Initializers get-method
      }
//< Classes and Instances interpret-get-property
//> Classes and Instances interpret-set-property
      case OP_SET_PROPERTY: {
//> set-not-instance
        if (!IS_INSTANCE(peek(1))) {
          runtimeError("Only instances have fields.");
          return INTERPRET_RUNTIME_ERROR;
        }

//< set-not-instance
        ObjInstance* instance = AS_INSTANCE(peek(1));
        tableSet(&instance->fields, READ_STRING(), peek(0));
        // Remove the instance but leave the assigned value as the result.
        Value value = pop();
        pop();
        push(value);
        break;
      }
//< Classes and Instances interpret-set-property
//> Superclasses interpret-get-super
      case OP_GET_SUPER: {
        ObjString* name = READ_STRING();
        ObjClass* superclass = AS_CLASS(pop());
        
        if (!bindMethod(superclass, name)) {
          return INTERPRET_RUNTIME_ERROR;
        }
        break;
      }
//< Superclasses interpret-get-super
//> Types of Values interpret-equal
      case OP_EQUAL: {
        Value b = pop();
        Value a = pop();
        push(BOOL_VAL(valuesEqual(a, b)));
        break;
      }
//< Types of Values interpret-equal
//> Types of Values interpret-comparison
      case OP_GREATER:  BINARY_OP(BOOL_VAL, >); break;
      case OP_LESS:     BINARY_OP(BOOL_VAL, <); break;
//< Types of Values interpret-comparison
/* A Virtual Machine op-binary < Types of Values op-arithmetic
      case OP_ADD:      BINARY_OP(+); break;
      case OP_SUBTRACT: BINARY_OP(-); break;
      case OP_MULTIPLY: BINARY_OP(*); break;
      case OP_DIVIDE:   BINARY_OP(/); break;
*/
/* A Virtual Machine op-negate < Types of Values op-negate
      case OP_NEGATE:   push(-pop()); break;
*/
/* Types of Values op-arithmetic < Strings add-strings
      case OP_ADD:      BINARY_OP(NUMBER_VAL, +); break;
*/
//> Strings add-strings
      case OP_ADD: {
        // `+` concatenates two strings or adds two numbers; any other
        // combination of operands is an error.
        if (IS_STRING(peek(0)) && IS_STRING(peek(1))) {
          concatenate();
        } else if (IS_NUMBER(peek(0)) && IS_NUMBER(peek(1))) {
          double b = AS_NUMBER(pop());
          double a = AS_NUMBER(pop());
          push(NUMBER_VAL(a + b));
        } else {
          runtimeError(
              "Operands must be two numbers or two strings.");
          return INTERPRET_RUNTIME_ERROR;
        }
        break;
      }
//< Strings add-strings
//> Types of Values op-arithmetic
      case OP_SUBTRACT: BINARY_OP(NUMBER_VAL, -); break;
      case OP_MULTIPLY: BINARY_OP(NUMBER_VAL, *); break;
      case OP_DIVIDE:   BINARY_OP(NUMBER_VAL, /); break;
//< Types of Values op-arithmetic
//> Types of Values op-not
      case OP_NOT:
        push(BOOL_VAL(isFalsey(pop())));
        break;
//< Types of Values op-not
//> Types of Values op-negate
      case OP_NEGATE:
        if (!IS_NUMBER(peek(0))) {
          runtimeError("Operand must be a number.");
          return INTERPRET_RUNTIME_ERROR;
        }
        push(NUMBER_VAL(-AS_NUMBER(pop())));
        break;
//< Types of Values op-negate
//> Global Variables interpret-print
      case OP_PRINT: {
        printValue(pop());
        printf("\n");
        break;
      }
//< Global Variables interpret-print
//> Jumping Back and Forth op-jump
      case OP_JUMP: {
        uint16_t offset = READ_SHORT();
/* Jumping Back and Forth op-jump < Calls and Functions jump
        vm.ip += offset;
*/
//> Calls and Functions jump
        frame->ip += offset;
//< Calls and Functions jump
        break;
      }
//< Jumping Back and Forth op-jump
//> Jumping Back and Forth op-jump-if-false
      case OP_JUMP_IF_FALSE: {
        uint16_t offset = READ_SHORT();
/* Jumping Back and Forth op-jump-if-false < Calls and Functions jump-if-false
        if (isFalsey(peek(0))) vm.ip += offset;
*/
//> Calls and Functions jump-if-false
        // The condition is only peeked at; it is not popped here.
        if (isFalsey(peek(0))) frame->ip += offset;
//< Calls and Functions jump-if-false
        break;
      }
//< Jumping Back and Forth op-jump-if-false
//> Jumping Back and Forth op-loop
      case OP_LOOP: {
        // Like OP_JUMP, but the offset is subtracted (a backward jump).
        uint16_t offset = READ_SHORT();
/* Jumping Back and Forth op-loop < Calls and Functions loop
        vm.ip -= offset;
*/
//> Calls and Functions loop
        frame->ip -= offset;
//< Calls and Functions loop
        break;
      }
//< Jumping Back and Forth op-loop
//> Calls and Functions interpret-call
      case OP_CALL: {
        int argCount = READ_BYTE();
        // The callee sits on the stack just below its arguments.
        if (!callValue(peek(argCount), argCount)) {
          return INTERPRET_RUNTIME_ERROR;
        }
//> update-frame-after-call
        // A successful call may have pushed a new frame; switch to it.
        frame = &vm.frames[vm.frameCount - 1];
//< update-frame-after-call
        break;
      }
//< Calls and Functions interpret-call
//> Methods and Initializers interpret-invoke
      case OP_INVOKE: {
        ObjString* method = READ_STRING();
        int argCount = READ_BYTE();
        if (!invoke(method, argCount)) {
          return INTERPRET_RUNTIME_ERROR;
        }
        frame = &vm.frames[vm.frameCount - 1];
        break;
      }
//< Methods and Initializers interpret-invoke
//> Superclasses interpret-super-invoke
      case OP_SUPER_INVOKE: {
        ObjString* method = READ_STRING();
        int argCount = READ_BYTE();
        // The superclass is on top of the stack, above the arguments.
        ObjClass* superclass = AS_CLASS(pop());
        if (!invokeFromClass(superclass, method, argCount)) {
          return INTERPRET_RUNTIME_ERROR;
        }
        frame = &vm.frames[vm.frameCount - 1];
        break;
      }
//< Superclasses interpret-super-invoke
//> Closures interpret-closure
      case OP_CLOSURE: {
        ObjFunction* function = AS_FUNCTION(READ_CONSTANT());
        ObjClosure* closure = newClosure(function);
        // The closure is pushed before its upvalues are filled in.
        push(OBJ_VAL(closure));
//> interpret-capture-upvalues
        // One (isLocal, index) operand pair follows per upvalue: either
        // capture a local slot of the current frame or share an upvalue of
        // the enclosing closure.
        for (int i = 0; i < closure->upvalueCount; i++) {
          uint8_t isLocal = READ_BYTE();
          uint8_t index = READ_BYTE();
          if (isLocal) {
            closure->upvalues[i] =
                captureUpvalue(frame->slots + index);
          } else {
            closure->upvalues[i] = frame->closure->upvalues[index];
          }
        }
//< interpret-capture-upvalues
        break;
      }
//< Closures interpret-closure
//> Closures interpret-close-upvalue
      case OP_CLOSE_UPVALUE:
        // Close any upvalue that refers to the top slot, then discard it.
        closeUpvalues(vm.stackTop - 1);
        pop();
        break;
//< Closures interpret-close-upvalue
      case OP_RETURN: {
/* A Virtual Machine print-return < Global Variables op-return
        printValue(pop());
        printf("\n");
*/
/* Global Variables op-return < Calls and Functions interpret-return
        // Exit interpreter.
*/
/* A Virtual Machine run < Calls and Functions interpret-return
        return INTERPRET_OK;
*/
//> Calls and Functions interpret-return
        Value result = pop();
//> Closures return-close-upvalues
        // Close every upvalue that still points into this frame's slots.
        closeUpvalues(frame->slots);
//< Closures return-close-upvalues
        vm.frameCount--;
        // Returning from the outermost frame ends execution; the pop()
        // removes the remaining value in that frame's slot 0.
        if (vm.frameCount == 0) {
          pop();
          return INTERPRET_OK;
        }

        // Discard the callee's whole stack window, leave the result in its
        // place and resume the caller's frame.
        vm.stackTop = frame->slots;
        push(result);
        frame = &vm.frames[vm.frameCount - 1];
        break;
//< Calls and Functions interpret-return
      }
//> Classes and Instances interpret-class
      case OP_CLASS:
        push(OBJ_VAL(newClass(READ_STRING())));
        break;
//< Classes and Instances interpret-class
//> Superclasses interpret-inherit
      case OP_INHERIT: {
        Value superclass = peek(1);
//> inherit-non-class
        if (!IS_CLASS(superclass)) {
          runtimeError("Superclass must be a class.");
          return INTERPRET_RUNTIME_ERROR;
        }

//< inherit-non-class
        ObjClass* subclass = AS_CLASS(peek(0));
        // NOTE(review): tableAddAll() presumably copies every entry of the
        // superclass's method table into the subclass's -- confirm against
        // table.c.
        tableAddAll(&AS_CLASS(superclass)->methods,
                    &subclass->methods);
        pop(); // Subclass.
        break;
      }
//< Superclasses interpret-inherit
//> Methods and Initializers interpret-method
      case OP_METHOD:
        defineMethod(READ_STRING());
        break;
//< Methods and Initializers interpret-method
    }
  }

#undef READ_BYTE
//> Jumping Back and Forth undef-read-short
#undef READ_SHORT
//< Jumping Back and Forth undef-read-short
//> undef-read-constant
#undef READ_CONSTANT
//< undef-read-constant
//> Global Variables undef-read-string
#undef READ_STRING
//< Global Variables undef-read-string
//> undef-binary-op
#undef BINARY_OP
//< undef-binary-op
}
//< run
//> omit
// Not part of the interpreter proper: this externally visible function
// exists only so that the static run() has a caller.
void hack(bool b) {
  // Hack to avoid unused function error. run() is not used in the
  // scanning chapter.
  run();
  // Recurses at most once, because the nested call passes false.
  if (b) hack(false);
}
//< omit
//> interpret
/* A Virtual Machine interpret < Scanning on Demand vm-interpret-c
InterpretResult interpret(Chunk* chunk) {
  vm.chunk = chunk;
  vm.ip = vm.chunk->code;
  return run();
*/
//> Scanning on Demand vm-interpret-c
// Compiles `source` and executes it. Returns INTERPRET_COMPILE_ERROR when
// compile() yields NULL, otherwise whatever run() returns.
InterpretResult interpret(const char* source) {
/* Scanning on Demand vm-interpret-c < Compiling Expressions interpret-chunk
  compile(source);
  return INTERPRET_OK;
*/
/* Compiling Expressions interpret-chunk < Calls and Functions interpret-stub
  Chunk chunk;
  initChunk(&chunk);

  if (!compile(source, &chunk)) {
    freeChunk(&chunk);
    return INTERPRET_COMPILE_ERROR;
  }

  vm.chunk = &chunk;
  vm.ip = vm.chunk->code;
*/
//> Calls and Functions interpret-stub
  ObjFunction* function = compile(source);
  if (function == NULL) return INTERPRET_COMPILE_ERROR;

  // The function stays on the stack while the closure is allocated.
  // NOTE(review): presumably so a GC during newClosure() can still reach
  // it -- confirm.
  push(OBJ_VAL(function));
//< Calls and Functions interpret-stub
/* Calls and Functions interpret-stub < Calls and Functions interpret
  CallFrame* frame = &vm.frames[vm.frameCount++];
  frame->function = function;
  frame->ip = function->chunk.code;
  frame->slots = vm.stack;
*/
/* Calls and Functions interpret < Closures interpret
  call(function, 0);
*/
//> Closures interpret
  ObjClosure* closure = newClosure(function);
  // Swap the raw function for its closure and set up the first CallFrame
  // for the top-level code, which takes no arguments.
  pop();
  push(OBJ_VAL(closure));
  call(closure, 0);
//< Closures interpret
//< Scanning on Demand vm-interpret-c
//> Compiling Expressions interpret-chunk

/* Compiling Expressions interpret-chunk < Calls and Functions end-interpret
  InterpretResult result = run();

  freeChunk(&chunk);
  return result;
*/
//> Calls and Functions end-interpret
  return run();
//< Calls and Functions end-interpret
//< Compiling Expressions interpret-chunk
}
//< interpret


================================================
FILE: c/vm.h
================================================
//> A Virtual Machine vm-h
#ifndef clox_vm_h
#define clox_vm_h

/* A Virtual Machine vm-h < Calls and Functions vm-include-object
#include "chunk.h"
*/
//> Calls and Functions vm-include-object
#include "object.h"
//< Calls and Functions vm-include-object
//> Hash Tables vm-include-table
#include "table.h"
//< Hash Tables vm-include-table
//> vm-include-value
#include "value.h"
//< vm-include-value
//> stack-max

//< stack-max
/* A Virtual Machine stack-max < Calls and Functions frame-max
#define STACK_MAX 256
*/
//> Calls and Functions frame-max
#define FRAMES_MAX 64
#define STACK_MAX (FRAMES_MAX * UINT8_COUNT)
//< Calls and Functions frame-max
//> Calls and Functions call-frame

// Bookkeeping for one in-progress call: the closure being run, a pointer
// into bytecode, and a pointer to a run of Value slots.
typedef struct {
/* Calls and Functions call-frame < Closures call-frame-closure
  ObjFunction* function;
*/
//> Closures call-frame-closure
  ObjClosure* closure;
//< Closures call-frame-closure
  // NOTE(review): presumably the next instruction to execute for this call
  // -- confirm where frames are set up and in run().
  uint8_t* ip;
  // NOTE(review): presumably the first VM stack slot this call may use
  // -- confirm where frames are set up.
  Value* slots;
} CallFrame;
//< Calls and Functions call-frame

// Aggregate of the interpreter's runtime state: call frames, value stack,
// tables, and memory-management bookkeeping.
typedef struct {
/* A Virtual Machine vm-h < Calls and Functions frame-array
  Chunk* chunk;
*/
/* A Virtual Machine ip < Calls and Functions frame-array
  uint8_t* ip;
*/
//> Calls and Functions frame-array
  // Fixed-capacity array of call frames plus a count.
  // NOTE(review): frameCount is presumably the number of frames in use.
  CallFrame frames[FRAMES_MAX];
  int frameCount;
  
//< Calls and Functions frame-array
//> vm-stack
  // Fixed-capacity value stack.
  // NOTE(review): stackTop presumably points just past the most recently
  // pushed value -- confirm in push()/pop().
  Value stack[STACK_MAX];
  Value* stackTop;
//< vm-stack
//> Global Variables vm-globals
  // NOTE(review): presumably maps global variable names to their values.
  Table globals;
//< Global Variables vm-globals
//> Hash Tables vm-strings
  // NOTE(review): presumably the set of interned strings -- confirm in the
  // string-allocation code.
  Table strings;
//< Hash Tables vm-strings
//> Methods and Initializers vm-init-string
  // NOTE(review): presumably the cached name of the initializer method.
  ObjString* initString;
//< Methods and Initializers vm-init-string
//> Closures open-upvalues-field
  // NOTE(review): presumably the head of a list of upvalues that still
  // refer to live stack slots.
  ObjUpvalue* openUpvalues;
//< Closures open-upvalues-field
//> Garbage Collection vm-fields

  // NOTE(review): presumably the running allocation total and the threshold
  // at which the next collection is triggered.
  size_t bytesAllocated;
  size_t nextGC;
//< Garbage Collection vm-fields
//> Strings objects-root
  // NOTE(review): presumably the head of the list of every allocated object.
  Obj* objects;
//< Strings objects-root
//> Garbage Collection vm-gray-stack
  // Growable array of object pointers with its used count and capacity.
  // NOTE(review): presumably the collector's worklist of gray objects.
  int grayCount;
  int grayCapacity;
  Obj** grayStack;
//< Garbage Collection vm-gray-stack
} VM;

//> interpret-result
// Outcome codes returned by interpret().
typedef enum {
  INTERPRET_OK,
  // The source could not be compiled.
  INTERPRET_COMPILE_ERROR,
  // NOTE(review): presumably reported by run() when execution fails.
  INTERPRET_RUNTIME_ERROR
} InterpretResult;

//< interpret-result
//> Strings extern-vm
extern VM vm;

//< Strings extern-vm
void initVM();
void freeVM();
/* A Virtual Machine interpret-h < Scanning on Demand vm-interpret-h
InterpretResult interpret(Chunk* chunk);
*/
//> Scanning on Demand vm-interpret-h
InterpretResult interpret(const char* source);
//< Scanning on Demand vm-interpret-h
//> push-pop
void push(Value value);
Value pop();
//< push-pop

#endif


================================================
FILE: java/com/craftinginterpreters/lox/AstPrinter.java
================================================
//> Representing Code ast-printer
package com.craftinginterpreters.lox;
//> omit

import java.util.List;
//< omit

/* Representing Code ast-printer < Statements and State omit
class AstPrinter implements Expr.Visitor<String> {
*/
//> Statements and State omit
class AstPrinter implements Expr.Visitor<String>, Stmt.Visitor<String> {
//< Statements and State omit
  String print(Expr expr) {
    return expr.accept(this);
  }
//> Statements and State omit

  String print(Stmt stmt) {
    return stmt.accept(this);
  }
//< Statements and State omit
//> visit-methods
//> Statements and State omit
  // Renders a block as "(block " followed by the printed form of each
  // contained statement (written back to back) and a closing ")".
  @Override
  public String visitBlockStmt(Stmt.Block stmt) {
    StringBuilder text = new StringBuilder("(block ");
    for (Stmt child : stmt.statements) {
      String printed = child.accept(this);
      text.append(printed);
    }
    return text.append(")").toString();
  }
//< Statements and State omit
//> Classes omit

  // Prints a class declaration as "(class Name", optionally followed by
  // " < superclass", then each method's printed form preceded by a space,
  // then ")".
  @Override
  public String visitClassStmt(Stmt.Class stmt) {
    StringBuilder builder = new StringBuilder();
    builder.append("(class " + stmt.name.lexeme);
//> Inheritance omit

    // The superclass clause is only emitted when the class declares one.
    if (stmt.superclass != null) {
      builder.append(" < " + print(stmt.superclass));
    }
//< Inheritance omit

    for (Stmt.Function method : stmt.methods) {
      builder.append(" " + print(method));
    }

    builder.append(")");
    return builder.toString();
  }
//< Classes omit
//> Statements and State omit

  @Override
  public String visitExpressionStmt(Stmt.Expression stmt) {
    return parenthesize(";", stmt.expression);
  }
//< Statements and State omit
//> Functions omit

  // Prints a function declaration as "(fun name(p1 p2 ...) " followed by the
  // printed form of each body statement and a closing ")".
  @Override
  public String visitFunctionStmt(Stmt.Function stmt) {
    StringBuilder builder = new StringBuilder();
    builder.append("(fun " + stmt.name.lexeme + "(");

    // Separate parameters by position. Detecting the first parameter by
    // comparing object references would drop separators if the same Token
    // instance appeared in the list more than once.
    String separator = "";
    for (Token param : stmt.params) {
      builder.append(separator).append(param.lexeme);
      separator = " ";
    }

    builder.append(") ");

    for (Stmt body : stmt.body) {
      builder.append(body.accept(this));
    }

    builder.append(")");
    return builder.toString();
  }
//< Functions omit
//> Control Flow omit

  // Prints "(if cond then)" when there is no else branch and
  // "(if-else cond then else)" when there is one.
  @Override
  public String visitIfStmt(Stmt.If stmt) {
    boolean hasElse = stmt.elseBranch != null;
    if (hasElse) {
      return parenthesize2("if-else",
          stmt.condition, stmt.thenBranch, stmt.elseBranch);
    }
    return parenthesize2("if", stmt.condition, stmt.thenBranch);
  }
//< Control Flow omit
//> Statements and State omit

  @Override
  public String visitPrintStmt(Stmt.Print stmt) {
    return parenthesize("print", stmt.expression);
  }
//< Statements and State omit
//> Functions omit

  // Prints "(return)" for a bare return, otherwise "(return value)".
  @Override
  public String visitReturnStmt(Stmt.Return stmt) {
    return stmt.value != null
        ? parenthesize("return", stmt.value)
        : "(return)";
  }
//< Functions omit
//> Statements and State omit

  // Prints "(var name)" for a declaration without an initializer and
  // "(var name = initializer)" otherwise.
  @Override
  public String visitVarStmt(Stmt.Var stmt) {
    if (stmt.initializer != null) {
      return parenthesize2("var", stmt.name, "=", stmt.initializer);
    }
    return parenthesize2("var", stmt.name);
  }
//< Statements and State omit
//> Control Flow omit

  @Override
  public String visitWhileStmt(Stmt.While stmt) {
    return parenthesize2("while", stmt.condition, stmt.body);
  }
//< Control Flow omit
//> Statements and State omit

  @Override
  public String visitAssignExpr(Expr.Assign expr) {
    return parenthesize2("=", expr.name.lexeme, expr.value);
  }
//< Statements and State omit

  @Override
  public String visitBinaryExpr(Expr.Binary expr) {
    return parenthesize(expr.operator.lexeme,
                        expr.left, expr.right);
  }
//> Functions omit

  @Override
  public String visitCallExpr(Expr.Call expr) {
    return parenthesize2("call", expr.callee, expr.arguments);
  }
//< Functions omit
//> Classes omit

  @Override
  public String visitGetExpr(Expr.Get expr) {
    return parenthesize2(".", expr.object, expr.name.lexeme);
  }
//< Classes omit

  @Override
  public String visitGroupingExpr(Expr.Grouping expr) {
    return parenthesize("group", expr.expression);
  }

  // Prints a literal via its toString(); a null value is shown as "nil".
  @Override
  public String visitLiteralExpr(Expr.Literal expr) {
    Object literal = expr.value;
    return (literal == null) ? "nil" : literal.toString();
  }
//> Control Flow omit

  @Override
  public String visitLogicalExpr(Expr.Logical expr) {
    return parenthesize(expr.operator.lexeme, expr.left, expr.right);
  }
//< Control Flow omit
//> Classes omit

  @Override
  public String visitSetExpr(Expr.Set expr) {
    return parenthesize2("=",
        expr.object, expr.name.lexeme, expr.value);
  }
//< Classes omit
//> Inheritance omit

  @Override
  public String visitSuperExpr(Expr.Super expr) {
    return parenthesize2("super", expr.method);
  }
//< Inheritance omit
//> Classes omit

  @Override
  public String visitThisExpr(Expr.This expr) {
    return "this";
  }
//< Classes omit

  @Override
  public String visitUnaryExpr(Expr.Unary expr) {
    return parenthesize(expr.operator.lexeme, expr.right);
  }
//> Statements and State omit

  @Override
  public String visitVariableExpr(Expr.Variable expr) {
    return expr.name.lexeme;
  }
//< Statements and State omit
//< visit-methods
//> print-utilities
  // Builds "(name e1 e2 ...)", printing each sub-expression with this
  // visitor and separating the pieces with single spaces.
  private String parenthesize(String name, Expr... exprs) {
    StringBuilder out = new StringBuilder("(");
    out.append(name);
    for (int i = 0; i < exprs.length; i++) {
      out.append(" ").append(exprs[i].accept(this));
    }
    return out.append(")").toString();
  }
//< print-utilities
//> omit
  // Note: AstPrinting other types of syntax trees is not shown in the
  // book, but this is provided here as a reference for those reading
  // the full code.
  // Like parenthesize(), but accepts arbitrary parts; transform() decides
  // how each part is rendered.
  private String parenthesize2(String name, Object... parts) {
    StringBuilder out = new StringBuilder("(").append(name);
    transform(out, parts);
    return out.append(")").toString();
  }

  // Appends each part to `builder`, preceded by a single space. Expressions
  // and statements are printed with this visitor, tokens contribute their
  // lexeme, lists are flattened element by element, and anything else is
  // appended as-is.
  private void transform(StringBuilder builder, Object... parts) {
    for (Object part : parts) {
      if (part instanceof List) {
        // The recursive call writes the separator before every element, so
        // no space is emitted for the list itself. This avoids a doubled
        // space before the first element and a dangling space for an empty
        // list.
        transform(builder, ((List<?>) part).toArray());
        continue;
      }

      builder.append(" ");
      if (part instanceof Expr) {
        builder.append(((Expr)part).accept(this));
//> Statements and State omit
      } else if (part instanceof Stmt) {
        builder.append(((Stmt) part).accept(this));
//< Statements and State omit
      } else if (part instanceof Token) {
        builder.append(((Token) part).lexeme);
      } else {
        builder.append(part);
      }
    }
  }
//< omit
/* Representing Code printer-main < Representing Code omit
  public static void main(String[] args) {
    Expr expression = new Expr.Binary(
        new Expr.Unary(
            new Token(TokenType.MINUS, "-", null, 1),
            new Expr.Literal(123)),
        new Token(TokenType.STAR, "*", null, 1),
        new Expr.Grouping(
            new Expr.Literal(45.67)));

    System.out.println(new AstPrinter().print(expression));
  }
*/
}


================================================
FILE: java/com/craftinginterpreters/lox/Environment.java
================================================
//> Statements and State environment-class
package com.craftinginterpreters.lox;

import java.util.HashMap;
import java.util.Map;

class Environment {
//> enclosing-field
  final Environment enclosing;
//< enclosing-field
  private final Map<String, Object> values = new HashMap<>();
//> environment-constructors
  Environment() {
    enclosing = null;
  }

  Environment(Environment enclosing) {
    this.enclosing = enclosing;
  }
//< environment-constructors
//> environment-get

  // Looks up a variable by the lexeme of `name`. This scope's own bindings
  // are checked first; on a miss the lookup is delegated to the enclosing
  // scope, if there is one. Throws RuntimeError when no scope in the chain
  // has a binding.
  Object get(Token name) {
    // containsKey() rather than a null check on get(): a binding whose
    // stored value is null still counts as defined.
    if (values.containsKey(name.lexeme)) {
      return values.get(name.lexeme);
    }
//> environment-get-enclosing

    if (enclosing != null) return enclosing.get(name);
//< environment-get-enclosing

    throw new RuntimeError(name,
        "Undefined variable '" + name.lexeme + "'.");
  }

//< environment-get
//> environment-assign
  // Overwrites an existing binding for `name`, searching this scope first
  // and then the enclosing scopes. Unlike define(), it never creates a new
  // binding: if no scope in the chain has one, a RuntimeError is thrown.
  void assign(Token name, Object value) {
    if (values.containsKey(name.lexeme)) {
      values.put(name.lexeme, value);
      return;
    }

//> environment-assign-enclosing
    if (enclosing != null) {
      enclosing.assign(name, value);
      return;
    }

//< environment-assign-enclosing
    throw new RuntimeError(name,
        "Undefined variable '" + name.lexeme + "'.");
  }
//< environment-assign
//> environment-define
  // Binds `name` to `value` in this scope only. An existing binding with the
  // same name in this scope is replaced; enclosing scopes are not consulted.
  void define(String name, Object value) {
    values.put(name, value);
  }
//< environment-define
//> Resolving and Binding ancestor
  // Returns the environment reached by following `enclosing` links
  // `distance` times from this one; a distance of zero yields this
  // environment itself.
  Environment ancestor(int distance) {
    Environment scope = this;
    int hops = distance;
    while (hops > 0) {
      scope = scope.enclosing; // [coupled]
      hops--;
    }
    return scope;
  }
//< Resolving and Binding ancestor
//> Resolving and Binding get-at
  Object getAt(int distance, String name) {
    return ancestor(distance).values.get(name);
  }
//< Resolving and Binding get-at
//> Resolving and Binding assign-at
  void assignAt(int distance, Token name, Object value) {
    ancestor(distance).values.put(name.lexeme, value);
  }
//< Resolving and Binding assign-at
//> omit
  // Debug rendering: this scope's bindings, followed by " -> " and the
  // enclosing scope's rendering when there is an enclosing scope.
  @Override
  public String toString() {
    String own = values.toString();
    return (enclosing == null)
        ? own
        : own + " -> " + enclosing.toString();
  }
//< omit
}


================================================
FILE: java/com/craftinginterpreters/lox/Expr.java
================================================
//> Appendix II expr
package com.craftinginterpreters.lox;

import java.util.List;

abstract class Expr {
  // Visitor over the expression node types. Each nested Expr subclass
  // implements accept() by calling the matching visitXxxExpr method below
  // and returning its result, so R is the result type of a traversal.
  interface Visitor<R> {
    R visitAssignExpr(Assign expr);
    R visitBinaryExpr(Binary expr);
    R visitCallExpr(Call expr);
    R visitGetExpr(Get expr);
    R visitGroupingExpr(Grouping expr);
    R visitLiteralExpr(Literal expr);
    R visitLogicalExpr(Logical expr);
    R visitSetExpr(Set expr);
    R visitSuperExpr(Super expr);
    R visitThisExpr(This expr);
    R visitUnaryExpr(Unary expr);
    R visitVariableExpr(Variable expr);
  }

  // Nested Expr classes here...
//> expr-assign
  static class Assign extends Expr {
    Assign(Token name, Expr value) {
      this.name = name;
      this.value = value;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitAssignExpr(this);
    }

    final Token name;
    final Expr value;
  }
//< expr-assign
//> expr-binary
  static class Binary extends Expr {
    Binary(Expr left, Token operator, Expr right) {
      this.left = left;
      this.operator = operator;
      this.right = right;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitBinaryExpr(this);
    }

    final Expr left;
    final Token operator;
    final Expr right;
  }
//< expr-binary
//> expr-call
  // Call expression node: a callee expression, a token, and the argument
  // expressions.
  static class Call extends Expr {
    Call(Expr callee, Token paren, List<Expr> arguments) {
      this.callee = callee;
      this.paren = paren;
      this.arguments = arguments;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitCallExpr(this);
    }

    final Expr callee;
    // NOTE(review): presumably the call's closing parenthesis, kept so
    // errors about the call can point at a source location -- confirm in the
    // parser.
    final Token paren;
    final List<Expr> arguments;
  }
//< expr-call
//> expr-get
  static class Get extends Expr {
    Get(Expr object, Token name) {
      this.object = object;
      this.name = name;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitGetExpr(this);
    }

    final Expr object;
    final Token name;
  }
//< expr-get
//> expr-grouping
  static class Grouping extends Expr {
    Grouping(Expr expression) {
      this.expression = expression;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitGroupingExpr(this);
    }

    final Expr expression;
  }
//< expr-grouping
//> expr-literal
  static class Literal extends Expr {
    Literal(Object value) {
      this.value = value;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitLiteralExpr(this);
    }

    final Object value;
  }
//< expr-literal
//> expr-logical
  static class Logical extends Expr {
    Logical(Expr left, Token operator, Expr right) {
      this.left = left;
      this.operator = operator;
      this.right = right;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitLogicalExpr(this);
    }

    final Expr left;
    final Token operator;
    final Expr right;
  }
//< expr-logical
//> expr-set
  static class Set extends Expr {
    Set(Expr object, Token name, Expr value) {
      this.object = object;
      this.name = name;
      this.value = value;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitSetExpr(this);
    }

    final Expr object;
    final Token name;
    final Expr value;
  }
//< expr-set
//> expr-super
  static class Super extends Expr {
    Super(Token keyword, Token method) {
      this.keyword = keyword;
      this.method = method;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitSuperExpr(this);
    }

    final Token keyword;
    final Token method;
  }
//< expr-super
//> expr-this
  static class This extends Expr {
    This(Token keyword) {
      this.keyword = keyword;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitThisExpr(this);
    }

    final Token keyword;
  }
//< expr-this
//> expr-unary
  static class Unary extends Expr {
    Unary(Token operator, Expr right) {
      this.operator = operator;
      this.right = right;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitUnaryExpr(this);
    }

    final Token operator;
    final Expr right;
  }
//< expr-unary
//> expr-variable
  static class Variable extends Expr {
    Variable(Token name) {
      this.name = name;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitVariableExpr(this);
    }

    final Token name;
  }
//< expr-variable

  abstract <R> R accept(Visitor<R> visitor);
}
//< Appendix II expr


================================================
FILE: java/com/craftinginterpreters/lox/Interpreter.java
================================================
//> Evaluating Expressions interpreter-class
package com.craftinginterpreters.lox;
//> Statements and State import-list

//> Functions import-array-list
import java.util.ArrayList;
//< Functions import-array-list
//> Resolving and Binding import-hash-map
import java.util.HashMap;
//< Resolving and Binding import-hash-map
import java.util.List;
//< Statements and State import-list
//> Resolving and Binding import-map
import java.util.Map;
//< Resolving and Binding import-map

/* Evaluating Expressions interpreter-class < Statements and State interpreter
class Interpreter implements Expr.Visitor<Object> {
*/
//> Statements and State interpreter
class Interpreter implements Expr.Visitor<Object>,
                             Stmt.Visitor<Void> {
//< Statements and State interpreter
/* Statements and State environment-field < Functions global-environment
  private Environment environment = new Environment();
*/
//> Functions global-environment
  final Environment globals = new Environment();
  private Environment environment = globals;
//< Functions global-environment
//> Resolving and Binding locals-field
  private final Map<Expr, Integer> locals = new HashMap<>();
//< Resolving and Binding locals-field
//> Statements and State environment-field

//< Statements and State environment-field
//> Functions interpreter-constructor
  // Creates an interpreter and registers the native "clock" function in the
  // global environment.
  Interpreter() {
    globals.define("clock", new LoxCallable() {
      // clock() takes no arguments.
      @Override
      public int arity() { return 0; }

      // Returns the current time in seconds as a double, computed from
      // System.currentTimeMillis() divided by 1000.
      @Override
      public Object call(Interpreter interpreter,
                         List<Object> arguments) {
        return (double)System.currentTimeMillis() / 1000.0;
      }

      @Override
      public String toString() { return "<native fn>"; }
    });
  }
  
//< Functions interpreter-constructor
/* Evaluating Expressions interpret < Statements and State interpret
  void interpret(Expr expression) { // [void]
    try {
      Object value = evaluate(expression);
      System.out.println(stringify(value));
    } catch (RuntimeError error) {
      Lox.runtimeError(error);
    }
  }
*/
//> Statements and State interpret
  // Executes the statements in order. A RuntimeError thrown by any statement
  // stops execution of the remaining statements and is handed to
  // Lox.runtimeError() instead of propagating to the caller.
  void interpret(List<Stmt> statements) {
    try {
      for (Stmt statement : statements) {
        execute(statement);
      }
    } catch (RuntimeError error) {
      Lox.runtimeError(error);
    }
  }
//< Statements and State interpret
//> evaluate
  private Object evaluate(Expr expr) {
    return expr.accept(this);
  }
//< evaluate
//> Statements and State execute
  private void execute(Stmt stmt) {
    stmt.accept(this);
  }
//< Statements and State execute
//> Resolving and Binding resolve
  void resolve(Expr expr, int depth) {
    locals.put(expr, depth);
  }
//< Resolving and Binding resolve
//> Statements and State execute-block
  // Runs `statements` with `environment` installed as the interpreter's
  // current environment, then puts the previous environment back.
  void executeBlock(List<Stmt> statements,
                    Environment environment) {
    Environment previous = this.environment;
    try {
      this.environment = environment;

      for (Stmt statement : statements) {
        execute(statement);
      }
    } finally {
      // Restored in a finally block so the previous environment comes back
      // even when a statement exits by throwing (for example the Return
      // thrown by visitReturnStmt, or a RuntimeError).
      this.environment = previous;
    }
  }
//< Statements and State execute-block
//> Statements and State visit-block
  @Override
  public Void visitBlockStmt(Stmt.Block stmt) {
    executeBlock(stmt.statements, new Environment(environment));
    return null;
  }
//< Statements and State visit-block
//> Classes interpreter-visit-class
  // Executes a class declaration: evaluates the optional superclass, builds
  // a LoxFunction for every method, creates the LoxClass, and binds it to the
  // class name in the current environment.
  @Override
  public Void visitClassStmt(Stmt.Class stmt) {
//> Inheritance interpret-superclass
    Object superclass = null;
    if (stmt.superclass != null) {
      superclass = evaluate(stmt.superclass);
      if (!(superclass instanceof LoxClass)) {
        throw new RuntimeError(stmt.superclass.name,
            "Superclass must be a class.");
      }
    }

//< Inheritance interpret-superclass
    // The name is defined (as null) before the class object exists and is
    // assigned at the end, once the class has been built.
    environment.define(stmt.name.lexeme, null);
//> Inheritance begin-superclass-environment

    // With a superclass, methods are created inside an extra environment
    // that binds "super" to the superclass.
    if (stmt.superclass != null) {
      environment = new Environment(environment);
      environment.define("super", superclass);
    }
//< Inheritance begin-superclass-environment
//> interpret-methods

    Map<String, LoxFunction> methods = new HashMap<>();
    for (Stmt.Function method : stmt.methods) {
/* Classes interpret-methods < Classes interpreter-method-initializer
      LoxFunction function = new LoxFunction(method, environment);
*/
//> interpreter-method-initializer
      // The third argument flags the method named "init".
      LoxFunction function = new LoxFunction(method, environment,
          method.name.lexeme.equals("init"));
//< interpreter-method-initializer
      methods.put(method.name.lexeme, function);
    }

/* Classes interpret-methods < Inheritance interpreter-construct-class
    LoxClass klass = new LoxClass(stmt.name.lexeme, methods);
*/
//> Inheritance interpreter-construct-class
    LoxClass klass = new LoxClass(stmt.name.lexeme,
        (LoxClass)superclass, methods);
//> end-superclass-environment

    // Drop the extra "super" environment added above. A non-null superclass
    // here implies the declaration had a superclass clause, because a null
    // evaluation result fails the instanceof check and throws.
    if (superclass != null) {
      environment = environment.enclosing;
    }
//< end-superclass-environment

//< Inheritance interpreter-construct-class
//< interpret-methods
/* Classes interpreter-visit-class < Classes interpret-methods
    LoxClass klass = new LoxClass(stmt.name.lexeme);
*/
    environment.assign(stmt.name, klass);
    return null;
  }
//< Classes interpreter-visit-class
//> Statements and State visit-expression-stmt
  @Override
  public Void visitExpressionStmt(Stmt.Expression stmt) {
    evaluate(stmt.expression);
    return null;
  }
//< Statements and State visit-expression-stmt
//> Functions visit-function
  // Executes a function declaration: wraps the syntax node and the
  // environment that is current at declaration time in a LoxFunction and
  // binds it to the function's name in that environment.
  @Override
  public Void visitFunctionStmt(Stmt.Function stmt) {
/* Functions visit-function < Functions visit-closure
    LoxFunction function = new LoxFunction(stmt);
*/
/* Functions visit-closure < Classes construct-function
    LoxFunction function = new LoxFunction(stmt, environment);
*/
//> Classes construct-function
    // The trailing `false` is the flag that visitClassStmt sets to true only
    // for methods named "init".
    LoxFunction function = new LoxFunction(stmt, environment,
                                           false);
//< Classes construct-function
    environment.define(stmt.name.lexeme, function);
    return null;
  }
//< Functions visit-function
//> Control Flow visit-if
  // Runs the then-branch when the condition is truthy; otherwise runs the
  // else-branch if the statement has one.
  @Override
  public Void visitIfStmt(Stmt.If stmt) {
    boolean conditionHolds = isTruthy(evaluate(stmt.condition));
    if (conditionHolds) {
      execute(stmt.thenBranch);
      return null;
    }

    if (stmt.elseBranch != null) execute(stmt.elseBranch);
    return null;
  }
//< Control Flow visit-if
//> Statements and State visit-print
  @Override
  public Void visitPrintStmt(Stmt.Print stmt) {
    Object value = evaluate(stmt.expression);
    System.out.println(stringify(value));
    return null;
  }
//< Statements and State visit-print
//> Functions visit-return
  // Evaluates the optional return value (null when absent) and unwinds by
  // throwing a Return carrying it.
  @Override
  public Void visitReturnStmt(Stmt.Return stmt) {
    Object result = (stmt.value == null) ? null : evaluate(stmt.value);
    throw new Return(result);
  }
//< Functions visit-return
//> Statements and State visit-var
  // Defines the variable in the current environment. Without an initializer
  // the variable is bound to null.
  @Override
  public Void visitVarStmt(Stmt.Var stmt) {
    Object initial = (stmt.initializer != null)
        ? evaluate(stmt.initializer)
        : null;
    environment.define(stmt.name.lexeme, initial);
    return null;
  }
//< Statements and State visit-var
//> Control Flow visit-while
  // Re-evaluates the condition before every iteration and runs the body for
  // as long as it is truthy.
  @Override
  public Void visitWhileStmt(Stmt.While stmt) {
    for (;;) {
      if (!isTruthy(evaluate(stmt.condition))) break;
      execute(stmt.body);
    }
    return null;
  }
//< Control Flow visit-while
//> Statements and State visit-assign
  // Evaluates the right-hand side, stores it in the target variable, and
  // yields the assigned value as the value of the expression.
  @Override
  public Object visitAssignExpr(Expr.Assign expr) {
    Object value = evaluate(expr.value);
/* Statements and State visit-assign < Resolving and Binding resolved-assign
    environment.assign(expr.name, value);
*/
//> Resolving and Binding resolved-assign

    // A recorded distance means the target lives that many environments up
    // from the current one; no entry means it is treated as a global.
    Integer distance = locals.get(expr);
    if (distance != null) {
      environment.assignAt(distance, expr.name, value);
    } else {
      globals.assign(expr.name, value);
    }

//< Resolving and Binding resolved-assign
    return value;
  }
//< Statements and State visit-assign
//> visit-binary
  // Evaluates both operands (left first), then applies the operator.
  // Comparison and arithmetic operators other than "+" require two numbers
  // and throw RuntimeError otherwise; "+" accepts two numbers or two strings.
  @Override
  public Object visitBinaryExpr(Expr.Binary expr) {
    Object left = evaluate(expr.left);
    Object right = evaluate(expr.right); // [left]

    switch (expr.operator.type) {
//> binary-equality
      case BANG_EQUAL: return !isEqual(left, right);
      case EQUAL_EQUAL: return isEqual(left, right);
//< binary-equality
//> binary-comparison
      case GREATER:
//> check-greater-operand
        checkNumberOperands(expr.operator, left, right);
//< check-greater-operand
        return (double)left > (double)right;
      case GREATER_EQUAL:
//> check-greater-equal-operand
        checkNumberOperands(expr.operator, left, right);
//< check-greater-equal-operand
        return (double)left >= (double)right;
      case LESS:
//> check-less-operand
        checkNumberOperands(expr.operator, left, right);
//< check-less-operand
        return (double)left < (double)right;
      case LESS_EQUAL:
//> check-less-equal-operand
        checkNumberOperands(expr.operator, left, right);
//< check-less-equal-operand
        return (double)left <= (double)right;
//< binary-comparison
      case MINUS:
//> check-minus-operand
        checkNumberOperands(expr.operator, left, right);
//< check-minus-operand
        return (double)left - (double)right;
//> binary-plus
      case PLUS:
        // "+" is overloaded: numeric addition or string concatenation.
        // Mixed operand types fall through to the error below.
        if (left instanceof Double && right instanceof Double) {
          return (double)left + (double)right;
        } // [plus]

        if (left instanceof String && right instanceof String) {
          return (String)left + (String)right;
        }

/* Evaluating Expressions binary-plus < Evaluating Expressions string-wrong-type
        break;
*/
//> string-wrong-type
        throw new RuntimeError(expr.operator,
            "Operands must be two numbers or two strings.");
//< string-wrong-type
//< binary-plus
      case SLASH:
//> check-slash-operand
        checkNumberOperands(expr.operator, left, right);
//< check-slash-operand
        return (double)left / (double)right;
      case STAR:
//> check-star-operand
        checkNumberOperands(expr.operator, left, right);
//< check-star-operand
        return (double)left * (double)right;
    }

    // Unreachable.
    return null;
  }
//< visit-binary
//> Functions visit-call
  // Evaluates the callee, then the arguments in order, and invokes the
  // callee. Throws RuntimeError (located at the call's paren token) when the
  // callee is not a LoxCallable or the argument count differs from its arity.
  @Override
  public Object visitCallExpr(Expr.Call expr) {
    Object callee = evaluate(expr.callee);

    List<Object> arguments = new ArrayList<>();
    for (Expr argument : expr.arguments) { // [in-order]
      arguments.add(evaluate(argument));
    }

//> check-is-callable
    // Checked after the arguments have been evaluated, so argument side
    // effects happen even when the callee turns out not to be callable.
    if (!(callee instanceof LoxCallable)) {
      throw new RuntimeError(expr.paren,
          "Can only call functions and classes.");
    }

//< check-is-callable
    LoxCallable function = (LoxCallable)callee;
//> check-arity
    if (arguments.size() != function.arity()) {
      throw new RuntimeError(expr.paren, "Expected " +
          function.arity() + " arguments but got " +
          arguments.size() + ".");
    }

//< check-arity
    return function.call(this, arguments);
  }
//< Functions visit-call
//> Classes interpreter-visit-get
  // Evaluates the receiver and reads the named property from it. Only
  // LoxInstance receivers are accepted; anything else is a RuntimeError.
  @Override
  public Object visitGetExpr(Expr.Get expr) {
    Object receiver = evaluate(expr.object);
    if (!(receiver instanceof LoxInstance)) {
      throw new RuntimeError(expr.name,
          "Only instances have properties.");
    }

    LoxInstance instance = (LoxInstance) receiver;
    return instance.get(expr.name);
  }
//< Classes interpreter-visit-get
//> visit-grouping
  @Override
  public Object visitGroupingExpr(Expr.Grouping expr) {
    return evaluate(expr.expression);
  }
//< visit-grouping
//> visit-literal
  @Override
  public Object visitLiteralExpr(Expr.Literal expr) {
    return expr.value;
  }
//< visit-literal
//> Control Flow visit-logical
  // Short-circuiting "or"/"and". The left operand is returned as-is when it
  // already decides the result (truthy for "or", falsey for any other
  // operator); only otherwise is the right operand evaluated and returned.
  @Override
  public Object visitLogicalExpr(Expr.Logical expr) {
    Object left = evaluate(expr.left);
    boolean isOr = expr.operator.type == TokenType.OR;

    // "or" stops on a truthy left, everything else stops on a falsey left.
    boolean decided = isOr ? isTruthy(left) : !isTruthy(left);
    if (decided) return left;

    return evaluate(expr.right);
  }
//< Control Flow visit-logical
//> Classes interpreter-visit-set
  // Evaluates the receiver, verifies it is a LoxInstance, then evaluates the
  // value and stores it in the named field. The stored value is the result
  // of the expression.
  @Override
  public Object visitSetExpr(Expr.Set expr) {
    Object target = evaluate(expr.object);

    if (!(target instanceof LoxInstance)) { // [order]
      throw new RuntimeError(expr.name,
                             "Only instances have fields.");
    }

    LoxInstance instance = (LoxInstance)target;
    Object assigned = evaluate(expr.value);
    instance.set(expr.name, assigned);
    return assigned;
  }
//< Classes interpreter-visit-set
//> Inheritance interpreter-visit-super
  // Evaluates `super.method`: finds the method on the superclass and returns
  // it bound to the current instance. Throws RuntimeError when the
  // superclass has no such method.
  @Override
  public Object visitSuperExpr(Expr.Super expr) {
    // Unboxes the recorded distance directly; this assumes `locals` always
    // has an entry for a super expression.
    int distance = locals.get(expr);
    LoxClass superclass = (LoxClass)environment.getAt(
        distance, "super");
//> super-find-this

    // "this" is read from the environment one step closer than the one
    // holding "super".
    // NOTE(review): presumably because the "this" environment is created
    // inside the "super" environment -- confirm in LoxFunction.bind().
    LoxInstance object = (LoxInstance)environment.getAt(
        distance - 1, "this");
//< super-find-this
//> super-find-method

    LoxFunction method = superclass.findMethod(expr.method.lexeme);
//> super-no-method

    if (method == null) {
      throw new RuntimeError(expr.method,
          "Undefined property '" + expr.method.lexeme + "'.");
    }

//< super-no-method
    return method.bind(object);
//< super-find-method
  }
//< Inheritance interpreter-visit-super
//> Classes interpreter-visit-this
  @Override
  public Object visitThisExpr(Expr.This expr) {
    return lookUpVariable(expr.keyword, expr);
  }
//< Classes interpreter-visit-this
//> visit-unary
  // Evaluates the operand, then applies the unary operator: "!" negates the
  // operand's truthiness, "-" negates a number (RuntimeError for any other
  // operand type).
  @Override
  public Object visitUnaryExpr(Expr.Unary expr) {
    Object right = evaluate(expr.right);

    switch (expr.operator.type) {
//> unary-bang
      case BANG:
        return !isTruthy(right);
//< unary-bang
      case MINUS:
//> check-unary-operand
        checkNumberOperand(expr.operator, right);
//< check-unary-operand
        return -(double)right;
    }

    // Unreachable.
    return null;
  }
//< visit-unary
//> Statements and State visit-variable
  @Override
  public Object visitVariableExpr(Expr.Variable expr) {
/* Statements and State visit-variable < Resolving and Binding call-look-up-variable
    return environment.get(expr.name);
*/
//> Resolving and Binding call-look-up-variable
    return lookUpVariable(expr.name, expr);
//< Resolving and Binding call-look-up-variable
  }
//> Resolving and Binding look-up-variable
  // Reads a variable for `expr`. If a distance was recorded for the
  // expression, the value comes from the environment that many steps up the
  // chain; otherwise the name is looked up in the globals.
  private Object lookUpVariable(Token name, Expr expr) {
    Integer depth = locals.get(expr);
    if (depth == null) {
      return globals.get(name);
    }
    return environment.getAt(depth, name.lexeme);
  }
//< Resolving and Binding look-up-variable
//< Statements and State visit-variable
//> check-operand
  // Throws RuntimeError at `operator` unless the operand is a Double.
  private void checkNumberOperand(Token operator, Object operand) {
    if (!(operand instanceof Double)) {
      throw new RuntimeError(operator, "Operand must be a number.");
    }
  }
//< check-operand
//> check-operands
  // Throws RuntimeError at `operator` unless both operands are Doubles.
  private void checkNumberOperands(Token operator,
                                   Object left, Object right) {
    boolean bothNumbers =
        left instanceof Double && right instanceof Double;
    // [operand]
    if (!bothNumbers) {
      throw new RuntimeError(operator, "Operands must be numbers.");
    }
  }
//< check-operands
//> is-truthy
  // Truthiness rule: null and Boolean false are falsey; every other value
  // is truthy.
  private boolean isTruthy(Object object) {
    if (object instanceof Boolean) {
      return ((Boolean)object).booleanValue();
    }
    return object != null;
  }
//< is-truthy
//> is-equal
  /**
   * Null-safe equality: two nulls are equal, null never equals a non-null
   * value, and otherwise the left operand's {@code equals} decides.
   *
   * @param a the left value (may be null)
   * @param b the right value (may be null)
   * @return true if the two values are considered equal
   */
  private boolean isEqual(Object a, Object b) {
    if (a == null) return b == null;
    return a.equals(b);
  }
//< is-equal
//> stringify
  /**
   * Converts a runtime value to its display text. Null becomes "nil", a
   * Double whose text ends in ".0" has that suffix removed, and every other
   * value uses its own {@code toString()}.
   *
   * @param object the value to render (may be null)
   * @return the text for the value
   */
  private String stringify(Object object) {
    if (object == null) return "nil";

    String text = object.toString();
    boolean wholeNumber = object instanceof Double && text.endsWith(".0");
    if (wholeNumber) {
      return text.substring(0, text.length() - 2);
    }

    return text;
  }
//< stringify
}


================================================
FILE: java/com/craftinginterpreters/lox/Lox.java
================================================
//> Scanning lox-class
package com.craftinginterpreters.lox;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

/**
 * Command-line entry point for jlox. Runs a script file when given one
 * argument or an interactive prompt when given none, and holds the static
 * error flags and reporting helpers that the parser and resolver call.
 */
public class Lox {
//> Evaluating Expressions interpreter-instance
  // One interpreter is shared by every call to run(), including successive
  // lines entered at the prompt.
  private static final Interpreter interpreter = new Interpreter();
//< Evaluating Expressions interpreter-instance
//> had-error
  // Set by report(); read by run() and runFile().
  static boolean hadError = false;
//< had-error
//> Evaluating Expressions had-runtime-error-field
  // Set by runtimeError(); read by runFile().
  static boolean hadRuntimeError = false;

//< Evaluating Expressions had-runtime-error-field
  /**
   * Dispatches on the argument count: more than one argument prints the
   * usage line and exits with status 64, exactly one runs that file, and
   * none starts the prompt.
   *
   * @param args command-line arguments (at most one script path)
   * @throws IOException if reading the script or standard input fails
   */
  public static void main(String[] args) throws IOException {
    if (args.length > 1) {
      System.out.println("Usage: jlox [script]");
      System.exit(64); // [64]
    } else if (args.length == 1) {
      runFile(args[0]);
    } else {
      runPrompt();
    }
  }
//> run-file
  /**
   * Reads the whole file at {@code path}, decodes it with the platform
   * default charset, and runs it. Exits with status 65 if an error was
   * reported, or 70 if a runtime error was reported.
   *
   * @param path path of the script to execute
   * @throws IOException if the file cannot be read
   */
  private static void runFile(String path) throws IOException {
    byte[] bytes = Files.readAllBytes(Paths.get(path));
    run(new String(bytes, Charset.defaultCharset()));
//> exit-code

    // Indicate an error in the exit code.
    if (hadError) System.exit(65);
//< exit-code
//> Evaluating Expressions check-runtime-error
    if (hadRuntimeError) System.exit(70);
//< Evaluating Expressions check-runtime-error
  }
//< run-file
//> prompt
  /**
   * Interactive loop: prints a "> " prompt, reads one line from standard
   * input, and runs it. The loop ends when readLine() returns null.
   *
   * @throws IOException if reading from standard input fails
   */
  private static void runPrompt() throws IOException {
    InputStreamReader input = new InputStreamReader(System.in);
    BufferedReader reader = new BufferedReader(input);

    for (;;) { // [repl]
      System.out.print("> ");
      String line = reader.readLine();
      if (line == null) break;
      run(line);
//> reset-had-error
      // run() returns early while hadError is set, so clear it here or one
      // bad line would stop every later line from being executed.
      hadError = false;
//< reset-had-error
    }
  }
//< prompt
//> run
  /**
   * Runs one chunk of source text: scan, parse, resolve, interpret. Stops
   * after parsing or after resolving if either stage reported an error.
   *
   * @param source the source text to execute
   */
  private static void run(String source) {
    Scanner scanner = new Scanner(source);
    List<Token> tokens = scanner.scanTokens();
/* Scanning run < Parsing Expressions print-ast

    // For now, just print the tokens.
    for (Token token : tokens) {
      System.out.println(token);
    }
*/
//> Parsing Expressions print-ast
    Parser parser = new Parser(tokens);
/* Parsing Expressions print-ast < Statements and State parse-statements
    Expr expression = parser.parse();
*/
//> Statements and State parse-statements
    List<Stmt> statements = parser.parse();
//< Statements and State parse-statements

    // Stop if there was a syntax error.
    if (hadError) return;

//< Parsing Expressions print-ast
//> Resolving and Binding create-resolver
    Resolver resolver = new Resolver(interpreter);
    resolver.resolve(statements);
//> resolution-error

    // Stop if there was a resolution error.
    if (hadError) return;
//< resolution-error

//< Resolving and Binding create-resolver
/* Parsing Expressions print-ast < Evaluating Expressions interpreter-interpret
    System.out.println(new AstPrinter().print(expression));
*/
/* Evaluating Expressions interpreter-interpret < Statements and State interpret-statements
    interpreter.interpret(expression);
*/
//> Statements and State interpret-statements
    interpreter.interpret(statements);
//< Statements and State interpret-statements
  }
//< run
//> lox-error
  /**
   * Reports an error that is tied only to a line number.
   *
   * @param line    source line of the error
   * @param message description of the problem
   */
  static void error(int line, String message) {
    report(line, "", message);
  }

  /**
   * Writes a formatted error to standard error and sets {@code hadError}.
   *
   * @param line    source line of the error
   * @param where   location text inserted after "Error" (may be empty)
   * @param message description of the problem
   */
  private static void report(int line, String where,
                             String message) {
    System.err.println(
        "[line " + line + "] Error" + where + ": " + message);
    hadError = true;
  }
//< lox-error
//> Parsing Expressions token-error
  /**
   * Reports an error at a token. An EOF token is described as " at end";
   * any other token is quoted by its lexeme.
   *
   * @param token   the token where the error was detected
   * @param message description of the problem
   */
  static void error(Token token, String message) {
    if (token.type == TokenType.EOF) {
      report(token.line, " at end", message);
    } else {
      report(token.line, " at '" + token.lexeme + "'", message);
    }
  }
//< Parsing Expressions token-error
//> Evaluating Expressions runtime-error-method
  /**
   * Prints a runtime error's message followed by the line of its token to
   * standard error, and sets {@code hadRuntimeError}.
   *
   * @param error the runtime error to report
   */
  static void runtimeError(RuntimeError error) {
    System.err.println(error.getMessage() +
        "\n[line " + error.token.line + "]");
    hadRuntimeError = true;
  }
//< Evaluating Expressions runtime-error-method
}


================================================
FILE: java/com/craftinginterpreters/lox/LoxCallable.java
================================================
//> Functions callable
package com.craftinginterpreters.lox;

import java.util.List;

/**
 * Contract for runtime values that can be invoked with an argument list.
 */
interface LoxCallable {
//> callable-arity
  /**
   * @return the number of arguments the callable takes
   */
  int arity();
//< callable-arity
  /**
   * Invokes the callable.
   *
   * @param interpreter the interpreter performing the call
   * @param arguments   the already-evaluated argument values
   * @return the result of the call
   */
  Object call(Interpreter interpreter, List<Object> arguments);
}


================================================
FILE: java/com/craftinginterpreters/lox/LoxClass.java
================================================
//> Classes lox-class
package com.craftinginterpreters.lox;

import java.util.List;
import java.util.Map;

/* Classes lox-class < Classes lox-class-callable
class LoxClass {
*/
//> lox-class-callable
/**
 * Runtime representation of a class: a name, an optional superclass, and a
 * table of methods. Calling the class creates a new instance.
 */
class LoxClass implements LoxCallable {
//< lox-class-callable
  final String name;
//> Inheritance lox-class-superclass-field
  // Null when the class has no superclass; findMethod() checks for that.
  final LoxClass superclass;
//< Inheritance lox-class-superclass-field
/* Classes lox-class < Classes lox-class-methods

  LoxClass(String name) {
    this.name = name;
  }
*/
//> lox-class-methods
  // Methods declared directly on this class, keyed by method name.
  private final Map<String, LoxFunction> methods;

/* Classes lox-class-methods < Inheritance lox-class-constructor
  LoxClass(String name, Map<String, LoxFunction> methods) {
*/
//> Inheritance lox-class-constructor
  /**
   * @param name       the class name
   * @param superclass the superclass, or null if there is none
   * @param methods    the methods declared on this class, keyed by name
   */
  LoxClass(String name, LoxClass superclass,
           Map<String, LoxFunction> methods) {
    this.superclass = superclass;
//< Inheritance lox-class-constructor
    this.name = name;
    this.methods = methods;
  }
//< lox-class-methods
//> lox-class-find-method
  /**
   * Looks a method up on this class, then recursively on the superclass
   * chain.
   *
   * @param name the method name
   * @return the first matching method, or null if no class in the chain
   *         declares it
   */
  LoxFunction findMethod(String name) {
    if (methods.containsKey(name)) {
      return methods.get(name);
    }

//> Inheritance find-method-recurse-superclass
    if (superclass != null) {
      return superclass.findMethod(name);
    }

//< Inheritance find-method-recurse-superclass
    return null;
  }
//< lox-class-find-method

  /** @return the class name */
  @Override
  public String toString() {
    return name;
  }
//> lox-class-call-arity
  /**
   * Creates a new instance of this class. If an "init" method is found, it
   * is bound to the new instance and called with the given arguments; its
   * return value is ignored.
   *
   * @param interpreter the interpreter performing the call
   * @param arguments   the argument values passed to the initializer
   * @return the newly created instance
   */
  @Override
  public Object call(Interpreter interpreter,
                     List<Object> arguments) {
    LoxInstance instance = new LoxInstance(this);
//> lox-class-call-initializer
    LoxFunction initializer = findMethod("init");
    if (initializer != null) {
      initializer.bind(instance).call(interpreter, arguments);
    }

//< lox-class-call-initializer
    return instance;
  }

  /**
   * @return the arity of the "init" method if one is found, otherwise 0
   */
  @Override
  public int arity() {
/* Classes lox-class-call-arity < Classes lox-initializer-arity
    return 0;
*/
//> lox-initializer-arity
    LoxFunction initializer = findMethod("init");
    if (initializer == null) return 0;
    return initializer.arity();
//< lox-initializer-arity
  }
//< lox-class-call-arity
}


================================================
FILE: java/com/craftinginterpreters/lox/LoxFunction.java
================================================
//> Functions lox-function
package com.craftinginterpreters.lox;

import java.util.List;

/**
 * Runtime representation of a user-defined function or method: the
 * declaration syntax node, the environment it closes over, and a flag
 * marking class initializers.
 */
class LoxFunction implements LoxCallable {
  private final Stmt.Function declaration;
//> closure-field
  // Parent of the fresh environment created for every call.
  private final Environment closure;
  
//< closure-field
/* Functions lox-function < Functions closure-constructor
  LoxFunction(Stmt.Function declaration) {
*/
/* Functions closure-constructor < Classes is-initializer-field
  LoxFunction(Stmt.Function declaration, Environment closure) {
*/
//> Classes is-initializer-field
  // When true, call() returns the bound 'this' instead of any other value.
  private final boolean isInitializer;

  /**
   * @param declaration   the function's syntax node
   * @param closure       the environment the function closes over
   * @param isInitializer whether this function is a class initializer
   */
  LoxFunction(Stmt.Function declaration, Environment closure,
              boolean isInitializer) {
    this.isInitializer = isInitializer;
//< Classes is-initializer-field
//> closure-constructor
    this.closure = closure;
//< closure-constructor
    this.declaration = declaration;
  }
//> Classes bind-instance
  /**
   * Produces a copy of this function whose closure is a new environment,
   * nested inside the original closure, that defines "this" as the given
   * instance.
   *
   * @param instance the instance to bind as "this"
   * @return the bound function
   */
  LoxFunction bind(LoxInstance instance) {
    Environment environment = new Environment(closure);
    environment.define("this", instance);
/* Classes bind-instance < Classes lox-function-bind-with-initializer
    return new LoxFunction(declaration, environment);
*/
//> lox-function-bind-with-initializer
    return new LoxFunction(declaration, environment,
                           isInitializer);
//< lox-function-bind-with-initializer
  }
//< Classes bind-instance
//> function-to-string
  /** @return the text "&lt;fn NAME&gt;" using the declared function name */
  @Override
  public String toString() {
    return "<fn " + declaration.name.lexeme + ">";
  }
//< function-to-string
//> function-arity
  /** @return the number of declared parameters */
  @Override
  public int arity() {
    return declaration.params.size();
  }
//< function-arity
//> function-call
  /**
   * Calls the function. A new environment nested in the closure is created,
   * each parameter name is defined in it with the argument at the same
   * index, and the body is executed in that environment.
   *
   * @param interpreter the interpreter used to execute the body
   * @param arguments   argument values; one per declared parameter is read
   * @return for an initializer, the "this" stored in the closure; otherwise
   *         the value carried by a caught Return, or null if the body
   *         finishes without one
   */
  @Override
  public Object call(Interpreter interpreter,
                     List<Object> arguments) {
/* Functions function-call < Functions call-closure
    Environment environment = new Environment(interpreter.globals);
*/
//> call-closure
    Environment environment = new Environment(closure);
//< call-closure
    for (int i = 0; i < declaration.params.size(); i++) {
      environment.define(declaration.params.get(i).lexeme,
          arguments.get(i));
    }

/* Functions function-call < Functions catch-return
    interpreter.executeBlock(declaration.body, environment);
*/
//> catch-return
    try {
      interpreter.executeBlock(declaration.body, environment);
    } catch (Return returnValue) {
//> Classes early-return-this
      // For a function produced by bind(), distance 0 in the closure is the
      // environment where bind() defined "this".
      if (isInitializer) return closure.getAt(0, "this");

//< Classes early-return-this
      return returnValue.value;
    }
//< catch-return
//> Classes return-this

    if (isInitializer) return closure.getAt(0, "this");
//< Classes return-this
    return null;
  }
//< function-call
}


================================================
FILE: java/com/craftinginterpreters/lox/LoxInstance.java
================================================
//> Classes lox-instance
package com.craftinginterpreters.lox;

import java.util.HashMap;
import java.util.Map;

/**
 * Runtime representation of an instance of a class: a reference to its
 * class plus a mutable table of fields.
 */
class LoxInstance {
  // The class this instance was created from; assigned once in the
  // constructor.
  private final LoxClass klass;
//> lox-instance-fields
  // Per-instance field values, keyed by field name.
  private final Map<String, Object> fields = new HashMap<>();
//< lox-instance-fields

  /**
   * @param klass the class this instance belongs to
   */
  LoxInstance(LoxClass klass) {
    this.klass = klass;
  }

//> lox-instance-get-property
  /**
   * Reads a property. Fields are checked first, so a field hides a method
   * of the same name; otherwise a method found on the class is returned
   * bound to this instance.
   *
   * @param name the token naming the property
   * @return the field value, or the bound method
   * @throws RuntimeError if neither a field nor a method has that name
   */
  Object get(Token name) {
    if (fields.containsKey(name.lexeme)) {
      return fields.get(name.lexeme);
    }

//> lox-instance-get-method
    LoxFunction method = klass.findMethod(name.lexeme);
/* Classes lox-instance-get-method < Classes lox-instance-bind-method
    if (method != null) return method;
*/
//> lox-instance-bind-method
    if (method != null) return method.bind(this);
//< lox-instance-bind-method

//< lox-instance-get-method
    throw new RuntimeError(name, // [hidden]
        "Undefined property '" + name.lexeme + "'.");
  }
//< lox-instance-get-property
//> lox-instance-set-property
  /**
   * Stores a field value, creating the field or overwriting an existing
   * one.
   *
   * @param name  the token naming the field
   * @param value the value to store
   */
  void set(Token name, Object value) {
    fields.put(name.lexeme, value);
  }
//< lox-instance-set-property
  /** @return the class name followed by " instance" */
  @Override
  public String toString() {
    return klass.name + " instance";
  }
}


================================================
FILE: java/com/craftinginterpreters/lox/Parser.java
================================================
//> Parsing Expressions parser
package com.craftinginterpreters.lox;

//> Statements and State parser-imports
import java.util.ArrayList;
//< Statements and State parser-imports
//> Control Flow import-arrays
import java.util.Arrays;
//< Control Flow import-arrays
import java.util.List;

import static com.craftinginterpreters.lox.TokenType.*;

class Parser {
//> parse-error
  // Returned by error() and thrown by callers to unwind out of the current
  // rule; declaration() catches it and resynchronizes.
  private static class ParseError extends RuntimeException {}

//< parse-error
  // The token sequence being parsed; isAtEnd() relies on reaching an EOF
  // token in it.
  private final List<Token> tokens;
  // Index into tokens of the next token to be consumed.
  private int current = 0;

  /**
   * @param tokens the tokens to parse
   */
  Parser(List<Token> tokens) {
    this.tokens = tokens;
  }
/* Parsing Expressions parse < Statements and State parse
  Expr parse() {
    try {
      return expression();
    } catch (ParseError error) {
      return null;
    }
  }
*/
//> Statements and State parse
  /**
   * Parses declarations until the EOF token is reached.
   *
   * @return the parsed statements in source order; an entry is null where
   *         declaration() recovered from a syntax error
   */
  List<Stmt> parse() {
    List<Stmt> statements = new ArrayList<>();
    while (!isAtEnd()) {
/* Statements and State parse < Statements and State parse-declaration
      statements.add(statement());
*/
//> parse-declaration
      statements.add(declaration());
//< parse-declaration
    }

    return statements; // [parse-error-handling]
  }
//< Statements and State parse
//> expression
  /**
   * Parses an expression, starting at the assignment rule.
   *
   * @return the parsed expression
   */
  private Expr expression() {
/* Parsing Expressions expression < Statements and State expression
    return equality();
*/
//> Statements and State expression
    return assignment();
//< Statements and State expression
  }
//< expression
//> Statements and State declaration
  /**
   * Parses a class, function or variable declaration, or falls through to
   * a statement. This is where ParseError is caught: the parser
   * resynchronizes and the failed declaration is dropped.
   *
   * @return the parsed statement, or null if a syntax error was recovered
   */
  private Stmt declaration() {
    try {
//> Classes match-class
      if (match(CLASS)) return classDeclaration();
//< Classes match-class
//> Functions match-fun
      if (match(FUN)) return function("function");
//< Functions match-fun
      if (match(VAR)) return varDeclaration();

      return statement();
    } catch (ParseError error) {
      synchronize();
      return null;
    }
  }
//< Statements and State declaration
//> Classes parse-class-declaration
  /**
   * Parses a class declaration after the 'class' keyword has been
   * consumed: a name, an optional '&lt;' superclass name, and a braced
   * list of methods.
   *
   * @return the class statement; its superclass is null when none is given
   */
  private Stmt classDeclaration() {
    Token name = consume(IDENTIFIER, "Expect class name.");
//> Inheritance parse-superclass

    Expr.Variable superclass = null;
    if (match(LESS)) {
      consume(IDENTIFIER, "Expect superclass name.");
      // The identifier just consumed is wrapped as a variable expression.
      superclass = new Expr.Variable(previous());
    }

//< Inheritance parse-superclass
    consume(LEFT_BRACE, "Expect '{' before class body.");

    List<Stmt.Function> methods = new ArrayList<>();
    while (!check(RIGHT_BRACE) && !isAtEnd()) {
      methods.add(function("method"));
    }

    consume(RIGHT_BRACE, "Expect '}' after class body.");

/* Classes parse-class-declaration < Inheritance construct-class-ast
    return new Stmt.Class(name, methods);
*/
//> Inheritance construct-class-ast
    return new Stmt.Class(name, superclass, methods);
//< Inheritance construct-class-ast
  }
//< Classes parse-class-declaration
//> Statements and State parse-statement
  /**
   * Parses one statement, dispatching on the leading token: for, if,
   * print, return, while, or a '{' block. Anything else is parsed as an
   * expression statement.
   *
   * @return the parsed statement
   */
  private Stmt statement() {
//> Control Flow match-for
    if (match(FOR)) return forStatement();
//< Control Flow match-for
//> Control Flow match-if
    if (match(IF)) return ifStatement();
//< Control Flow match-if
    if (match(PRINT)) return printStatement();
//> Functions match-return
    if (match(RETURN)) return returnStatement();
//< Functions match-return
//> Control Flow match-while
    if (match(WHILE)) return whileStatement();
//< Control Flow match-while
//> parse-block
    if (match(LEFT_BRACE)) return new Stmt.Block(block());
//< parse-block

    return expressionStatement();
  }
//< Statements and State parse-statement
//> Control Flow for-statement
  /**
   * Parses a 'for' statement after the keyword has been consumed and
   * rewrites it in terms of existing nodes: the body and increment become a
   * block, that block becomes the body of a while loop on the condition,
   * and the initializer (if any) is placed before the loop in an outer
   * block. No dedicated for-loop node is produced.
   *
   * @return the equivalent statement built from Block and While nodes
   */
  private Stmt forStatement() {
    consume(LEFT_PAREN, "Expect '(' after 'for'.");

/* Control Flow for-statement < Control Flow for-initializer
    // More here...
*/
//> for-initializer
    // The initializer clause may be empty, a var declaration, or an
    // expression statement; the latter two consume their own ';'.
    Stmt initializer;
    if (match(SEMICOLON)) {
      initializer = null;
    } else if (match(VAR)) {
      initializer = varDeclaration();
    } else {
      initializer = expressionStatement();
    }
//< for-initializer
//> for-condition

    Expr condition = null;
    if (!check(SEMICOLON)) {
      condition = expression();
    }
    consume(SEMICOLON, "Expect ';' after loop condition.");
//< for-condition
//> for-increment

    Expr increment = null;
    if (!check(RIGHT_PAREN)) {
      increment = expression();
    }
    consume(RIGHT_PAREN, "Expect ')' after for clauses.");
//< for-increment
//> for-body
    Stmt body = statement();

//> for-desugar-increment
    // Run the increment after the body on every iteration.
    if (increment != null) {
      body = new Stmt.Block(
          Arrays.asList(
              body,
              new Stmt.Expression(increment)));
    }

//< for-desugar-increment
//> for-desugar-condition
    // An omitted condition means the loop condition is the literal true.
    if (condition == null) condition = new Expr.Literal(true);
    body = new Stmt.While(condition, body);

//< for-desugar-condition
//> for-desugar-initializer
    // Run the initializer once, before the loop, inside its own block.
    if (initializer != null) {
      body = new Stmt.Block(Arrays.asList(initializer, body));
    }

//< for-desugar-initializer
    return body;
//< for-body
  }
//< Control Flow for-statement
//> Control Flow if-statement
  /**
   * Parses an 'if' statement after the keyword has been consumed: a
   * parenthesized condition, a then-branch, and an optional 'else' branch.
   *
   * @return the if statement; its else branch is null when absent
   */
  private Stmt ifStatement() {
    consume(LEFT_PAREN, "Expect '(' after 'if'.");
    Expr test = expression();
    consume(RIGHT_PAREN, "Expect ')' after if condition."); // [parens]

    Stmt whenTrue = statement();
    // An 'else' is consumed immediately after the then-branch if present.
    Stmt whenFalse = match(ELSE) ? statement() : null;

    return new Stmt.If(test, whenTrue, whenFalse);
  }
//< Control Flow if-statement
//> Statements and State parse-print-statement
  /**
   * Parses the remainder of a 'print' statement after the keyword has been
   * consumed: an expression followed by ';'.
   *
   * @return the print statement
   */
  private Stmt printStatement() {
    Expr printed = expression();
    consume(SEMICOLON, "Expect ';' after value.");

    return new Stmt.Print(printed);
  }
//< Statements and State parse-print-statement
//> Functions parse-return-statement
  /**
   * Parses the remainder of a 'return' statement after the keyword has
   * been consumed: an optional value expression followed by ';'.
   *
   * @return the return statement; its value is null when omitted
   */
  private Stmt returnStatement() {
    Token keyword = previous();

    // A ';' right after the keyword means there is no return value.
    Expr result = check(SEMICOLON) ? null : expression();
    consume(SEMICOLON, "Expect ';' after return value.");

    return new Stmt.Return(keyword, result);
  }
//< Functions parse-return-statement
//> Statements and State parse-var-declaration
  /**
   * Parses the remainder of a variable declaration after 'var' has been
   * consumed: a name, an optional '=' initializer, and ';'.
   *
   * @return the var statement; its initializer is null when omitted
   */
  private Stmt varDeclaration() {
    Token name = consume(IDENTIFIER, "Expect variable name.");
    Expr initialValue = match(EQUAL) ? expression() : null;
    consume(SEMICOLON, "Expect ';' after variable declaration.");

    return new Stmt.Var(name, initialValue);
  }
//< Statements and State parse-var-declaration
//> Control Flow while-statement
  /**
   * Parses a 'while' statement after the keyword has been consumed: a
   * parenthesized condition followed by a body statement.
   *
   * @return the while statement
   */
  private Stmt whileStatement() {
    consume(LEFT_PAREN, "Expect '(' after 'while'.");
    Expr test = expression();
    consume(RIGHT_PAREN, "Expect ')' after condition.");

    // The body is parsed as the second constructor argument.
    return new Stmt.While(test, statement());
  }
//< Control Flow while-statement
//> Statements and State parse-expression-statement
  /**
   * Parses an expression followed by ';' as a statement.
   *
   * @return the expression statement
   */
  private Stmt expressionStatement() {
    Expr value = expression();
    consume(SEMICOLON, "Expect ';' after expression.");

    return new Stmt.Expression(value);
  }
//< Statements and State parse-expression-statement
//> Functions parse-function
  /**
   * Parses a function or method: name, parenthesized parameter list, and
   * braced body.
   *
   * @param kind text inserted into the error messages; callers in this
   *             class pass "function" or "method"
   * @return the function node
   */
  private Stmt.Function function(String kind) {
    Token name = consume(IDENTIFIER, "Expect " + kind + " name.");
//> parse-parameters
    consume(LEFT_PAREN, "Expect '(' after " + kind + " name.");
    List<Token> parameters = new ArrayList<>();
    if (!check(RIGHT_PAREN)) {
      do {
        if (parameters.size() >= 255) {
          // The error is reported but not thrown, so parsing of the
          // parameter list continues.
          error(peek(), "Can't have more than 255 parameters.");
        }

        parameters.add(
            consume(IDENTIFIER, "Expect parameter name."));
      } while (match(COMMA));
    }
    consume(RIGHT_PAREN, "Expect ')' after parameters.");
//< parse-parameters
//> parse-body

    // block() expects the opening '{' to have been consumed already.
    consume(LEFT_BRACE, "Expect '{' before " + kind + " body.");
    List<Stmt> body = block();
    return new Stmt.Function(name, parameters, body);
//< parse-body
  }
//< Functions parse-function
//> Statements and State block
  /**
   * Parses the declarations inside a block up to and including the closing
   * '}'. The opening '{' must already have been consumed by the caller.
   *
   * @return the declarations contained in the block, in source order
   */
  private List<Stmt> block() {
    List<Stmt> contents = new ArrayList<>();

    // Keep going until the closing brace or the end of the tokens.
    while (!(check(RIGHT_BRACE) || isAtEnd())) {
      contents.add(declaration());
    }
    consume(RIGHT_BRACE, "Expect '}' after block.");

    return contents;
  }
//< Statements and State block
//> Statements and State parse-assignment
  /**
   * Parses an assignment, or falls through to the 'or' rule. The left side
   * is parsed as an ordinary expression first; if '=' follows, it is
   * converted to an assignment target. Recursing for the right side makes
   * assignment right-associative.
   *
   * @return an Assign or Set node for a valid target; otherwise the
   *         expression parsed by or()
   */
  private Expr assignment() {
/* Statements and State parse-assignment < Control Flow or-in-assignment
    Expr expr = equality();
*/
//> Control Flow or-in-assignment
    Expr expr = or();
//< Control Flow or-in-assignment

    if (match(EQUAL)) {
      Token equals = previous();
      Expr value = assignment();

      if (expr instanceof Expr.Variable) {
        Token name = ((Expr.Variable)expr).name;
        return new Expr.Assign(name, value);
//> Classes assign-set
      } else if (expr instanceof Expr.Get) {
        // A property access on the left becomes a property set.
        Expr.Get get = (Expr.Get)expr;
        return new Expr.Set(get.object, get.name, value);
//< Classes assign-set
      }

      // Reported but not thrown: the left-hand expression is returned
      // and parsing carries on.
      error(equals, "Invalid assignment target."); // [no-throw]
    }

    return expr;
  }
//< Statements and State parse-assignment
//> Control Flow or
  /**
   * Parses a left-associative chain of 'or' operators whose operands are
   * parsed by and().
   *
   * @return the single operand, or nested Logical nodes for a chain
   */
  private Expr or() {
    Expr left = and();

    for (;;) {
      if (!match(OR)) return left;

      Token operator = previous();
      left = new Expr.Logical(left, operator, and());
    }
  }
//< Control Flow or
//> Control Flow and
  /**
   * Parses a left-associative chain of 'and' operators whose operands are
   * parsed by equality().
   *
   * @return the single operand, or nested Logical nodes for a chain
   */
  private Expr and() {
    Expr left = equality();

    for (;;) {
      if (!match(AND)) return left;

      Token operator = previous();
      left = new Expr.Logical(left, operator, equality());
    }
  }
//< Control Flow and
//> equality
  /**
   * Parses a left-associative chain of '!=' and '==' operators whose
   * operands are parsed by comparison().
   *
   * @return the single operand, or nested Binary nodes for a chain
   */
  private Expr equality() {
    Expr left = comparison();

    for (;;) {
      if (!match(BANG_EQUAL, EQUAL_EQUAL)) return left;

      Token operator = previous();
      left = new Expr.Binary(left, operator, comparison());
    }
  }
//< equality
//> comparison
  /**
   * Parses a left-associative chain of '&gt;', '&gt;=', '&lt;' and '&lt;='
   * operators whose operands are parsed by term().
   *
   * @return the single operand, or nested Binary nodes for a chain
   */
  private Expr comparison() {
    Expr left = term();

    for (;;) {
      if (!match(GREATER, GREATER_EQUAL, LESS, LESS_EQUAL)) return left;

      Token operator = previous();
      left = new Expr.Binary(left, operator, term());
    }
  }
//< comparison
//> term
  /**
   * Parses a left-associative chain of '-' and '+' operators whose
   * operands are parsed by factor().
   *
   * @return the single operand, or nested Binary nodes for a chain
   */
  private Expr term() {
    Expr left = factor();

    for (;;) {
      if (!match(MINUS, PLUS)) return left;

      Token operator = previous();
      left = new Expr.Binary(left, operator, factor());
    }
  }
//< term
//> factor
  /**
   * Parses a left-associative chain of '/' and '*' operators whose
   * operands are parsed by unary().
   *
   * @return the single operand, or nested Binary nodes for a chain
   */
  private Expr factor() {
    Expr left = unary();

    for (;;) {
      if (!match(SLASH, STAR)) return left;

      Token operator = previous();
      left = new Expr.Binary(left, operator, unary());
    }
  }
//< factor
//> unary
  /**
   * Parses a prefix '!' or '-' applied to another unary expression, or
   * falls through to the call rule. The recursion lets prefix operators
   * stack.
   *
   * @return a Unary node, or the expression parsed by call()
   */
  private Expr unary() {
    if (match(BANG, MINUS)) {
      Token operator = previous();
      Expr right = unary();
      return new Expr.Unary(operator, right);
    }

/* Parsing Expressions unary < Functions unary-call
    return primary();
*/
//> Functions unary-call
    return call();
//< Functions unary-call
  }
//< unary
//> Functions finish-call
  /**
   * Parses the argument list of a call after the opening '(' has been
   * consumed, up to and including the closing ')'.
   *
   * @param callee the expression being called
   * @return the call node, which stores the closing-parenthesis token
   */
  private Expr finishCall(Expr callee) {
    List<Expr> arguments = new ArrayList<>();
    if (!check(RIGHT_PAREN)) {
      do {
//> check-max-arity
        if (arguments.size() >= 255) {
          // Reported but not thrown, so the remaining arguments are
          // still parsed.
          error(peek(), "Can't have more than 255 arguments.");
        }
//< check-max-arity
        arguments.add(expression());
      } while (match(COMMA));
    }

    Token paren = consume(RIGHT_PAREN,
                          "Expect ')' after arguments.");

    return new Expr.Call(callee, paren, arguments);
  }
//< Functions finish-call
//> Functions call
  /**
   * Parses a primary expression followed by any number of call suffixes
   * '(...)' and property accesses '.name', applied left to right.
   *
   * @return the primary expression wrapped in the parsed Call and Get nodes
   */
  private Expr call() {
    Expr expr = primary();

    while (true) { // [while-true]
      if (match(LEFT_PAREN)) {
        expr = finishCall(expr);
//> Classes parse-property
      } else if (match(DOT)) {
        Token name = consume(IDENTIFIER,
            "Expect property name after '.'.");
        expr = new Expr.Get(expr, name);
//< Classes parse-property
      } else {
        break;
      }
    }

    return expr;
  }
//< Functions call
//> primary
  /**
   * Parses the highest-precedence expressions: the literals false, true
   * and nil, number and string literals, 'super.method', 'this',
   * identifiers, and parenthesized groupings.
   *
   * @return the parsed expression
   * @throws ParseError if the current token cannot start an expression
   */
  private Expr primary() {
    if (match(FALSE)) return new Expr.Literal(false);
    if (match(TRUE)) return new Expr.Literal(true);
    if (match(NIL)) return new Expr.Literal(null);

    if (match(NUMBER, STRING)) {
      // The literal value comes from the token that was just consumed.
      return new Expr.Literal(previous().literal);
    }
//> Inheritance parse-super

    if (match(SUPER)) {
      // 'super' must be followed by '.' and a method name.
      Token keyword = previous();
      consume(DOT, "Expect '.' after 'super'.");
      Token method = consume(IDENTIFIER,
          "Expect superclass method name.");
      return new Expr.Super(keyword, method);
    }
//< Inheritance parse-super
//> Classes parse-this

    if (match(THIS)) return new Expr.This(previous());
//< Classes parse-this
//> Statements and State parse-identifier

    if (match(IDENTIFIER)) {
      return new Expr.Variable(previous());
    }
//< Statements and State parse-identifier

    if (match(LEFT_PAREN)) {
      Expr expr = expression();
      consume(RIGHT_PAREN, "Expect ')' after expression.");
      return new Expr.Grouping(expr);
    }
//> primary-error

    throw error(peek(), "Expect expression.");
//< primary-error
  }
//< primary
//> match
  /**
   * Consumes the current token if its type is one of the given types. The
   * types are tried in the order given.
   *
   * @param types the token types to accept
   * @return true if a token was consumed, false if none of the types match
   */
  private boolean match(TokenType... types) {
    for (int i = 0; i < types.length; i++) {
      if (!check(types[i])) continue;

      advance();
      return true;
    }

    return false;
  }
//< match
//> consume
  /**
   * Consumes the current token if it has the expected type; otherwise
   * reports an error at the current token and throws.
   *
   * @param type    the token type required here
   * @param message the error message to report on a mismatch
   * @return the consumed token
   * @throws ParseError if the current token has a different type
   */
  private Token consume(TokenType type, String message) {
    if (!check(type)) {
      throw error(peek(), message);
    }

    return advance();
  }
//< consume
//> check
  /**
   * Tests the current token's type without consuming it.
   *
   * @param type the token type to compare against
   * @return true if the parser is not at the end and the current token has
   *         the given type
   */
  private boolean check(TokenType type) {
    return !isAtEnd() && peek().type == type;
  }
//< check
//> advance
  /**
   * Moves past the current token unless the parser is already at the end.
   *
   * @return the token immediately before the current position after the
   *         move
   */
  private Token advance() {
    boolean exhausted = isAtEnd();
    if (!exhausted) {
      current += 1;
    }

    return previous();
  }
//< advance
//> utils
  /** @return true if the current token is the EOF token */
  private boolean isAtEnd() {
    return peek().type == EOF;
  }

  /** @return the current token, without consuming it */
  private Token peek() {
    return tokens.get(current);
  }

  /**
   * @return the token just before the current position; requires that at
   *         least one token has been consumed (current &gt; 0)
   */
  private Token previous() {
    return tokens.get(current - 1);
  }
//< utils
//> error
  /**
   * Reports a syntax error through Lox.error() and creates a ParseError.
   * The exception is returned rather than thrown, so each caller decides
   * whether to throw it or carry on parsing.
   *
   * @param token   the token where the error was detected
   * @param message description of the problem
   * @return a new ParseError for the caller to throw if it chooses
   */
  private ParseError error(Token token, String message) {
    Lox.error(token, message);
    return new ParseError();
  }
//< error
//> synchronize
  /**
   * Error recovery: skips tokens until a likely statement boundary. After
   * skipping one token, it stops once the previous token is a ';' or the
   * current token is a keyword that starts a declaration or statement, or
   * when the end of the tokens is reached.
   */
  private void synchronize() {
    advance();

    for (; !isAtEnd(); advance()) {
      // A semicolon just behind us ends the broken statement.
      if (previous().type == SEMICOLON) return;

      // These keywords begin a new declaration or statement.
      switch (peek().type) {
        case CLASS:
        case FUN:
        case VAR:
        case FOR:
        case IF:
        case WHILE:
        case PRINT:
        case RETURN:
          return;
        default:
          break;
      }
    }
  }
//< synchronize
}


================================================
FILE: java/com/craftinginterpreters/lox/Resolver.java
================================================
//> Resolving and Binding resolver
package com.craftinginterpreters.lox;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

class Resolver implements Expr.Visitor<Void>, Stmt.Visitor<Void> {
  // The interpreter passed to the constructor.
  private final Interpreter interpreter;
//> scopes-field
  // Stack of scopes, innermost on top; each maps a name to a Boolean.
  // NOTE(review): presumably the Boolean records whether the name is fully
  // defined yet - confirm against declare()/define().
  private final Stack<Map<String, Boolean>> scopes = new Stack<>();
//< scopes-field
//> function-type-field
  // Kind of function body currently being resolved; NONE outside any
  // function. visitReturnStmt() checks this.
  private FunctionType currentFunction = FunctionType.NONE;
//< function-type-field

  /**
   * @param interpreter the interpreter this resolver is created for
   */
  Resolver(Interpreter interpreter) {
    this.interpreter = interpreter;
  }
//> function-type
  // Kinds of function body tracked in currentFunction. visitClassStmt()
  // uses INITIALIZER for methods named "init" and METHOD for the rest;
  // visitFunctionStmt() uses FUNCTION.
  private enum FunctionType {
    NONE,
/* Resolving and Binding function-type < Classes function-type-method
    FUNCTION
*/
//> Classes function-type-method
    FUNCTION,
//> function-type-initializer
    INITIALIZER,
//< function-type-initializer
    METHOD
//< Classes function-type-method
  }
//< function-type
//> Classes class-type

  // Kinds of enclosing class tracked in currentClass: NONE outside any
  // class, CLASS inside a class, SUBCLASS inside a class that declares a
  // superclass.
  private enum ClassType {
    NONE,
/* Classes class-type < Inheritance class-type-subclass
    CLASS
 */
//> Inheritance class-type-subclass
    CLASS,
    SUBCLASS
//< Inheritance class-type-subclass
  }

  // Set in visitClassStmt(); read when validating 'this' and 'super'.
  private ClassType currentClass = ClassType.NONE;

//< Classes class-type
//> resolve-statements
  /**
   * Resolves each statement of a list in order.
   *
   * @param statements the statements to resolve
   */
  void resolve(List<Stmt> statements) {
    for (int i = 0; i < statements.size(); i++) {
      resolve(statements.get(i));
    }
  }
//< resolve-statements
//> visit-block-stmt
  /**
   * Resolves a block: its statements are resolved between beginScope() and
   * endScope().
   *
   * @param stmt the block statement
   * @return always null
   */
  @Override
  public Void visitBlockStmt(Stmt.Block stmt) {
    beginScope();
    resolve(stmt.statements);
    endScope();
    return null;
  }
//< visit-block-stmt
//> Classes resolver-visit-class
  /**
   * Resolves a class declaration. The class name is declared and defined,
   * the superclass expression (if any) is validated and resolved, and each
   * method is resolved inside a scope containing "this" - nested, for
   * subclasses, inside a scope containing "super". currentClass is updated
   * for the duration and restored afterwards.
   *
   * @param stmt the class statement
   * @return always null
   */
  @Override
  public Void visitClassStmt(Stmt.Class stmt) {
//> set-current-class
    // Saved so that the enclosing value can be restored at the end.
    ClassType enclosingClass = currentClass;
    currentClass = ClassType.CLASS;

//< set-current-class
    declare(stmt.name);
    define(stmt.name);
//> Inheritance resolve-superclass

//> inherit-self
    // A superclass with the same name as the class itself is an error.
    if (stmt.superclass != null &&
        stmt.name.lexeme.equals(stmt.superclass.name.lexeme)) {
      Lox.error(stmt.superclass.name,
          "A class can't inherit from itself.");
    }

//< inherit-self
    if (stmt.superclass != null) {
//> set-current-subclass
      currentClass = ClassType.SUBCLASS;
//< set-current-subclass
      resolve(stmt.superclass);
    }
//< Inheritance resolve-superclass
//> Inheritance begin-super-scope

    // Subclasses get an extra scope holding "super", opened before the
    // "this" scope and closed after it.
    if (stmt.superclass != null) {
      beginScope();
      scopes.peek().put("super", true);
    }
//< Inheritance begin-super-scope
//> resolve-methods

//> resolver-begin-this-scope
    beginScope();
    scopes.peek().put("this", true);

//< resolver-begin-this-scope
    for (Stmt.Function method : stmt.methods) {
      FunctionType declaration = FunctionType.METHOD;
//> resolver-initializer-type
      // A method named "init" is resolved as an initializer.
      if (method.name.lexeme.equals("init")) {
        declaration = FunctionType.INITIALIZER;
      }

//< resolver-initializer-type
      resolveFunction(method, declaration); // [local]
    }

//> resolver-end-this-scope
    endScope();

//< resolver-end-this-scope
//< resolve-methods
//> Inheritance end-super-scope
    if (stmt.superclass != null) endScope();

//< Inheritance end-super-scope
//> restore-current-class
    currentClass = enclosingClass;
//< restore-current-class
    return null;
  }
//< Classes resolver-visit-class
//> visit-expression-stmt
  /**
   * Resolves the expression wrapped by an expression statement.
   *
   * @param stmt the expression statement
   * @return always null
   */
  @Override
  public Void visitExpressionStmt(Stmt.Expression stmt) {
    resolve(stmt.expression);
    return null;
  }
//< visit-expression-stmt
//> visit-function-stmt
  /**
   * Resolves a function declaration. The name is declared and defined
   * before resolveFunction() is called for the function itself.
   * NOTE(review): presumably this ordering is what allows a function to
   * refer to itself recursively - confirm against resolveFunction().
   *
   * @param stmt the function statement
   * @return always null
   */
  @Override
  public Void visitFunctionStmt(Stmt.Function stmt) {
    declare(stmt.name);
    define(stmt.name);

/* Resolving and Binding visit-function-stmt < Resolving and Binding pass-function-type
    resolveFunction(stmt);
*/
//> pass-function-type
    resolveFunction(stmt, FunctionType.FUNCTION);
//< pass-function-type
    return null;
  }
//< visit-function-stmt
//> visit-if-stmt
  /**
   * Resolves an if statement: the condition, the then-branch, and the
   * else-branch when one is present. Both branches are resolved whatever
   * the condition is.
   *
   * @param stmt the if statement
   * @return always null
   */
  @Override
  public Void visitIfStmt(Stmt.If stmt) {
    resolve(stmt.condition);
    resolve(stmt.thenBranch);

    // The else branch is optional.
    if (stmt.elseBranch == null) return null;

    resolve(stmt.elseBranch);
    return null;
  }
//< visit-if-stmt
//> visit-print-stmt
  /**
   * Resolves the expression of a print statement.
   *
   * @param stmt the print statement
   * @return always null
   */
  @Override
  public Void visitPrintStmt(Stmt.Print stmt) {
    resolve(stmt.expression);
    return null;
  }
//< visit-print-stmt
//> visit-return-stmt
  /**
   * Resolves a return statement and reports two static errors: a return
   * outside any function, and a return with a value inside an initializer.
   * Resolution continues after either error.
   *
   * @param stmt the return statement
   * @return always null
   */
  @Override
  public Void visitReturnStmt(Stmt.Return stmt) {
//> return-from-top
    if (currentFunction == FunctionType.NONE) {
      Lox.error(stmt.keyword, "Can't return from top-level code.");
    }

//< return-from-top
    if (stmt.value != null) {
//> Classes return-in-initializer
      // A return without a value is still permitted in an initializer.
      if (currentFunction == FunctionType.INITIALIZER) {
        Lox.error(stmt.keyword,
            "Can't return a value from an initializer.");
      }

//< Classes return-in-initializer
      resolve(stmt.value);
    }

    return null;
  }
//< visit-return-stmt
//> visit-var-stmt
  /**
   * Resolves a variable declaration in three steps: declare the name,
   * resolve the initializer if there is one, then define the name.
   * NOTE(review): presumably the name is declared before the initializer
   * is resolved so that a use of the variable inside its own initializer
   * can be detected - confirm against declare() and visitVariableExpr().
   *
   * @param stmt the var statement
   * @return always null
   */
  @Override
  public Void visitVarStmt(Stmt.Var stmt) {
    declare(stmt.name);
    if (stmt.initializer != null) {
      resolve(stmt.initializer);
    }
    define(stmt.name);
    return null;
  }
//< visit-var-stmt
//> visit-while-stmt
  /**
   * Resolves a while statement: the condition, then the body.
   *
   * @param stmt the while statement
   * @return always null
   */
  @Override
  public Void visitWhileStmt(Stmt.While stmt) {
    resolve(stmt.condition);
    resolve(stmt.body);
    return null;
  }
//< visit-while-stmt
//> visit-assign-expr
  /**
   * Resolves an assignment: first the assigned value, then the target
   * variable via resolveLocal().
   *
   * @param expr the assignment expression
   * @return always null
   */
  @Override
  public Void visitAssignExpr(Expr.Assign expr) {
    resolve(expr.value);
    resolveLocal(expr, expr.name);
    return null;
  }
//< visit-assign-expr
//> visit-binary-expr
  /**
   * Resolves both operands of a binary expression, left then right.
   *
   * @param expr the binary expression
   * @return always null
   */
  @Override
  public Void visitBinaryExpr(Expr.Binary expr) {
    resolve(expr.left);
    resolve(expr.right);
    return null;
  }
//< visit-binary-expr
//> visit-call-expr
  /**
   * Resolves a call expression: the callee first, then every argument in
   * order.
   *
   * @param expr the call expression
   * @return always null
   */
  @Override
  public Void visitCallExpr(Expr.Call expr) {
    resolve(expr.callee);

    for (int i = 0; i < expr.arguments.size(); i++) {
      resolve(expr.arguments.get(i));
    }

    return null;
  }
//< visit-call-expr
//> Classes resolver-visit-get
  // Resolves a property access. Only the object expression is walked; the
  // property name itself is not looked up here.
  @Override
  public Void visitGetExpr(Expr.Get expr) {
    Expr target = expr.object;
    resolve(target);
    return null;
  }
//< Classes resolver-visit-get
//> visit-grouping-expr
  // Resolves the expression wrapped by a grouping.
  @Override
  public Void visitGroupingExpr(Expr.Grouping expr) {
    Expr inner = expr.expression;
    resolve(inner);
    return null;
  }
//< visit-grouping-expr
//> visit-literal-expr
  // A literal has no subexpressions and names no variables, so there is
  // nothing to resolve.
  @Override
  public Void visitLiteralExpr(Expr.Literal expr) {
    return null;
  }
//< visit-literal-expr
//> visit-logical-expr
  // Resolves both operands of a logical expression, left before right.
  @Override
  public Void visitLogicalExpr(Expr.Logical expr) {
    Expr lhs = expr.left;
    Expr rhs = expr.right;

    resolve(lhs);
    resolve(rhs);
    return null;
  }
//< visit-logical-expr
//> Classes resolver-visit-set
  // Resolves a property assignment: the assigned value first, then the object
  // whose property is being set.
  @Override
  public Void visitSetExpr(Expr.Set expr) {
    Expr assigned = expr.value;
    Expr target = expr.object;

    resolve(assigned);
    resolve(target);
    return null;
  }
//< Classes resolver-visit-set
//> Inheritance resolve-super-expr
  // Resolves a 'super' expression. An error is reported unless the resolver
  // is currently inside a subclass; either way the keyword is then resolved
  // like a local variable.
  @Override
  public Void visitSuperExpr(Expr.Super expr) {
//> invalid-super
    if (currentClass != ClassType.SUBCLASS) {
      // Pick the message that matches why 'super' is invalid here.
      String message = (currentClass == ClassType.NONE)
          ? "Can't use 'super' outside of a class."
          : "Can't use 'super' in a class with no superclass.";
      Lox.error(expr.keyword, message);
    }

//< invalid-super
    resolveLocal(expr, expr.keyword);
    return null;
  }
//< Inheritance resolve-super-expr
//> Classes resolver-visit-this
  // Resolves a 'this' expression like a local variable. Outside of any class
  // an error is reported and the keyword is left unresolved.
  @Override
  public Void visitThisExpr(Expr.This expr) {
//> this-outside-of-class
    boolean outsideClass = currentClass == ClassType.NONE;
    if (outsideClass) {
      Lox.error(expr.keyword,
          "Can't use 'this' outside of a class.");
      return null;
    }

//< this-outside-of-class
    resolveLocal(expr, expr.keyword);
    return null;
  }

//< Classes resolver-visit-this
//> visit-unary-expr
  // Resolves the single operand of a unary expression.
  @Override
  public Void visitUnaryExpr(Expr.Unary expr) {
    Expr operand = expr.right;
    resolve(operand);
    return null;
  }
//< visit-unary-expr
//> visit-variable-expr
  // Resolves a variable read. Reports an error when the innermost scope holds
  // the name but still maps it to false (declared, not yet defined), which
  // means the variable is being read inside its own initializer. The variable
  // is resolved afterwards in either case.
  @Override
  public Void visitVariableExpr(Expr.Variable expr) {
    // Boolean.FALSE.equals() compares by value instead of object identity, so
    // the check does not depend on the stored Boolean being the cached FALSE
    // instance. It is also null-safe: a name absent from the scope yields
    // null from get(), which simply compares unequal.
    if (!scopes.isEmpty() &&
        Boolean.FALSE.equals(scopes.peek().get(expr.name.lexeme))) {
      Lox.error(expr.name,
          "Can't read local variable in its own initializer.");
    }

    resolveLocal(expr, expr.name);
    return null;
  }
//< visit-variable-expr
//> resolve-stmt
  // Resolves one statement by dispatching it to the matching visit method of
  // this resolver.
  private void resolve(Stmt stmt) {
    stmt.accept(this);
  }
//< resolve-stmt
//> resolve-expr
  // Resolves one expression by dispatching it to the matching visit method of
  // this resolver.
  private void resolve(Expr expr) {
    expr.accept(this);
  }
//< resolve-expr
//> resolve-function
/* Resolving and Binding resolve-function < Resolving and Binding set-current-function
  private void resolveFunction(Stmt.Function function) {
*/
//> set-current-function
  // Resolves a function's parameters and body inside a fresh scope. While the
  // body is walked, currentFunction holds [type]; the value it had on entry is
  // put back before returning.
  private void resolveFunction(
      Stmt.Function function, FunctionType type) {
    FunctionType previous = currentFunction;
    currentFunction = type;

//< set-current-function
    beginScope();
    // Each parameter is declared and immediately defined in the new scope.
    for (int i = 0; i < function.params.size(); i++) {
      Token param = function.params.get(i);
      declare(param);
      define(param);
    }
    resolve(function.body);
    endScope();
//> restore-current-function
    currentFunction = previous;
//< restore-current-function
  }
//< resolve-function
//> begin-scope
  // Opens a new, empty innermost scope.
  private void beginScope() {
    Map<String, Boolean> fresh = new HashMap<String, Boolean>();
    scopes.push(fresh);
  }
//< begin-scope
//> end-scope
  // Discards the innermost scope.
  private void endScope() {
    scopes.pop();
  }
//< end-scope
//> declare
  // Records [name] in the innermost scope as declared but not yet defined
  // (mapped to false). Does nothing when there is no scope on the stack.
  // Redeclaring a name in the same scope is reported as an error, after which
  // the name is still recorded.
  private void declare(Token name) {
    if (scopes.isEmpty()) return;

    String identifier = name.lexeme;
    Map<String, Boolean> innermost = scopes.peek();
//> duplicate-variable
    boolean alreadyDeclared = innermost.containsKey(identifier);
    if (alreadyDeclared) {
      Lox.error(name,
          "Already a variable with this name in this scope.");
    }

//< duplicate-variable
    innermost.put(identifier, false);
  }
//< declare
//> define
  // Marks [name] as fully defined (mapped to true) in the innermost scope.
  // Does nothing when there is no scope on the stack.
  private void define(Token name) {
    if (!scopes.isEmpty()) {
      Map<String, Boolean> innermost = scopes.peek();
      innermost.put(name.lexeme, true);
    }
  }
//< define
//> resolve-local
  // Searches the scope stack from innermost to outermost for [name]. On the
  // first hit, tells the interpreter how many scopes lie between the innermost
  // one and the scope that holds the name (0 = innermost). If no scope holds
  // the name, the interpreter is not told anything.
  private void resolveLocal(Expr expr, Token name) {
    int innermost = scopes.size() - 1;
    for (int depth = 0; depth <= innermost; depth++) {
      Map<String, Boolean> scope = scopes.get(innermost - depth);
      if (!scope.containsKey(name.lexeme)) continue;

      interpreter.resolve(expr, depth);
      return;
    }
  }
//< resolve-local
}


================================================
FILE: java/com/craftinginterpreters/lox/Return.java
================================================
//> Functions return-exception
package com.craftinginterpreters.lox;

// Exception that carries a value. It is constructed without a message or
// cause, and with suppression and stack-trace capture both switched off.
class Return extends RuntimeException {
  final Object value;

  Return(Object value) {
    super(
        /* message */ null,
        /* cause */ null,
        /* enableSuppression */ false,
        /* writableStackTrace */ false);
    this.value = value;
  }
}


================================================
FILE: java/com/craftinginterpreters/lox/RuntimeError.java
================================================
//> Evaluating Expressions runtime-error-class
package com.craftinginterpreters.lox;

// A runtime failure paired with the token it is attributed to.
class RuntimeError extends RuntimeException {
  // [message] becomes the exception message; [token] is kept alongside it.
  RuntimeError(Token token, String message) {
    super(message);
    this.token = token;
  }

  final Token token;
}


================================================
FILE: java/com/craftinginterpreters/lox/Scanner.java
================================================
//> Scanning scanner-class
package com.craftinginterpreters.lox;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.craftinginterpreters.lox.TokenType.*; // [static-import]

class Scanner {
//> keyword-map
  // Maps each reserved word to its token type. identifier() looks lexemes up
  // here; anything not present is treated as an ordinary identifier.
  private static final Map<String, TokenType> keywords;

  static {
    keywords = new HashMap<>();
    keywords.put("and",    AND);
    keywords.put("class",  CLASS);
    keywords.put("else",   ELSE);
    keywords.put("false",  FALSE);
    keywords.put("for",    FOR);
    keywords.put("fun",    FUN);
    keywords.put("if",     IF);
    keywords.put("nil",    NIL);
    keywords.put("or",     OR);
    keywords.put("print",  PRINT);
    keywords.put("return", RETURN);
    keywords.put("super",  SUPER);
    keywords.put("this",   THIS);
    keywords.put("true",   TRUE);
    keywords.put("var",    VAR);
    keywords.put("while",  WHILE);
  }
//< keyword-map
  private final String source;
  private final List<Token> tokens = new ArrayList<>();
//> scan-state
  private int start = 0;
  private int current = 0;
  private int line = 1;
//< scan-state

  // Creates a scanner over [source]; nothing is scanned until scanTokens().
  Scanner(String source) {
    this.source = source;
  }
//> scan-tokens
  // Scans the source to its end and returns the accumulated token list, which
  // is always terminated by an EOF token with an empty lexeme.
  List<Token> scanTokens() {
    while (!isAtEnd()) {
      // Every pass begins at the first character of the next lexeme.
      start = current;
      scanToken();
    }

    Token eof = new Token(EOF, "", null, line);
    tokens.add(eof);
    return tokens;
  }
//< scan-tokens
//> scan-token
  // Scans one lexeme starting at the current position. Consumes its first
  // character and dispatches on it: punctuation and operators add a token
  // directly, comments and whitespace add nothing, and strings, numbers and
  // identifiers are handed to their helper methods. Any other character is
  // reported via Lox.error().
  private void scanToken() {
    char c = advance();
    switch (c) {
      case '(': addToken(LEFT_PAREN); break;
      case ')': addToken(RIGHT_PAREN); break;
      case '{': addToken(LEFT_BRACE); break;
      case '}': addToken(RIGHT_BRACE); break;
      case ',': addToken(COMMA); break;
      case '.': addToken(DOT); break;
      case '-': addToken(MINUS); break;
      case '+': addToken(PLUS); break;
      case ';': addToken(SEMICOLON); break;
      case '*': addToken(STAR); break; // [slash]
//> two-char-tokens
      // match() consumes the '=' only when it is the next character, so each
      // of these yields either the one- or the two-character operator.
      case '!':
        addToken(match('=') ? BANG_EQUAL : BANG);
        break;
      case '=':
        addToken(match('=') ? EQUAL_EQUAL : EQUAL);
        break;
      case '<':
        addToken(match('=') ? LESS_EQUAL : LESS);
        break;
      case '>':
        addToken(match('=') ? GREATER_EQUAL : GREATER);
        break;
//< two-char-tokens
//> slash
      case '/':
        if (match('/')) {
          // A comment goes until the end of the line.
          // The newline itself is left unconsumed, so the '\n' case below
          // still sees it and bumps the line counter.
          while (peek() != '\n' && !isAtEnd()) advance();
        } else {
          addToken(SLASH);
        }
        break;
//< slash
//> whitespace

      case ' ':
      case '\r':
      case '\t':
        // Ignore whitespace.
        break;

      case '\n':
        line++;
        break;
//< whitespace
//> string-start

      case '"': string(); break;
//< string-start
//> char-error

      default:
/* Scanning char-error < Scanning digit-start
        Lox.error(line, "Unexpected character.");
*/
//> digit-start
        // Digits and letters cannot be listed as single cases, so they are
        // classified here before falling back to the error.
        if (isDigit(c)) {
          number();
//> identifier-start
        } else if (isAlpha(c)) {
          identifier();
//< identifier-start
        } else {
          Lox.error(line, "Unexpected character.");
        }
//< digit-start
        break;
//< char-error
    }
  }
//< scan-token
//> identifier
  // Consumes the rest of an identifier-shaped lexeme and adds a token for it:
  // the keyword's own type if the lexeme is a reserved word, IDENTIFIER
  // otherwise.
  private void identifier() {
    while (isAlphaNumeric(peek())) advance();

/* Scanning identifier < Scanning keyword-type
    addToken(IDENTIFIER);
*/
//> keyword-type
    String text = source.substring(start, current);
    TokenType keyword = keywords.get(text);
    addToken(keyword != null ? keyword : IDENTIFIER);
//< keyword-type
  }
//< identifier
//> number
  // Consumes a number literal (digits, optionally followed by '.' and more
  // digits) and adds a NUMBER token whose literal is the parsed double.
  private void number() {
    while (isDigit(peek())) advance();

    // A '.' only belongs to the number when a digit follows it; otherwise it
    // is left for the next token.
    boolean hasFraction = peek() == '.' && isDigit(peekNext());
    if (hasFraction) {
      // Step over the "." itself, then the fractional digits.
      advance();
      while (isDigit(peek())) advance();
    }

    String lexeme = source.substring(start, current);
    addToken(NUMBER, Double.parseDouble(lexeme));
  }
//< number
//> string
  // Consumes a string literal up to its closing quote and adds a STRING token
  // whose literal is the text between the quotes. Newlines inside the string
  // advance the line counter. Reaching the end of the source first reports an
  // error and adds no token.
  private void string() {
    for (char c = peek(); c != '"' && !isAtEnd(); c = peek()) {
      if (c == '\n') line++;
      advance();
    }

    if (isAtEnd()) {
      Lox.error(line, "Unterminated string.");
      return;
    }

    // Step over the closing quote.
    advance();

    // The literal value excludes the quote at each end of the lexeme.
    int contentStart = start + 1;
    int contentEnd = current - 1;
    addToken(STRING, source.substring(contentStart, contentEnd));
  }
//< string
//> match
  // Consumes the next character only if it equals [expected]. Returns whether
  // it was consumed; at the end of the source nothing matches.
  private boolean match(char expected) {
    boolean matches = !isAtEnd() && source.charAt(current) == expected;
    if (matches) current++;
    return matches;
  }
//< match
//> peek
  // Returns the next character without consuming it, or '\0' at the end of
  // the source.
  private char peek() {
    return isAtEnd() ? '\0' : source.charAt(current);
  }
//< peek
//> peek-next
  // Returns the character after the next one without consuming anything, or
  // '\0' if the source ends before it.
  private char peekNext() {
    int next = current + 1;
    return next < source.length() ? source.charAt(next) : '\0';
  } // [peek-next]
//< peek-next
//> is-alpha
  // True for ASCII letters and underscore.
  private boolean isAlpha(char c) {
    if (c == '_') return true;

    boolean lowercase = 'a' <= c && c <= 'z';
    boolean uppercase = 'A' <= c && c <= 'Z';
    return lowercase || uppercase;
  }

  // True for any character isAlpha() or isDigit() accepts.
  private boolean isAlphaNumeric(char c) {
    if (isDigit(c)) return true;
    return isAlpha(c);
  }
//< is-alpha
//> is-digit
  // True only for the ASCII digits '0' through '9'.
  private boolean isDigit(char c) {
    return '0' <= c && c <= '9';
  } // [is-digit]
//< is-digit
//> is-at-end
  // True once every character of the source has been consumed.
  private boolean isAtEnd() {
    return source.length() <= current;
  }
//< is-at-end
//> advance-and-add-token
  // Consumes the next character and returns it.
  private char advance() {
    // The position is bumped before the read, exactly as current++ would.
    int index = current++;
    return source.charAt(index);
  }

  // Adds a token of [type] that carries no literal value.
  private void addToken(TokenType type) {
    addToken(type, null);
  }

  // Adds a token of [type] for the current lexeme (the source text from start
  // up to, but not including, current), tagged with the current line.
  private void addToken(TokenType type, Object literal) {
    String lexeme = source.substring(start, current);
    Token token = new Token(type, lexeme, literal, line);
    tokens.add(token);
  }
//< advance-and-add-token
}


================================================
FILE: java/com/craftinginterpreters/lox/Stmt.java
================================================
//> Appendix II stmt
package com.craftinginterpreters.lox;

import java.util.List;

// Base class for statement syntax tree nodes. Each nested subclass stores its
// children in final fields set by the constructor and implements accept() by
// calling the Visitor method named after it.
// NOTE(review): the layout matches what tool/GenerateAst prints, so this file
// looks generated -- confirm before editing by hand.
abstract class Stmt {
  // One visit method per statement type; R is the result type of a visit.
  interface Visitor<R> {
    R visitBlockStmt(Block stmt);
    R visitClassStmt(Class stmt);
    R visitExpressionStmt(Expression stmt);
    R visitFunctionStmt(Function stmt);
    R visitIfStmt(If stmt);
    R visitPrintStmt(Print stmt);
    R visitReturnStmt(Return stmt);
    R visitVarStmt(Var stmt);
    R visitWhileStmt(While stmt);
  }

  // Nested Stmt classes here...
//> stmt-block
  // A list of statements.
  static class Block extends Stmt {
    Block(List<Stmt> statements) {
      this.statements = statements;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitBlockStmt(this);
    }

    final List<Stmt> statements;
  }
//< stmt-block
//> stmt-class
  // A class declaration: its name, a superclass variable expression, and its
  // methods.
  static class Class extends Stmt {
    Class(Token name,
          Expr.Variable superclass,
          List<Stmt.Function> methods) {
      this.name = name;
      this.superclass = superclass;
      this.methods = methods;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitClassStmt(this);
    }

    final Token name;
    final Expr.Variable superclass;
    final List<Stmt.Function> methods;
  }
//< stmt-class
//> stmt-expression
  // A statement consisting of a single expression.
  static class Expression extends Stmt {
    Expression(Expr expression) {
      this.expression = expression;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitExpressionStmt(this);
    }

    final Expr expression;
  }
//< stmt-expression
//> stmt-function
  // A function declaration: its name, parameter name tokens, and body
  // statements.
  static class Function extends Stmt {
    Function(Token name, List<Token> params, List<Stmt> body) {
      this.name = name;
      this.params = params;
      this.body = body;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitFunctionStmt(this);
    }

    final Token name;
    final List<Token> params;
    final List<Stmt> body;
  }
//< stmt-function
//> stmt-if
  // An if statement. The resolver null-checks elseBranch before walking it,
  // so it may be null.
  static class If extends Stmt {
    If(Expr condition, Stmt thenBranch, Stmt elseBranch) {
      this.condition = condition;
      this.thenBranch = thenBranch;
      this.elseBranch = elseBranch;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitIfStmt(this);
    }

    final Expr condition;
    final Stmt thenBranch;
    final Stmt elseBranch;
  }
//< stmt-if
//> stmt-print
  // A print statement and the expression it prints.
  static class Print extends Stmt {
    Print(Expr expression) {
      this.expression = expression;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitPrintStmt(this);
    }

    final Expr expression;
  }
//< stmt-print
//> stmt-return
  // A return statement: the 'return' keyword token and the returned value.
  // The resolver null-checks value before walking it, so it may be null.
  static class Return extends Stmt {
    Return(Token keyword, Expr value) {
      this.keyword = keyword;
      this.value = value;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitReturnStmt(this);
    }

    final Token keyword;
    final Expr value;
  }
//< stmt-return
//> stmt-var
  // A variable declaration. The resolver null-checks initializer before
  // walking it, so it may be null.
  static class Var extends Stmt {
    Var(Token name, Expr initializer) {
      this.name = name;
      this.initializer = initializer;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitVarStmt(this);
    }

    final Token name;
    final Expr initializer;
  }
//< stmt-var
//> stmt-while
  // A while loop: its condition and body.
  static class While extends Stmt {
    While(Expr condition, Stmt body) {
      this.condition = condition;
      this.body = body;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitWhileStmt(this);
    }

    final Expr condition;
    final Stmt body;
  }
//< stmt-while

  // Dispatches to the visitor method that corresponds to the concrete
  // statement type and returns its result.
  abstract <R> R accept(Visitor<R> visitor);
}
//< Appendix II stmt


================================================
FILE: java/com/craftinginterpreters/lox/Token.java
================================================
//> Scanning token-class
package com.craftinginterpreters.lox;

// A single token produced by the scanner. All fields are final and set once
// by the constructor.
class Token {
  final TokenType type;
  final String lexeme;
  final Object literal;
  final int line; // [location]

  // [lexeme] is the token's source text, [literal] its literal value (null
  // when the token has none), and [line] the line it was scanned on.
  Token(TokenType type, String lexeme, Object literal, int line) {
    this.type = type;
    this.lexeme = lexeme;
    this.literal = literal;
    this.line = line;
  }

  // Renders the token as "<type> <lexeme> <literal>". Marked @Override so the
  // compiler verifies that this really overrides Object.toString().
  @Override
  public String toString() {
    return type + " " + lexeme + " " + literal;
  }
}


================================================
FILE: java/com/craftinginterpreters/lox/TokenType.java
================================================
//> Scanning token-type
package com.craftinginterpreters.lox;

// Every kind of token the scanner can produce, grouped by how it is
// recognized. The declaration order determines each constant's ordinal, so
// reordering is a visible change to any code that relies on ordinals.
enum TokenType {
  // Single-character tokens.
  LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE,
  COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR,

  // One or two character tokens.
  BANG, BANG_EQUAL,
  EQUAL, EQUAL_EQUAL,
  GREATER, GREATER_EQUAL,
  LESS, LESS_EQUAL,

  // Literals.
  IDENTIFIER, STRING, NUMBER,

  // Keywords.
  AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR,
  PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,

  // Appended by the scanner after the last real token.
  EOF
}


================================================
FILE: java/com/craftinginterpreters/tool/GenerateAst.java
================================================
//> Representing Code generate-ast
package com.craftinginterpreters.tool;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;

public class GenerateAst {
  // Entry point. Expects exactly one argument, the output directory, and
  // writes Expr.java and Stmt.java into it. Each list entry below has the
  // form "ClassName : Type field, Type field, ..." and describes one nested
  // node class.
  public static void main(String[] args) throws IOException {
    if (args.length != 1) {
      System.err.println("Usage: generate_ast <output directory>");
      // NOTE(review): 64 is presumably EX_USAGE from sysexits.h -- confirm.
      System.exit(64);
    }
    String outputDir = args[0];
//> call-define-ast
    defineAst(outputDir, "Expr", Arrays.asList(
//> Statements and State assign-expr
      "Assign   : Token name, Expr value",
//< Statements and State assign-expr
      "Binary   : Expr left, Token operator, Expr right",
//> Functions call-expr
      "Call     : Expr callee, Token paren, List<Expr> arguments",
//< Functions call-expr
//> Classes get-ast
      "Get      : Expr object, Token name",
//< Classes get-ast
      "Grouping : Expr expression",
      "Literal  : Object value",
//> Control Flow logical-ast
      "Logical  : Expr left, Token operator, Expr right",
//< Control Flow logical-ast
//> Classes set-ast
      "Set      : Expr object, Token name, Expr value",
//< Classes set-ast
//> Inheritance super-expr
      "Super    : Token keyword, Token method",
//< Inheritance super-expr
//> Classes this-ast
      "This     : Token keyword",
//< Classes this-ast
/* Representing Code call-define-ast < Statements and State var-expr
      "Unary    : Token operator, Expr right"
*/
//> Statements and State var-expr
      "Unary    : Token operator, Expr right",
      "Variable : Token name"
//< Statements and State var-expr
    ));
//> Statements and State stmt-ast

    defineAst(outputDir, "Stmt", Arrays.asList(
//> block-ast
      "Block      : List<Stmt> statements",
//< block-ast
/* Classes class-ast < Inheritance superclass-ast
      "Class      : Token name, List<Stmt.Function> methods",
*/
//> Inheritance superclass-ast
      "Class      : Token name, Expr.Variable superclass," +
                  " List<Stmt.Function> methods",
//< Inheritance superclass-ast
      "Expression : Expr expression",
//> Functions function-ast
      "Function   : Token name, List<Token> params," +
                  " List<Stmt> body",
//< Functions function-ast
//> Control Flow if-ast
      "If         : Expr condition, Stmt thenBranch," +
                  " Stmt elseBranch",
//< Control Flow if-ast
/* Statements and State stmt-ast < Statements and State var-stmt-ast
      "Print      : Expr expression"
*/
//> var-stmt-ast
      "Print      : Expr expression",
//< var-stmt-ast
//> Functions return-ast
      "Return     : Token keyword, Expr value",
//< Functions return-ast
/* Statements and State var-stmt-ast < Control Flow while-ast
      "Var        : Token name, Expr initializer"
*/
//> Control Flow while-ast
      "Var        : Token name, Expr initializer",
      "While      : Expr condition, Stmt body"
//< Control Flow while-ast
    ));
//< Statements and State stmt-ast
//< call-define-ast
  }
//> define-ast
  // Writes <outputDir>/<baseName>.java: an abstract base class containing the
  // Visitor interface, one nested class per entry of [types], and the
  // abstract accept() method. Each entry has the form
  // "ClassName : Type field, Type field, ...".
  //
  // Throws IOException if the file cannot be opened or if any write to it
  // failed.
  private static void defineAst(
      String outputDir, String baseName, List<String> types)
      throws IOException {
    String path = outputDir + "/" + baseName + ".java";

    // try-with-resources closes (and thereby flushes) the file even when
    // generation throws partway through, e.g. on a malformed type entry.
    try (PrintWriter writer = new PrintWriter(path, "UTF-8")) {
//> omit
      writer.println("//> Appendix II " + baseName.toLowerCase());
//< omit
      writer.println("package com.craftinginterpreters.lox;");
      writer.println();
      writer.println("import java.util.List;");
      writer.println();
      writer.println("abstract class " + baseName + " {");

//> call-define-visitor
      defineVisitor(writer, baseName, types);

//< call-define-visitor
//> omit
      writer.println();
      writer.println("  // Nested " + baseName + " classes here...");
//< omit
//> nested-classes
      // The AST classes.
      for (String type : types) {
        String className = type.split(":")[0].trim();
        String fields = type.split(":")[1].trim(); // [robust]
        defineType(writer, baseName, className, fields);
      }
//< nested-classes
//> base-accept-method

      // The base accept() method.
      writer.println();
      writer.println("  abstract <R> R accept(Visitor<R> visitor);");

//< base-accept-method
      writer.println("}");
//> omit
      writer.println("//< Appendix II " + baseName.toLowerCase());
//< omit

      // PrintWriter never throws on a failed write; it only records the
      // failure. Surface it so a truncated source file is not left behind
      // silently.
      if (writer.checkError()) {
        throw new IOException("Failed to write " + path);
      }
    }
  }
//< define-ast
//> define-visitor
  // Prints the nested Visitor<R> interface: one "R visit<Type><Base>(...)"
  // method per entry of [types], whose parameter is named after the
  // lower-cased base name.
  private static void defineVisitor(
      PrintWriter writer, String baseName, List<String> types) {
    writer.println("  interface Visitor<R> {");

    for (String type : types) {
      // Everything before the ':' is the class name.
      String[] parts = type.split(":");
      String typeName = parts[0].trim();

      String method = "visit" + typeName + baseName;
      String parameter = typeName + " " + baseName.toLowerCase();
      writer.println("    R " + method + "(" + parameter + ");");
    }

    writer.println("  }");
  }
//< define-visitor
//> define-type
  // Prints one nested node class: its constructor, which copies every
  // parameter into a field of the same name, then the accept() override, then
  // the final field declarations. [fieldList] is a comma-separated list of
  // "Type name" pairs.
  private static void defineType(
      PrintWriter writer, String baseName,
      String className, String fieldList) {
//> omit
    String snippetTag =
        baseName.toLowerCase() + "-" + className.toLowerCase();
    writer.println("//> " + snippetTag);
//< omit
    writer.println("  static class " + className + " extends " +
        baseName + " {");

//> omit
    // Hack. Stmt.Class has such a long constructor that it overflows
    // the line length on the Appendix II page. Wrap it.
    String signature = fieldList;
    if (signature.length() > 64) {
      signature = signature.replace(", ", ",\n          ");
    }

//< omit
    // Constructor.
    writer.println("    " + className + "(" + signature + ") {");

//> omit
    // Undo the wrapping so the list splits into fields on ", " again.
    String unwrapped = signature.replace(",\n          ", ", ");
//< omit
    // Store parameters in fields.
    String[] fields = unwrapped.split(", ");
    for (int i = 0; i < fields.length; i++) {
      // Each field reads "Type name"; the second word is the name.
      String name = fields[i].split(" ")[1];
      writer.println("      this." + name + " = " + name + ";");
    }

    writer.println("    }");
//> accept-method

    // Visitor pattern.
    String visitMethod = "visit" + className + baseName;
    writer.println();
    writer.println("    @Override");
    writer.println("    <R> R accept(Visitor<R> visitor) {");
    writer.println("      return visitor." + visitMethod + "(this);");
    writer.println("    }");
//< accept-method

    // Fields.
    writer.println();
    for (int i = 0; i < fields.length; i++) {
      writer.println("    final " + fields[i] + ";");
    }

    writer.println("  }");
//> omit
    writer.println("//< " + snippetTag);
//< omit
  }
//< define-type
//> pastry-visitor
  // Visitor over the pastry types, with one method per concrete pastry.
  // Nothing in the AST generator above refers to these pastry types.
  interface PastryVisitor {
    void visitBeignet(Beignet beignet); // [overload]
    void visitCruller(Cruller cruller);
  }
//< pastry-visitor
//> pastries
  // Base type of the pastry example; each subclass routes accept() to its own
  // visitor method.
  abstract class Pastry {
//> pastry-accept
    abstract void accept(PastryVisitor visitor);
//< pastry-accept
  }

  // A pastry that hands itself to PastryVisitor.visitBeignet().
  class Beignet extends Pastry {
//> beignet-accept
    @Override
    void accept(PastryVisitor visitor) {
      visitor.visitBeignet(this);
    }
//< beignet-accept
  }

  // A pastry that hands itself to PastryVisitor.visitCruller().
  class Cruller extends Pastry {
//> cruller-accept
    @Override
    void accept(PastryVisitor visitor) {
      visitor.visitCruller(this);
    }
//< cruller-accept
  }
//< pastries
}


================================================
FILE: jlox
================================================
#!/usr/bin/env bash

# Runs the jlox interpreter from the compiled classes in build/java, located
# relative to this script, and forwards all command-line arguments to it.
script_dir=$(dirname "$0")

# Quote the classpath and use "$@" so that a checkout path or a script
# argument containing spaces (or glob characters) reaches java as one word
# instead of being split or expanded by the shell.
java -cp "${script_dir}/build/java" com.craftinginterpreters.lox.Lox "$@"


================================================
FILE: note/BISAC.txt
================================================
COMPUTERS / Programming / Compilers
COMPUTERS / Languages / General
COMPUTERS / Software Development & Engineering / Tools

================================================
FILE: note/answers/chapter01_introduction/1.md
================================================
Markdown, Jinja2, Makefile, SASS, CSS, HTML. There are also the little homegrown
tags inserted in the code and Markdown to weave the two together.

The tests used to ensure the interpreters work correctly also have a
mini-language embedded in comments to define expectations for how the test
should behave.

This doesn't count the Python scripts that glue this all together, since Python
is a general-purpose language.


================================================
FILE: note/answers/chapter01_introduction/2/Hello.java
================================================
public class Hello {
  // Program entry point: writes a one-line greeting to standard output.
  public static void main(String[] args) {
    String greeting = "Hello, world!";
    System.out.println(greeting);
  }
}


================================================
FILE: note/answers/chapter01_introduction/2/Makefile
================================================
# Build the class file from its source; $< is the rule's only prerequisite.
Hello.class: Hello.java
	@ javac $<

# "run" names an action, not a file, so make must never look for it on disk.
.PHONY: run

# Shortcut that compiles when needed and then launches the program. The class
# name passed to java is the prerequisite with its .class suffix removed.
run: Hello.class
	@ java $(basename $<)


================================================
FILE: note/answers/chapter01_introduction/3/Makefile
================================================
# Default to gcc, but only when CC still has make's built-in value, so that
# both `make CC=clang` and a CC set in the environment are honored.
ifeq ($(origin CC),default)
CC := gcc
endif

# Compile the .c file to an executable. CFLAGS is empty unless the user
# supplies it; $< is the source file and $@ the executable being built.
linked_list: linked_list.c
	$(CC) $(CFLAGS) $< -o $@


================================================
FILE: note/answers/chapter01_introduction/3/linked_list.c
================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One element of a doubly linked list of strings.
typedef struct sNode {
  struct sNode* prev;  // Previous node, or NULL for the head of the list.
  struct sNode* next;  // Next node, or NULL for the last node.
  char* string;        // Heap copy of the text, allocated by insert().
} Node;

// Insert a new node containing a copy of [string] after [prev], or at the
// beginning of the list if [prev] is NULL. [list] points at the head pointer,
// which is updated when the new node becomes the head.
//
// If memory cannot be allocated, prints a message to stderr and exits the
// process with EXIT_FAILURE.
void insert(Node** list, Node* prev, const char* string) {
  // Create the new node and copy the string to the heap. Both allocations
  // are checked because writing through a failed malloc() is undefined
  // behavior.
  Node* node = malloc(sizeof(Node));
  if (node == NULL) {
    fprintf(stderr, "insert: out of memory\n");
    exit(EXIT_FAILURE);
  }

  node->string = malloc(strlen(string) + 1);
  if (node->string == NULL) {
    fprintf(stderr, "insert: out of memory\n");
    free(node);
    exit(EXIT_FAILURE);
  }
  strcpy(node->string, string);

  if (prev == NULL) {
    // New head: link in front of the current first node, if there is one.
    if (*list != NULL) (*list)->prev = node;
    node->prev = NULL;
    node->next = *list;
    *list = node;
  } else {
    // Splice between [prev] and whatever followed it.
    node->next = prev->next;
    if (node->next != NULL) node->next->prev = node;
    prev->next = node;
    node->prev = prev;
  }
}

// Return the first node, walking forward from [list], whose string equals
// [string], or NULL when no node matches.
Node* find(Node* list, const char* string) {
  for (Node* node = list; node != NULL; node = node->next) {
    if (strcmp(string, node->string) == 0) return node;
  }

  // Walked off the end without a match.
  return NULL;
}

// Remove [node] from the list whose head pointer is [*list] and free both the
// node and its string. The head pointer is advanced when the head itself is
// removed. A NULL [node] is ignored, so the result of a failed find() can be
// passed straight in.
void delete(Node** list, Node* node) {
  // Nothing to remove.
  if (node == NULL) return;

  // Unlink it.
  if (node->prev != NULL) node->prev->next = node->next;
  if (node->next != NULL) node->next->prev = node->prev;

  // If we're deleting the head, update it.
  if (*list == node) *list = node->next;

  free(node->string);
  free(node);
}

// Print every node from [list] onward, one per line: the node's address, its
// prev and next pointers, and its string.
void dump(Node* list) {
  while (list != NULL) {
    // %p is specified for void* arguments, so convert the Node pointers
    // explicitly rather than passing them through varargs as Node*.
    printf("%p [prev %p next %p] %s\n",
           (void*)list, (void*)list->prev, (void*)list->next, list->string);
    list = list->next;
  }
}

// Exercises the list: builds one/two/three/four, dumps it, then deletes
// "three" and "one", dumping after each deletion.
int main(int argc, const char* argv[]) {
  printf("Hello, World!\n");

  Node* list = NULL;
  insert(&list, NULL, "four");
  insert(&list, NULL, "one");
  insert(&list, find(list, "one"), "two");
  insert(&list, find(list, "two"), "three");

  dump(list);
  printf("-- delete three --\n");
  delete(&list, find(list, "three"));
  dump(list);

  printf("-- delete one --\n");
  delete(&list, find(list, "one"));
  dump(list);

  // Free the nodes that are still in the list so the program exits without
  // leaking them. Deleting the head advances [list] each time.
  while (list != NULL) delete(&list, list);

  return 0;
}


================================================
FILE: note/answers/chapter01_introduction/3/linked_list.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 46;
	objects = {

/* Begin PBXBuildFile section */
		29BE01271DBD3A9300EB6E51 /* linked_list.c in Sources */ = {isa = PBXBuildFile; fileRef = 29BE01261DBD3A9300EB6E51 /* linked_list.c */; };
/* End PBXBuildFile section */

/* Begin PBXCopyFilesBuildPhase section */
		2973DC0C1DBD3A69005047A2 /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		2973DC0E1DBD3A69005047A2 /* linked_list */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = linked_list; sourceTree = BUILT_PRODUCTS_DIR; };
		29BE01261DBD3A9300EB6E51 /* linked_list.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = linked_list.c; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		2973DC0B1DBD3A69005047A2 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		2973DC051DBD3A69005047A2 = {
			isa = PBXGroup;
			children = (
				29BE01261DBD3A9300EB6E51 /* linked_list.c */,
				2973DC0F1DBD3A69005047A2 /* Products */,
			);
			sourceTree = "<group>";
		};
		2973DC0F1DBD3A69005047A2 /* Products */ = {
			isa = PBXGroup;
			children = (
				2973DC0E1DBD3A69005047A2 /* linked_list */,
			);
			name = Products;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		2973DC0D1DBD3A69005047A2 /* linked_list */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 2973DC151DBD3A69005047A2 /* Build configuration list for PBXNativeTarget "linked_list" */;
			buildPhases = (
				2973DC0A1DBD3A69005047A2 /* Sources */,
				2973DC0B1DBD3A69005047A2 /* Frameworks */,
				2973DC0C1DBD3A69005047A2 /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = linked_list;
			productName = linked_list;
			productReference = 2973DC0E1DBD3A69005047A2 /* linked_list */;
			productType = "com.apple.product-type.tool";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		2973DC061DBD3A69005047A2 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				LastUpgradeCheck = 0640;
				ORGANIZATIONNAME = "Robert Nystrom";
				TargetAttributes = {
					2973DC0D1DBD3A69005047A2 = {
						CreatedOnToolsVersion = 6.4;
					};
				};
			};
			buildConfigurationList = 2973DC091DBD3A69005047A2 /* Build configuration list for PBXProject "linked_list" */;
			compatibilityVersion = "Xcode 3.2";
			developmentRegion = English;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
			);
			mainGroup = 2973DC051DBD3A69005047A2;
			productRefGroup = 2973DC0F1DBD3A69005047A2 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				2973DC0D1DBD3A69005047A2 /* linked_list */,
			);
		};
/* End PBXProject section */

/* Begin PBXSourcesBuildPhase section */
		2973DC0A1DBD3A69005047A2 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				29BE01271DBD3A9300EB6E51 /* linked_list.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin XCBuildConfiguration section */
		2973DC131DBD3A69005047A2 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.11;
				MTL_ENABLE_DEBUG_INFO = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = macosx;
			};
			name = Debug;
		};
		2973DC141DBD3A69005047A2 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.11;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
			};
			name = Release;
		};
		2973DC161DBD3A69005047A2 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		2973DC171DBD3A69005047A2 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		2973DC091DBD3A69005047A2 /* Build configuration list for PBXProject "linked_list" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				2973DC131DBD3A69005047A2 /* Debug */,
				2973DC141DBD3A69005047A2 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		2973DC151DBD3A69005047A2 /* Build configuration list for PBXNativeTarget "linked_list" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				2973DC161DBD3A69005047A2 /* Debug */,
				2973DC171DBD3A69005047A2 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 2973DC061DBD3A69005047A2 /* Project object */;
}


================================================
FILE: note/answers/chapter01_introduction/3/linked_list.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:linked_list.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: note/answers/chapter02_map.md
================================================
## 1. Find the various parts in an open source implementation.

TODO

## 2. Why not use a JIT?

1. It's really complex to implement, debug, and maintain. Few people have the
   skill to do it.
2. Like a native code compiler (which it is), it ties you to a specific CPU
   architecture.
3. Bytecode is generally more compact than machine code (since it's closer to
   the semantics of the language), so it takes up less memory. In platforms
   like embedded devices where memory may matter more than speed, that can be
   a worthwhile trade-off.
4. Some platforms, like iOS and most game consoles, expressly disallow
   executing code generated at runtime. The OS simply won't allow you to jump
   into memory that can be written to.

## 3. Why do Lisp compilers also contain an interpreter?

Most Lisps support macros -- code that is executed at compile time, so the
implementation needs to be able to evaluate the macro itself while in the middle
of compiling. You could do that by *compiling* the macro and then running that,
but that's a lot of overhead.


================================================
FILE: note/answers/chapter03_lox.md
================================================
1.  I've, uh, written plenty. Look in /test/. Here's another:
    ~~~~
    class List {
      init(data, next) {
        this.data = data;
        this.next = next;
      }

      map(function) {
        var data = function(this.data);
        var next;
        if (this.next != nil) next = this.next.map(function);
        return List(data, next);
      }

      display() {
        var list = this;
        while (list != nil) {
          print(list.data);
          list = list.next;
        }
      }
    }

    var list = List(1, List(2, List(3, List(4, nil))));
    list.display();

    fun double(n) { return n * 2; }
    list = list.map(double);
    list.display();
    ~~~~

2.  Here's a few:

    1.  What happens if you access a global variable in a function before it is
        defined?

    2.  What does it mean to say something is "an error"? Runtime error?
        Compile time?

    3.  What kind of expressions are allowed when a superclass is specified?

    4.  What happens if you declare two classes or functions with the same name?

    5.  Can a class inherit from something that isn't a class?

    6.  Can you reassign to the name that is bound by a class or function
        declaration?

3.  The big ones are:

    1.  Lists/arrays. You can build your own linked lists, but there's no way to
        create a data structure that stores a contiguous series of values that
        can be accessed in constant time in user code. That has to be baked
        into the language or core library.

    2.  Some mechanism for handling runtime errors along the lines of exception
        handling.

    Also:

    3.  No `break` or `continue` for loops.

    4.  No `switch`.


================================================
FILE: note/answers/chapter04_scanning.md
================================================
1.  Both of them have significant indentation. To handle that, the scanner
    emits synthetic "{" and "}" tokens (or "indent" and "dedent" as Python
    calls them), as if there were explicit delimiters for each level of
    indentation.

    In order to know when a new line begins or ends one or more levels of
    indentation, the scanner has to track the *previous* indentation value.
    That state has to be stored in the scanner, which means it has a little bit
    of *memory*. That makes it no longer a regular language, which is defined
    to only need to store a single finite number identifying which state it's
    in.

    You *could* make a regular language for significant indentation, by having
    a hardcoded limit to the maximum amount of indentation, but that starts to
    split semantic hairs around the Chomsky hierarchy.

2.  In CoffeeScript, parentheses are optional in function calls. You can call a
    function like:

    ```coffeescript
    doStuff withThis
    ```

    Also, there is a nice syntax for lambda functions:

    ```coffeescript
    () -> someLambda
    -> anotherOne
    ```

    On the second line, you can see that you can omit the `()` if the lambda
    takes no parameters. So what does this do:

    ```coffeescript
    someFunction () -> someLambda
    ```

    Does it call `someFunction` with zero parameters and then call the result of
    *that* with one parameter, a lambda? Or does it call `someFunction` with
    one parameter, the lambda? The answer depends on spaces:

    ```coffeescript
    someFunction() -> someLambda
    # Means the same as:
    someFunction()(() -> someLambda)

    someFunction () -> someLambda
    # Means the same as:
    someFunction(() -> someLambda)
    ```

    Ruby has similar corner cases because it also allows omitting the
    parentheses in method calls (which is where CoffeeScript gets it from).

    The C preprocessor relies on spaces to distinguish function macros from
    simple macros:

    ```c
    #define MACRO1 (p) (p)
    #define MACRO2(p) (p)
    ```

    Here, `MACRO1` is a simple text macro that expands to `(p) (p)` when used.
    `MACRO2(p)` is a function-like macro that takes a parameter and expands to
    `(p)` with `p` replaced by the parameter.

3.  Programmers often write "doc comments" above their functions and types. A
    documentation generator or an IDE that shows help text for declarations
    needs access to those comments, so a scanner for those should include them.

    An automated code formatter obviously needs to preserve comments and may
    want to be aware of the original whitespace if some of the author's
    formatting should be preserved.

4.  You can see where I've implemented them for a similar language here:

    https://github.com/munificent/wren/blob/c6eb0be99014d34085e2d24c696aed449e2fb171/src/vm/wren_compiler.c#L663

    The interesting part is the `nesting` variable. Like challenge #1, we
    require some extra state to track the nesting, which makes this not quite
    regular.

    Note also that we need to handle an unterminated block comment.


================================================
FILE: note/answers/chapter05_representing.md
================================================
1.  There are a few ways to do it. Here is one:

    ```text
    expr → expr calls
    expr → IDENTIFIER
    expr → NUMBER

    calls → calls call
    calls → call

    call → "(" ")"
    call → "(" arguments ")"
    call → "." IDENTIFIER

    arguments → expr
    arguments → arguments "," expr
    ```

    It's the syntax for a function invocation.

2.  One way is to create a record or tuple containing a function pointer for
    each operation. In order to allow defining new types and passing them to
    existing code, these functions need to encapsulate the type entirely -- the
    existing code isn't aware of it, so it can't type check. You can do that by
    having the functions be closures that all close over the same shared object,
    "this", basically.

3.  Here you go:

    ```java
    class RpnPrinter implements Expr.Visitor<String> {
      String print(Expr expr) {
        return expr.accept(this);
      }

      @Override
      public String visitBinaryExpr(Expr.Binary expr) {
        return expr.left.accept(this) + " " +
               expr.right.accept(this) + " " +
               expr.operator.lexeme;
      }

      @Override
      public String visitGroupingExpr(Expr.Grouping expr) {
        return expr.expression.accept(this);
      }

      @Override
      public String visitLiteralExpr(Expr.Literal expr) {
        return expr.value.toString();
      }

      @Override
      public String visitUnaryExpr(Expr.Unary expr) {
        String operator = expr.operator.lexeme;
        if (expr.operator.type == TokenType.MINUS) {
          // Can't use same symbol for unary and binary.
          operator = "~";
        }

        return expr.right.accept(this) + " " + operator;
      }

      public static void main(String[] args) {
        Expr expression = new Expr.Binary(
            new Expr.Unary(
                new Token(TokenType.MINUS, "-", null, 1),
                new Expr.Literal(123)),
            new Token(TokenType.STAR, "*", null, 1),
            new Expr.Grouping(
                new Expr.Literal("str")));

        System.out.println(new RpnPrinter().print(expression));
      }
    }
    ```

    Note that we have to handle unary "-" specially. In RPN, we can't use the
    same symbol for both unary and binary forms. When we encounter it, we
    wouldn't know whether to pop one or two numbers off the stack. So, to
    disambiguate, we pick a different symbol for negation.


================================================
FILE: note/answers/chapter06_parsing.md
================================================
1.  The comma operator has the lowest precedence, so it goes between expression
    and equality:

    ```ebnf
    expression → comma ;
    comma      → equality ( "," equality )* ;
    equality   → comparison ( ( "!=" | "==" ) comparison )* ;
    comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
    term       → factor ( ( "-" | "+" ) factor )* ;
    factor     → unary ( ( "/" | "*" ) unary )* ;
    unary      → ( "!" | "-" | "--" | "++" ) unary
               | postfix ;
    postfix    → primary ( "--" | "++" )* ;
    primary    → NUMBER | STRING | "true" | "false" | "nil"
               | "(" expression ")" ;
    ```

    We could define a new syntax tree node by adding this to the `defineAst()`
    call:

    ```java
    "Comma    : Expr left, Expr right",
    ```

    But a simpler choice is to treat it like any other binary operator and
    reuse Expr.Binary.

    Parsing is similar to other infix operators (we keep the operator token so
    that the node can be an ordinary Expr.Binary):

    ```java
    private Expr expression() {
      return comma();
    }

    private Expr comma() {
      Expr expr = equality();

      while (match(COMMA)) {
        Token operator = previous();
        Expr right = equality();
        expr = new Expr.Binary(expr, operator, right);
      }

      return expr;
    }
    ```

    Keep in mind that commas are already used in the grammar to separate
    arguments in function calls. With the above change, this:

    ```lox
    foo(1, 2)
    ```

    Now gets parsed as:

    ```lox
    foo((1, 2))
    ```

    In other words, pass a single argument to `foo`, the result of evaluating
    `1, 2`. That's not what we want. To fix that, we simply change the way we
    parse function arguments to require a higher precedence expression than the
    comma operator:

    ```java
    if (!check(RIGHT_PAREN)) {
      do {
        if (arguments.size() >= 8) {
          error(peek(), "Can't have more than 8 arguments.");
        }
        arguments.add(equality()); // <-- was expression().
      } while (match(COMMA));
    }
    ```

2.  We just need one new rule.

    ```ebnf
    expression  → conditional ;
    conditional → equality ( "?" expression ":" conditional )? ;
    // Other rules...
    ```

    The precedence of the operands is pretty interesting. The left operand has
    higher precedence than the others, and the middle operand has lower
    precedence than the condition expression itself. That allows:

        a ? b = c : d

    Again, I won't bother showing the scanner and token changes since they're
    pretty obvious.

    ```java
    private Expr expression() {
      return conditional();
    }

    private Expr conditional() {
      Expr expr = equality();

      if (match(QUESTION)) {
        Expr thenBranch = expression();
        consume(COLON,
            "Expect ':' after then branch of conditional expression.");
        Expr elseBranch = conditional();
        expr = new Expr.Conditional(expr, thenBranch, elseBranch);
      }

      return expr;
    }
    ```

3.  Here's an updated grammar. The grammar itself doesn't "know" that some of
    these productions are errors. The parser handles that.

    ```ebnf
    expression → equality ;
    equality   → comparison ( ( "!=" | "==" ) comparison )* ;
    comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
    term       → factor ( ( "-" | "+" ) factor )* ;
    factor     → unary ( ( "/" | "*" ) unary )* ;
    unary      → ( "!" | "-" | "--" | "++" ) unary
               | postfix ;
    postfix    → primary ( "--" | "++" )* ;
    primary    → NUMBER | STRING | "true" | "false" | "nil"
               | "(" expression ")"
               // Error productions...
               | ( "!=" | "==" ) equality
               | ( ">" | ">=" | "<" | "<=" ) comparison
               | ( "+" ) term
               | ( "/" | "*" ) factor ;
    ```

    Note that "-" isn't an error production because that *is* a valid prefix
    expression.

    With the normal infix productions, the operand non-terminals are one
    precedence level higher than the operator's own precedence. In order to
    handle a series of operators of the same precedence, the rules explicitly
    allow repetition.

    With the error productions, though, the right-hand operand rule is the same
    precedence level. That will effectively strip off the erroneous leading
    operator and then consume a series of infix uses of operators at the same
    level by reusing the existing correct rule. For example:

    ```lox
    + a - b + c - d
    ```

    The error production for `+` will match the leading `+` and then use
    `term` to also match the rest of the expression.

    ```java
    private Expr primary() {
      if (match(FALSE)) return new Expr.Literal(false);
      if (match(TRUE)) return new Expr.Literal(true);
      if (match(NIL)) return new Expr.Literal(null);

      if (match(NUMBER, STRING)) {
        return new Expr.Literal(previous().literal);
      }

      if (match(LEFT_PAREN)) {
        Expr expr = expression();
        consume(RIGHT_PAREN, "Expect ')' after expression.");
        return new Expr.Grouping(expr);
      }

      // Error productions.
      if (match(BANG_EQUAL, EQUAL_EQUAL)) {
        error(previous(), "Missing left-hand operand.");
        equality();
        return null;
      }

      if (match(GREATER, GREATER_EQUAL, LESS, LESS_EQUAL)) {
        error(previous(), "Missing left-hand operand.");
        comparison();
        return null;
      }

      if (match(PLUS)) {
        error(previous(), "Missing left-hand operand.");
        term();
        return null;
      }

      if (match(SLASH, STAR)) {
        error(previous(), "Missing left-hand operand.");
        factor();
        return null;
      }

      throw error(peek(), "Expect expression.");
    }
    ```


================================================
FILE: note/answers/chapter07_evaluating.md
================================================
1.  Python 3 allows comparing all of the various number types with each other,
    except for complex numbers. Booleans (True and False) are a subclass of
    int and work like 1 and 0 for comparison.

    Strings can be compared with each other and are ordered lexicographically.
    Likewise other sequences.

    Comparing sets is defined in terms of subsets and supersets, so that, for
    example `{1, 2} < {1, 2, 3}`. This isn't a total order since many pairs of
    sets are neither subsets nor supersets of each other.

    I think it would be reasonable to extend Lox to support comparing strings
    with each other. I wouldn't support comparing other built in types, nor
    mixing them. Allowing `"1" < 2` is a recipe for confusion.

2.  Replace the Token.PLUS case with:

    ```java
    case PLUS:
      if (left instanceof String || right instanceof String) {
        return stringify(left) + stringify(right);
      }

      if (left instanceof Double && right instanceof Double) {
        return (double)left + (double)right;
      }

      throw new RuntimeError(expr.operator,
          "Operands must be two numbers or two strings.");
    ```

3.  It returns Infinity, -Infinity, or NaN based on the sign of the dividend.
    Given that Lox is a high level scripting language, I think it would be
    better to raise a runtime error to let the user know something got weird.
    That's what Python and Ruby do.

    On the other hand, given that Lox gives the user no way to catch and
    handle runtime errors, not throwing one might be more flexible.


================================================
FILE: note/answers/chapter08_statements.md
================================================
1.  It can be hard to do this in a clean way since the expression grammar
    overlaps the statement grammar so much (every expression is also the
    beginning of an expression statement containing that same expression).

    One trick some parsers use is to simply *try* to parse the syntax as a
    statement. If that fails, hide any parse errors and then try to parse it
    again as an expression.

    I took a slightly different approach. Instead, the parser tries to parse a
    list of statements, but if it knows it's allowed to parse a single
    expression, and it reaches the end of the source right after parsing the
    expression part of an expression statement, then it stops early and returns
    that expression.

    All that's left is to see if the parsed value is an expression and, if so,
    evaluate it and print it.

    This isn't the cleanest implementation, but here goes. In Parser, add two
    new fields:

    ```java
    private boolean allowExpression;
    private boolean foundExpression = false;
    ```

    Then define this method:

    ```java
    Object parseRepl() {
      allowExpression = true;
      List<Stmt> statements = new ArrayList<>();
      while (!isAtEnd()) {
        statements.add(declaration());

        if (foundExpression) {
          Stmt last = statements.get(statements.size() - 1);
          return ((Stmt.Expression) last).expression;
        }

        allowExpression = false;
      }

      return statements;
    }
    ```

    And change expressionStatement() to:

    ```java
    private Stmt expressionStatement() {
      Expr expr = expression();

      if (allowExpression && isAtEnd()) {
        foundExpression = true;
      } else {
        consume(SEMICOLON, "Expect ';' after expression.");
      }
      return new Stmt.Expression(expr);
    }
    ```

    In Interpreter, add:

    ```java
    String interpret(Expr expression) {
      try {
        Object value = evaluate(expression);
        return stringify(value);
      } catch (RuntimeError error) {
        Lox.runtimeError(error);
        return null;
      }
    }
    ```

    Finally, in Lox, change runPrompt() to:

    ```java
    private static void runPrompt() throws IOException {
      InputStreamReader input = new InputStreamReader(System.in);
      BufferedReader reader = new BufferedReader(input);

      for (;;) {
        hadError = false;

        System.out.print("> ");
        Scanner scanner = new Scanner(reader.readLine());
        List<Token> tokens = scanner.scanTokens();

        Parser parser = new Parser(tokens);
        Object syntax = parser.parseRepl();

        // Ignore it if there was a syntax error.
        if (hadError) continue;

        if (syntax instanceof List) {
          interpreter.interpret((List<Stmt>)syntax);
        } else if (syntax instanceof Expr) {
          String result = interpreter.interpret((Expr)syntax);
          if (result != null) {
            System.out.println("= " + result);
          }
        }
      }
    }
    ```

    That should about do it.

2.  This is pretty simple. Instead of initializing variables with null if they
    have no initializer, we use a special sentinel value to distinguish it from
    Lox's nil. Then, we check for that when the variable is accessed.

    In Interpreter, add:

    ```java
    private static Object uninitialized = new Object();
    ```

    Change the first line of visitVarStmt() to:

    ```java
    Object value = uninitialized;
    ```

    Finally, change visitVariableExpr() to:

    ```java
    public Object visitVariableExpr(Expr.Variable expr) {
      Object value = environment.get(expr.name);
      if (value == uninitialized) {
        throw new RuntimeError(expr.name,
            "Variable must be initialized before use.");
      }
      return value;
    }
    ```

    The main downside is that checking for the uninitialized variable on every
    single access significantly slows execution for what is a very common
    operation. Not a big deal given that our Java interpreter isn't designed
    for speed anyway.

3.  > What does the following program do?

    It prints 3. The shadowed variable doesn't come into scope until *after* its
    initializer expression is evaluated, so `a + 2` is evaluated using the
    outer `a`, whose value is 1. Then the result is stored in the new `a`.

    > What did you expect it to do?

    Well, I wrote this book, so it's no surprise to me.

    > Is it what you think it should do?

    Code like this is rare in practice, so I don't care too much. But the
    current behavior is a little surprising. People read code left-to-right, so
    they probably expect the new variable to be in scope as soon as they scan
    over its name after `var`.

    Ideally, I'd make this kind of code a static error. Put the variable in
    scope as soon as its name is encountered but in a special "unusable" state.
    Then, once its initializer is done, make it available. If the initializer
    references it, make that a static error.

    > What does analogous code in other languages you are familiar with do?

    Java disallows shadowing local variables. C# allows shadowing, but doesn't
    allow multiple mentions of the same name in the same block to resolve to
    different variables.

    > What do you think users will expect this to do?

    I think they'd be surprised if the code was valid at all, and would
    probably consider it bad code even if it did do something.


================================================
FILE: note/answers/chapter09_control.md
================================================
1.  The basic idea is that the control flow operations become methods that take
    callbacks for the blocks to execute when true or false. You define two
    classes with singleton instances, one for true and one for false. The
    implementations of the control flow methods on the true class invoke the
    then callbacks. The ones on the false class invoke the else callbacks.

    Like so:

    ```lox
    class True {
      ifThen(thenBranch) {
        return thenBranch();
      }

      ifThenElse(thenBranch, elseBranch) {
        return thenBranch();
      }
    }

    class False {
      ifThen(thenBranch) {
        return nil;
      }

      ifThenElse(thenBranch, elseBranch) {
        return elseBranch();
      }
    }
    ```

    Then we make singleton instances of these classes:

    ```lox
    var t = True();
    var f = False();
    ```

    You can try them out like so:

    ```lox
    fun test(condition) {
      fun ifThenFn() {
        print "if then -> then";
      }

      condition.ifThen(ifThenFn);

      fun ifThenElseThenFn() {
        print "if then else -> then";
      }

      fun ifThenElseElseFn() {
        print "if then else -> else";
      }

      condition.ifThenElse(ifThenElseThenFn, ifThenElseElseFn);
    }

    test(t);
    test(f);
    ```

    This is famously how Smalltalk implements its control flow.

    It looks cumbersome because Lox doesn't have lambdas -- anonymous function
    expressions -- but those would be easy to add to the language if
    we wanted to go in this direction.

    Even more powerful would be a nice terse syntax for defining and passing a
    closure to a method. The Grace language has a particularly nice notation
    for passing multiple blocks to a method. If we adapted that to Lox, we'd
    get something like:

    ```text
    fun test(condition) {
      condition.ifThen {
        print "if then -> then";
      };

      condition.ifThen {
        print "if then else -> then";
      } else {
        print "if then else -> else";
      };
    }

    test(t);
    test(f);
    ```

    It starts to look like this control flow is built into the language even
    though it's only method calls.

2.  Scheme is the language that famously shows that all iteration can be
    represented in terms of recursion and conditional execution. To execute a
    chunk of code more than once, hoist it out into a function that calls itself
    at the end of its body for the next iteration.

    For example, we could represent this `for` loop:

    ```lox
    for (var i = 0; i < 100; i = i + 1) {
      print i;
    }
    ```

    Like so:

    ```lox
    fun forStep(i) {
      print i;
      if (i < 99) forStep(i + 1);
    }
    ```

    When you see heavy use of recursion like here where there are almost a
    hundred recursive calls, the concern is overflowing the stack. However, in
    many cases, you don't need to preserve any information from the previous
    call when beginning a recursive call. If the recursive call is in *tail
    position* -- it's the last thing in the body of the function -- then you
    can discard any stack space used by the previous call before beginning the
    next one.

    This **tail call optimization** lets you use recursion for an unbounded
    number of iterations while consuming only a constant amount of stack space.
    Scheme and some other functional languages require an implementation to
    perform this optimization so that users can safely rely on recursion for
    iteration.

3.  As usual, we start with the AST:

    ```java
    defineAst(outputDir, "Stmt", Arrays.asList(
      "Block      : List<Stmt> statements",
      "Break      : ",  // <--
      "Expression : Expr expression",
      "If         : Expr condition, Stmt thenBranch, Stmt elseBranch",
      "Print      : Expr expression",
      "Var        : Token name, Expr initializer",
      "While      : Expr condition, Stmt body"
    ));
    ```

    Break doesn't have any fields, which actually breaks the little generator
    script, so you also need to change defineType() to:

    ```java
    // Store parameters in fields.
    String[] fields;
    if (fieldList.isEmpty()) {
      fields = new String[0];
    } else {
      fields = fieldList.split(", ");
    }
    ```

    Run that to get the new AST class. Now we need to push the syntax through the
    front end, starting with the new keyword. In TokenType, add `BREAK`:

    ```java
    // Keywords.
    AND, BREAK, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR,
    ```

    And then define it in the lexer:

    ```java
    keywords.put("break",  BREAK);
    ```

    In the parser, we match the keyword in `statement()`:

    ```java
    if (match(BREAK)) return breakStatement();
    ```

    Which calls:

    ```java
    private Stmt breakStatement() {
      consume(SEMICOLON, "Expect ';' after 'break'.");
      return new Stmt.Break();
    }
    ```

    We need some additional parser support. It should be a syntax error to use
    `break` outside of a loop. We do that by adding a field in Parser to track
    how many enclosing loops there currently are:

    ```java
    private int loopDepth = 0;
    ```

    In `forStatement()`, we update that when parsing the loop body:

    ```java
    try {
      loopDepth++;
      Stmt body = statement();

      if (increment != null) {
        body = new Stmt.Block(Arrays.asList(
            body,
            new Stmt.Expression(increment)));
      }

      if (condition == null) condition = new Expr.Literal(true);
      body = new Stmt.While(condition, body);

      if (initializer != null) {
        body = new Stmt.Block(Arrays.asList(initializer, body));
      }

      return body;
    } finally {
      loopDepth--;
    }
    ```

    Likewise `whileStatement()`:

    ```java
    try {
      loopDepth++;
      Stmt body = statement();

      return new Stmt.While(condition, body);
    } finally {
      loopDepth--;
    }
    ```

    Now we can check that when parsing the `break` statement:

    ```java
    private Stmt breakStatement() {
      if (loopDepth == 0) {
        error(previous(), "Must be inside a loop to use 'break'.");
      }
      consume(SEMICOLON, "Expect ';' after 'break'.");
      return new Stmt.Break();
    }
    ```

    To interpret this, we'll use exceptions to jump from the break out of the
    loop. In Interpreter, define a class:

    ```java
    private static class BreakException extends RuntimeException {}
    ```

    Executing a `break` simply throws that:

    ```java
    @Override
    public Void visitBreakStmt(Stmt.Break stmt) {
      throw new BreakException();
    }
    ```

    That gets caught by the `while` loop code and then proceeds from there.

    ```java
    @Override
    public Void visitWhileStmt(Stmt.While stmt) {
      try {
        while (isTruthy(evaluate(stmt.condition))) {
          execute(stmt.body);
        }
      } catch (BreakException ex) {
        // Do nothing.
      }
      return null;
    }
    ```


================================================
FILE: note/answers/chapter10_functions.md
================================================
1.  Smalltalk has different call syntax for different arities. To define a
    method that takes multiple arguments, you use **keyword selectors**. Each
    argument is preceded by a piece of the method name instead of using commas
    as a separator. For example, a method call like:

    ```lox
    list.insert("element", 2)
    ```

    To insert "element" at index 2 would look like this in Smalltalk:

    ```smalltalk
    list insert: "element" at: 2
    ```

    Smalltalk doesn't use a dot to separate method name from receiver. More
    interestingly, the "insert:" and "at:" parts both form a single method
    call whose full name is "insert:at:". Since the selectors and the colons
    that separate them form part of the method's name, there's no way to call
    it with the wrong number of arguments. You can't pass too many or too few
    arguments to "insert:at:" because there would be no way to write that call
    while still actually naming that method.

2.  This requires juggling some code around. In GenerateAst, we need a node
    for function expressions. In the defineAst() call for Expr, add:

    ```java
    "Function : List<Token> parameters, List<Stmt> body",
    ```

    While we're at it, we can reuse that for function statements. A function
    *statement* is now just a name and a function expression:

    ```java
    "Function   : Token name, Expr.Function function",
    ```

    Over in LoxFunction, it will store an Expr.Function instead of a statement
    to handle both types. If the function does have a name, that is tracked
    separately, since lambdas won't have one:

    ```java
    class LoxFunction implements Callable {
      private final String name;
      private final Expr.Function declaration;
      private final Environment closure;

      LoxFunction(String name, Expr.Function declaration, Environment closure) {
        this.name = name;
        this.closure = closure;
        this.declaration = declaration;
      }
      @Override
      public String toString() {
        if (name == null) return "<fn>";
        return "<fn " + name + ">";
      }

      // ...
    }
    ```

    The parser changes are a little more complex. We move the logic to handle
    anonymous functions into a new method. Then the method to handle named
    functions becomes a wrapper around that one:

    ```java
    private Stmt.Function function(String kind) {
      Token name = consume(IDENTIFIER, "Expect " + kind + " name.");
      return new Stmt.Function(name, functionBody(kind));
    }

    private Expr.Function functionBody(String kind) {
      consume(LEFT_PAREN, "Expect '(' after " + kind + " name.");
      List<Token> parameters = new ArrayList<>();
      if (!check(RIGHT_PAREN)) {
        do {
          if (parameters.size() >= 8) {
            error(peek(), "Can't have more than 8 parameters.");
          }

          parameters.add(consume(IDENTIFIER, "Expect parameter name."));
        } while (match(COMMA));
      }
      consume(RIGHT_PAREN, "Expect ')' after parameters.");

      consume(LEFT_BRACE, "Expect '{' before " + kind + " body.");
      List<Stmt> body = block();
      return new Expr.Function(parameters, body);
    }
    ```

    Now we can use `functionBody()` to parse lambdas. In `primary()`, add
    another clause:

    ```java
    if (match(FUN)) return functionBody("function");
    ```

    We've got one nasty little problem. We want lambdas to be a valid primary
    expression, and in theory any primary expression is allowed in an
    expression statement. But if you try to do:

    ```lox
    fun () {};
    ```

    Then the `declaration()` parser will match that `fun` and try to parse it
    as a named function declaration statement. It won't see a name and will
    report a parse error. Even though the above code is pointless, we want it
    to work to avoid a weird edge case in the grammar.

    To handle that, we only want to parse a function declaration if the current
    token is `fun` and the one past that is an identifier. That requires another
    token of lookahead, so we add:

    ```java
    private boolean checkNext(TokenType tokenType) {
      if (isAtEnd()) return false;
      if (tokens.get(current + 1).type == EOF) return false;
      return tokens.get(current + 1).type == tokenType;
    }
    ```

    Then, in `declaration()`, change the `if (match(FUN)) ...` line to:

    ```java
    if (check(FUN) && checkNext(IDENTIFIER)) {
      consume(FUN, null);
      return function("function");
    }
    ```

    Now only a function with a name is parsed as such.
    
    Then our interpreter needs to handle both cases:


    ```java
  
    @Override
    public Void visitFunctionStmt(Stmt.Function stmt) {
        String fnName = stmt.name.lexeme;
        environment.define(fnName, new LoxFunction(fnName, stmt.function, environment));
        return null;
    }

    @Override
    public Object visitFunctionExpr(Expr.Function expr) {
        return new LoxFunction(null, expr, environment);
    }
    ```

    We could have reused `visitFunctionExpr()` for both, but that would lose the
    function's name if someone were to print it. Keeping a separate
    `visitFunctionStmt()` ensures we preserve it:

    ```lox
    fun whichFn(fn) {
      print fn;
    }

    whichFn(fun (b) {
     print b;
    });

    fun named(a) { print a; }
    whichFn(named);
    //
    // <fn>
    // <fn named>
    ```

3.  No, it isn't. Lox uses the same scope for the parameters and local variables
    immediately inside the body. That's why Stmt.Function stores the body as a
    list of statements, not a single Stmt.Block that would create its own
    nested scope separate from the parameters.

    In Java, it's an error because a local variable isn't allowed to shadow or
    collide with another local variable or parameter of the same method.

    It's an error in C because parameters and locals share the same scope.

    It is allowed in Dart. There, parameters are in a separate scope surrounding
    the function body.

    I'm not a fan of Dart's choice. I think shadowing should be allowed in
    general because it helps ensure changes to code are encapsulated and don't
    affect parts of the program unrelated to the change. (See this design note
    for more: http://craftinginterpreters.com/statements-and-state.html#design-note).

    But shadowing still usually leads to more confusing code, so it should be
    avoided when possible. The only thing putting parameters in an outer scope
    allows is shadowing those parameters, but I think any code that did that
    would be *very* hard to read. I would rather prohibit that outright.


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/AstPrinter.java
================================================
package com.craftinginterpreters.lox;

// Creates an unambiguous, if ugly, string representation of AST nodes.
class AstPrinter implements Expr.Visitor<String>, Stmt.Visitor<String> {
  // Renders an expression tree as a parenthesized prefix-notation string.
  String print(Expr expr) {
    return expr.accept(this);
  }

  // Renders a statement tree as a parenthesized prefix-notation string.
  String print(Stmt stmt) {
    return stmt.accept(this);
  }

  // Prints "(block <stmt><stmt>...)"; the nested statements are emitted
  // back to back with no separator between them.
  @Override
  public String visitBlockStmt(Stmt.Block stmt) {
    StringBuilder builder = new StringBuilder();
    builder.append("(block ");

    for (Stmt statement : stmt.statements) {
      builder.append(statement.accept(this));
    }

    builder.append(")");
    return builder.toString();
  }

  @Override
  public String visitExpressionStmt(Stmt.Expression stmt) {
    return parenthesize(";", stmt.expression);
  }

  // Prints "(fun name(p1 p2 ...) <body statements>)".
  @Override
  public String visitFunctionStmt(Stmt.Function stmt) {
    StringBuilder builder = new StringBuilder();
    builder.append("(fun " + stmt.name.lexeme + "(");

    for (Token param : stmt.parameters) {
      // Every parameter except the first is preceded by a space.
      if (param != stmt.parameters.get(0)) builder.append(" ");
      builder.append(param.lexeme);
    }

    builder.append(") ");

    for (Stmt body : stmt.body) {
      builder.append(body.accept(this));
    }

    builder.append(")");
    return builder.toString();
  }

  // Uses the tag "if" when there is no else branch and "if-else" otherwise.
  @Override
  public String visitIfStmt(Stmt.If stmt) {
    if (stmt.elseBranch == null) {
      return parenthesize2("if", stmt.condition, stmt.thenBranch);
    }

    return parenthesize2("if-else", stmt.condition, stmt.thenBranch,
        stmt.elseBranch);
  }

  @Override
  public String visitPrintStmt(Stmt.Print stmt) {
    return parenthesize("print", stmt.expression);
  }

  @Override
  public String visitReturnStmt(Stmt.Return stmt) {
    if (stmt.value == null) return "(return)";
    return parenthesize("return", stmt.value);
  }

  @Override
  public String visitVarStmt(Stmt.Var stmt) {
    if (stmt.initializer == null) {
      return parenthesize2("var", stmt.name);
    }

    return parenthesize2("var", stmt.name, "=", stmt.initializer);
  }

  @Override
  public String visitWhileStmt(Stmt.While stmt) {
    return parenthesize2("while", stmt.condition, stmt.body);
  }

  @Override
  public String visitAssignExpr(Expr.Assign expr) {
    return parenthesize2("=", expr.name.lexeme, expr.value);
  }

  @Override
  public String visitBinaryExpr(Expr.Binary expr) {
    return parenthesize(expr.operator.lexeme, expr.left, expr.right);
  }

  // Prints "(call <callee> <arg> <arg> ...)". The argument list is passed as
  // a single part and flattened by appendPart().
  @Override
  public String visitCallExpr(Expr.Call expr) {
    return parenthesize2("call", expr.callee, expr.arguments);
  }

  @Override
  public String visitGroupingExpr(Expr.Grouping expr) {
    return parenthesize("group", expr.expression);
  }

  @Override
  public String visitLiteralExpr(Expr.Literal expr) {
    if (expr.value == null) return "nil";
    return expr.value.toString();
  }

  @Override
  public String visitLogicalExpr(Expr.Logical expr) {
    return parenthesize(expr.operator.lexeme, expr.left, expr.right);
  }

  @Override
  public String visitUnaryExpr(Expr.Unary expr) {
    return parenthesize(expr.operator.lexeme, expr.right);
  }

  @Override
  public String visitVariableExpr(Expr.Variable expr) {
    return expr.name.lexeme;
  }

  // Wraps `name` and the printed form of each sub-expression in one pair of
  // parentheses, separated by single spaces.
  private String parenthesize(String name, Expr... exprs) {
    StringBuilder builder = new StringBuilder();

    builder.append("(").append(name);
    for (Expr expr : exprs) {
      builder.append(" ");
      builder.append(expr.accept(this));
    }
    builder.append(")");

    return builder.toString();
  }

  // Note: AstPrinting other types of syntax trees is not shown in the
  // book, but this is provided here as a reference for those reading
  // the full code.
  //
  // Like parenthesize(), but accepts a mix of expressions, statements,
  // tokens, collections of those, and arbitrary objects.
  private String parenthesize2(String name, Object... parts) {
    StringBuilder builder = new StringBuilder();

    builder.append("(").append(name);

    for (Object part : parts) {
      appendPart(builder, part);
    }
    builder.append(")");

    return builder.toString();
  }

  // Appends one part preceded by a space. Expr and Stmt nodes are printed
  // recursively, a Token contributes its lexeme, and anything else uses its
  // own string conversion. An Iterable (such as a call's argument list) is
  // flattened so each element is printed in turn rather than falling back to
  // the collection's default toString(), which would expose object identity
  // strings for AST nodes.
  private void appendPart(StringBuilder builder, Object part) {
    if (part instanceof Iterable) {
      for (Object element : (Iterable<?>) part) {
        appendPart(builder, element);
      }
      return;
    }

    builder.append(" ");

    if (part instanceof Expr) {
      builder.append(((Expr) part).accept(this));
    } else if (part instanceof Stmt) {
      builder.append(((Stmt) part).accept(this));
    } else if (part instanceof Token) {
      builder.append(((Token) part).lexeme);
    } else {
      builder.append(part);
    }
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Environment.java
================================================
package com.craftinginterpreters.lox;

import java.util.ArrayList;
import java.util.List;

// A scope whose variables live in numbered slots rather than under names.
// Scopes are chained through `enclosing`; the outermost one has none.
class Environment {
  final Environment enclosing;
  private final List<Object> values = new ArrayList<>();

  // Creates a scope with no enclosing scope.
  Environment() {
    this(null);
  }

  // Creates a scope nested directly inside `enclosing`.
  Environment(Environment enclosing) {
    this.enclosing = enclosing;
  }

  // Stores `value` in the next free slot of this scope.
  void define(Object value) {
    values.add(value);
  }

  // Reads slot `slot` of the scope `distance` hops up the chain.
  Object getAt(int distance, int slot) {
    return ancestor(distance).values.get(slot);
  }

  // Overwrites slot `slot` of the scope `distance` hops up the chain.
  void assignAt(int distance, int slot, Object value) {
    ancestor(distance).values.set(slot, value);
  }

  // Follows `enclosing` links `distance` times, starting from this scope.
  private Environment ancestor(int distance) {
    Environment scope = this;
    int hops = distance;
    while (hops > 0) {
      scope = scope.enclosing;
      hops--;
    }
    return scope;
  }

  // Lists the slots of this scope, then of each enclosing scope in turn,
  // joined by " -> ".
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder(values.toString());
    for (Environment scope = enclosing; scope != null; scope = scope.enclosing) {
      text.append(" -> ").append(scope.values.toString());
    }
    return text.toString();
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Expr.java
================================================
package com.craftinginterpreters.lox;

import java.util.List;

// Base class of every expression node in the syntax tree.
abstract class Expr {
  // One callback per concrete expression type; R is the visitor's result.
  interface Visitor<R> {
    R visitAssignExpr(Assign expr);
    R visitBinaryExpr(Binary expr);
    R visitCallExpr(Call expr);
    R visitGroupingExpr(Grouping expr);
    R visitLiteralExpr(Literal expr);
    R visitLogicalExpr(Logical expr);
    R visitUnaryExpr(Unary expr);
    R visitVariableExpr(Variable expr);
  }

  // Invokes the visitor callback that matches this node's concrete type.
  abstract <R> R accept(Visitor<R> visitor);

  // Assignment of `value` to the variable identified by `name`.
  static class Assign extends Expr {
    final Token name;
    final Expr value;

    Assign(Token name, Expr value) {
      this.name = name;
      this.value = value;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitAssignExpr(this);
    }
  }

  // Two operands joined by an operator token.
  static class Binary extends Expr {
    final Expr left;
    final Token operator;
    final Expr right;

    Binary(Expr left, Token operator, Expr right) {
      this.left = left;
      this.operator = operator;
      this.right = right;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitBinaryExpr(this);
    }
  }

  // A call of `callee` with `arguments`; `paren` is kept alongside them.
  static class Call extends Expr {
    final Expr callee;
    final Token paren;
    final List<Expr> arguments;

    Call(Expr callee, Token paren, List<Expr> arguments) {
      this.callee = callee;
      this.paren = paren;
      this.arguments = arguments;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitCallExpr(this);
    }
  }

  // A wrapper around a single inner expression.
  static class Grouping extends Expr {
    final Expr expression;

    Grouping(Expr expression) {
      this.expression = expression;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitGroupingExpr(this);
    }
  }

  // A constant value held directly in the tree.
  static class Literal extends Expr {
    final Object value;

    Literal(Object value) {
      this.value = value;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitLiteralExpr(this);
    }
  }

  // Same shape as Binary, but dispatched to its own visitor callback.
  static class Logical extends Expr {
    final Expr left;
    final Token operator;
    final Expr right;

    Logical(Expr left, Token operator, Expr right) {
      this.left = left;
      this.operator = operator;
      this.right = right;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitLogicalExpr(this);
    }
  }

  // An operator token applied to a single operand.
  static class Unary extends Expr {
    final Token operator;
    final Expr right;

    Unary(Token operator, Expr right) {
      this.operator = operator;
      this.right = right;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitUnaryExpr(this);
    }
  }

  // A reference to the variable identified by `name`.
  static class Variable extends Expr {
    final Token name;

    Variable(Token name) {
      this.name = name;
    }

    @Override
    <R> R accept(Visitor<R> visitor) {
      return visitor.visitVariableExpr(this);
    }
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Interpreter.java
================================================
package com.craftinginterpreters.lox;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class Interpreter implements Expr.Visitor<Object>, Stmt.Visitor<Void> {
  final Map<String, Object> globals = new HashMap<>();
  private Environment environment;
  private final Map<Expr, Integer> locals = new HashMap<>();
  private final Map<Expr, Integer> slots = new HashMap<>();

  Interpreter() {
    globals.put("clock", new LoxCallable() {
      @Override
      public int arity() {
        return 0;
      }

      @Override
      public Object call(Interpreter interpreter,
                         List<Object> arguments) {
        return (double)System.currentTimeMillis() / 1000.0;
      }
    });
  }
  void interpret(List<Stmt> statements) {
    try {
      for (Stmt statement : statements) {
        execute(statement);
      }
    } catch (RuntimeError error) {
      Lox.runtimeError(error);
    }
  }
  private Object evaluate(Expr expr) {
    return expr.accept(this);
  }
  private void execute(Stmt stmt) {
    stmt.accept(this);
  }
  void resolve(Expr expr, int depth, int slot) {
    locals.put(expr, depth);
    slots.put(expr, slot);
  }
  void executeBlock(List<Stmt> statements, Environment environment) {
    Environment previous = this.environment;
    try {
      this.environment = environment;

      for (Stmt statement : statements) {
        execute(statement);
      }
    } finally {
      this.environment = previous;
    }
  }
  @Override
  public Void visitBlockStmt(Stmt.Block stmt) {
    executeBlock(stmt.statements, new Environment(environment));
    return null;
  }
  @Override
  public Void visitExpressionStmt(Stmt.Expression stmt) {
    evaluate(stmt.expression);
    return null; // [void]
  }
  @Override
  public Void visitFunctionStmt(Stmt.Function stmt) {
    LoxFunction function = new LoxFunction(stmt, environment);
    define(stmt.name, function);
    return null;
  }
  @Override
  public Void visitIfStmt(Stmt.If stmt) {
    if (isTruthy(evaluate(stmt.condition))) {
      execute(stmt.thenBranch);
    } else if (stmt.elseBranch != null) {
      execute(stmt.elseBranch);
    }
    return null;
  }
  @Override
  public Void visitPrintStmt(Stmt.Print stmt) {
    Object value = evaluate(stmt.expression);
    System.out.println(stringify(value));
    return null;
  }
  @Override
  public Void visitReturnStmt(Stmt.Return stmt) {
    Object value = null;
    if (stmt.value != null) value = evaluate(stmt.value);

    throw new Return(value);
  }
  @Override
  public Void visitVarStmt(Stmt.Var stmt) {
    Object value = null;
    if (stmt.initializer != null) {
      value = evaluate(stmt.initializer);
    }

    define(stmt.name, value);
    return null;
  }
  @Override
  public Void visitWhileStmt(Stmt.While stmt) {
    while (isTruthy(evaluate(stmt.condition))) {
      execute(stmt.body);
    }
    return null;
  }
  @Override
  public Object visitAssignExpr(Expr.Assign expr) {
    Object value = evaluate(expr.value);

    Integer distance = locals.get(expr);
    if (distance != null) {
      environment.assignAt(distance, slots.get(expr), value);
    } else {
      if (globals.containsKey(expr.name.lexeme)) {
        globals.put(expr.name.lexeme, value);
      } else {
        throw new RuntimeError(expr.name,
            "Undefined variable '" + expr.name.lexeme + "'.");
      }
    }

    return value;
  }
  @Override
  public Object visitBinaryExpr(Expr.Binary expr) {
    Object left = evaluate(expr.left);
    Object right = evaluate(expr.right); // [left]

    switch (expr.operator.type) {
      case BANG_EQUAL: return !isEqual(left, right);
      case EQUAL_EQUAL: return isEqual(left, right);
      case GREATER:
        checkNumberOperands(expr.operator, left, right);
        return (double)left > (double)right;
      case GREATER_EQUAL:
        checkNumberOperands(expr.operator, left, right);
        return (double)left >= (double)right;
      case LESS:
        checkNumberOperands(expr.operator, left, right);
        return (double)left < (double)right;
      case LESS_EQUAL:
        checkNumberOperands(expr.operator, left, right);
        return (double)left <= (double)right;
      case MINUS:
        checkNumberOperands(expr.operator, left, right);
        return (double)left - (double)right;
      case PLUS:
        if (left instanceof Double && right instanceof Double) {
          return (double)left + (double)right;
        } // [plus]

        if (left instanceof String && right instanceof String) {
          return (String)left + (String)right;
        }

        throw new RuntimeError(expr.operator,
            "Operands must be two numbers or two strings.");
      case SLASH:
        checkNumberOperands(expr.operator, left, right);
        return (double)left / (double)right;
      case STAR:
        checkNumberOperands(expr.operator, left, right);
        return (double)left * (double)right;
    }

    // Unreachable.
    return null;
  }
  @Override
  public Object visitCallExpr(Expr.Call expr) {
    Object callee = evaluate(expr.callee);

    List<Object> arguments = new ArrayList<>();
    for (Expr argument : expr.arguments) { // [in-order]
      arguments.add(evaluate(argument));
    }

    if (!(callee instanceof LoxCallable)) {
      throw new RuntimeError(expr.paren,
          "Can only call functions and classes.");
    }

    LoxCallable function = (LoxCallable)callee;
   if (arguments.size() != function.arity()) {
      throw new RuntimeError(expr.paren, "Expected " +
          function.arity() + " arguments but got " +
          arguments.size() + ".");
    }

    return function.call(this, arguments);
  }
  @Override
  public Object visitGroupingExpr(Expr.Grouping expr) {
    return evaluate(expr.expression);
  }
  @Override
  public Object visitLiteralExpr(Expr.Literal expr) {
    return expr.value;
  }
  @Override
  public Object visitLogicalExpr(Expr.Logical expr) {
    Object left = evaluate(expr.left);

    if (expr.operator.type == TokenType.OR) {
      if (isTruthy(left)) return left;
    } else {
      if (!isTruthy(left)) return left;
    }

    return evaluate(expr.right);
  }
  @Override
  public Object visitUnaryExpr(Expr.Unary expr) {
    Object right = evaluate(expr.right);

    switch (expr.operator.type) {
      case BANG:
        return !isTruthy(right);
      case MINUS:
        checkNumberOperand(expr.operator, right);
        return -(double)right;
    }

    // Unreachable.
    return null;
  }
  @Override
  public Object visitVariableExpr(Expr.Variable expr) {
    return lookUpVariable(expr.name, expr);
  }
  private Object lookUpVariable(Token name, Expr expr) {
    Integer distance = locals.get(expr);
    if (distance != null) {
      return environment.getAt(distance, slots.get(expr));
    } else {
      if (globals.containsKey(name.lexeme)) {
        return globals.get(name.lexeme);
      } else {
        throw new RuntimeError(name,
            "Undefined variable '" + name.lexeme + "'.");
      }
    }
  }
  private void checkNumberOperand(Token operator, Object operand) {
    if (operand instanceof Double) return;
    throw new RuntimeError(operator, "Operand must be a number.");
  }
  private void checkNumberOperands(Token operator,
                                   Object left, Object right) {
    if (left instanceof Double && right instanceof Double) return;
    // [operand]
    throw new RuntimeError(operator, "Operands must be numbers.");
  }
  private boolean isTruthy(Object object) {
    if (object == null) return false;
    if (object instanceof Boolean) return (boolean)object;
    return true;
  }
  private boolean isEqual(Object a, Object b) {
    // nil is only equal to nil.
    if (a == null && b == null) return true;
    if (a == null) return false;

    return a.equals(b);
  }
  private String stringify(Object object) {
    if (object == null) return "nil";

    // Hack. Work around Java adding ".0" to integer-valued doubles.
    if (object instanceof Double) {
      String text = object.toString();
      if (text.endsWith(".0")) {
        text = text.substring(0, text.length() - 2);
      }
      return text;
    }

    return object.toString();
  }
  private void define(Token name, Object value) {
    if (environment != null) {
      environment.define(value);
    } else {
      globals.put(name.lexeme, value);
    }
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Lox.java
================================================
package com.craftinginterpreters.lox;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

// Command-line entry point: runs a script file or an interactive prompt.
public class Lox {
  // Shared across run() calls so state defined at the prompt persists from
  // one line to the next.
  private static final Interpreter interpreter = new Interpreter();
  static boolean hadError = false;
  static boolean hadRuntimeError = false;

  // With one argument runs that file, with none starts the prompt, and with
  // more prints usage and exits with status 64.
  public static void main(String[] args) throws IOException {
    if (args.length > 1) {
      System.out.println("Usage: jlox [script]");
      // Report the usage error through the exit status, in line with the
      // codes used for other failures in runFile().
      System.exit(64);
    } else if (args.length == 1) {
      runFile(args[0]);
    } else {
      runPrompt();
    }
  }

  // Reads the whole file, runs it, and exits with 65 after a syntax or
  // resolution error or 70 after a runtime error.
  private static void runFile(String path) throws IOException {
    byte[] bytes = Files.readAllBytes(Paths.get(path));
    run(new String(bytes, Charset.defaultCharset()));

    // Indicate an error in the exit code.
    if (hadError) System.exit(65);
    if (hadRuntimeError) System.exit(70);
  }

  // Reads and runs one line at a time until the input ends.
  private static void runPrompt() throws IOException {
    InputStreamReader input = new InputStreamReader(System.in);
    BufferedReader reader = new BufferedReader(input);

    for (;;) { // [repl]
      System.out.print("> ");
      String line = reader.readLine();
      // readLine() returns null at end of input (Ctrl-D or a closed pipe);
      // stop instead of handing null to the scanner.
      if (line == null) break;
      run(line);
      // An error on one line should not poison the rest of the session.
      hadError = false;
    }
  }

  // Scans, parses, resolves and interprets `source`, stopping after the
  // first phase that reports an error.
  private static void run(String source) {
    Scanner scanner = new Scanner(source);
    List<Token> tokens = scanner.scanTokens();
    Parser parser = new Parser(tokens);
    List<Stmt> statements = parser.parse();

    // Stop if there was a syntax error.
    if (hadError) return;

    Resolver resolver = new Resolver(interpreter);
    resolver.resolve(statements);

    // Stop if there was a resolution error.
    if (hadError) return;

    interpreter.interpret(statements);
  }

  // Reports an error that is known only by its line number.
  static void error(int line, String message) {
    report(line, "", message);
  }

  // Prints the error to stderr and records that one occurred.
  static private void report(int line, String where, String message) {
    System.err.println(
        "[line " + line + "] Error" + where + ": " + message);
    hadError = true;
  }

  // Reports an error at `token`, naming its lexeme or "end" for EOF.
  static void error(Token token, String message) {
    if (token.type == TokenType.EOF) {
      report(token.line, " at end", message);
    } else {
      report(token.line, " at '" + token.lexeme + "'", message);
    }
  }

  // Prints a runtime error with its line and records that one occurred.
  static void runtimeError(RuntimeError error) {
    System.err.println(error.getMessage() +
        "\n[line " + error.token.line + "]");
    hadRuntimeError = true;
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/LoxCallable.java
================================================
package com.craftinginterpreters.lox;

import java.util.List;

// Anything the interpreter can invoke with call syntax.
interface LoxCallable {
  // Number of arguments expected; the interpreter compares this with the
  // number actually supplied before calling call().
  int arity();
  // Performs the call with the already-evaluated arguments and returns the
  // result.
  Object call(Interpreter interpreter, List<Object> arguments);
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/LoxFunction.java
================================================
package com.craftinginterpreters.lox;

import java.util.List;

// A user-defined function: its declaration plus the scope it was created in.
class LoxFunction implements LoxCallable {
  private final Stmt.Function declaration;
  private final Environment closure;

  LoxFunction(Stmt.Function declaration, Environment closure) {
    this.declaration = declaration;
    this.closure = closure;
  }

  // Shown as "<fn NAME>" using the declared name.
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("<fn ");
    text.append(declaration.name.lexeme);
    text.append(">");
    return text.toString();
  }

  // One argument is expected per declared parameter.
  @Override
  public int arity() {
    return declaration.parameters.size();
  }

  // Runs the body in a fresh scope nested inside the closure. Arguments are
  // stored in parameter order. The result is the value carried by a Return
  // thrown from the body, or null if the body finishes without one.
  @Override
  public Object call(Interpreter interpreter, List<Object> arguments) {
    Environment scope = new Environment(closure);
    int index = 0;
    while (index < declaration.parameters.size()) {
      scope.define(arguments.get(index));
      index++;
    }

    Object result = null;
    try {
      interpreter.executeBlock(declaration.body, scope);
    } catch (Return returnValue) {
      result = returnValue.value;
    }
    return result;
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Parser.java
================================================
package com.craftinginterpreters.lox;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static com.craftinginterpreters.lox.TokenType.*;

/**
 * Recursive-descent parser that turns a token list into statement syntax
 * trees.
 *
 * Each grammar rule is one private method. Syntax errors are reported through
 * Lox.error(); after an error inside a declaration the parser skips ahead to a
 * likely statement boundary and carries on, so several errors can be reported
 * in a single run. The token list is expected to end with an EOF token, which
 * is what isAtEnd() looks for.
 */
class Parser {
  /** Thrown to unwind out of a broken declaration; caught in declaration(). */
  private static class ParseError extends RuntimeException {}

  private final List<Token> tokens;
  // Index into tokens of the next token to consume.
  private int current = 0;

  Parser(List<Token> tokens) {
    this.tokens = tokens;
  }

  /**
   * Parses the whole token list. An entry is null where a declaration failed
   * to parse (the error has already been reported by then).
   */
  List<Stmt> parse() {
    List<Stmt> program = new ArrayList<>();
    while (!isAtEnd()) {
      program.add(declaration());
    }
    return program;
  }

  private Expr expression() {
    return assignment();
  }

  /**
   * Parses one declaration or statement. This is the error-recovery point: on
   * a ParseError the parser resynchronizes and yields null.
   */
  private Stmt declaration() {
    try {
      if (match(FUN)) {
        return function("function");
      } else if (match(VAR)) {
        return varDeclaration();
      } else {
        return statement();
      }
    } catch (ParseError recovered) {
      synchronize();
      return null;
    }
  }

  private Stmt statement() {
    if (match(FOR)) {
      return forStatement();
    }
    if (match(IF)) {
      return ifStatement();
    }
    if (match(PRINT)) {
      return printStatement();
    }
    if (match(RETURN)) {
      return returnStatement();
    }
    if (match(WHILE)) {
      return whileStatement();
    }
    if (match(LEFT_BRACE)) {
      return new Stmt.Block(block());
    }

    return expressionStatement();
  }

  /**
   * Parses a for statement and desugars it into the equivalent while loop,
   * wrapped in blocks that run the initializer once before the loop and the
   * increment after each pass over the body.
   */
  private Stmt forStatement() {
    consume(LEFT_PAREN, "Expect '(' after 'for'.");

    // A lone ';' means there is no initializer clause.
    Stmt initializer = null;
    if (!match(SEMICOLON)) {
      initializer = match(VAR) ? varDeclaration() : expressionStatement();
    }

    Expr condition = check(SEMICOLON) ? null : expression();
    consume(SEMICOLON, "Expect ';' after loop condition.");

    Expr increment = check(RIGHT_PAREN) ? null : expression();
    consume(RIGHT_PAREN, "Expect ')' after for clauses.");

    Stmt loopBody = statement();
    if (increment != null) {
      loopBody = new Stmt.Block(Arrays.asList(
          loopBody,
          new Stmt.Expression(increment)));
    }

    // An omitted condition loops forever.
    if (condition == null) {
      condition = new Expr.Literal(true);
    }
    Stmt loop = new Stmt.While(condition, loopBody);

    if (initializer == null) {
      return loop;
    }
    return new Stmt.Block(Arrays.asList(initializer, loop));
  }

  private Stmt ifStatement() {
    consume(LEFT_PAREN, "Expect '(' after 'if'.");
    Expr condition = expression();
    consume(RIGHT_PAREN, "Expect ')' after if condition.");

    Stmt thenBranch = statement();
    // An else attaches to the if that was parsed most recently.
    Stmt elseBranch = match(ELSE) ? statement() : null;

    return new Stmt.If(condition, thenBranch, elseBranch);
  }

  private Stmt printStatement() {
    Expr printed = expression();
    consume(SEMICOLON, "Expect ';' after value.");
    return new Stmt.Print(printed);
  }

  private Stmt returnStatement() {
    // The 'return' keyword was just consumed by the caller.
    Token keyword = previous();
    Expr value = check(SEMICOLON) ? null : expression();

    consume(SEMICOLON, "Expect ';' after return value.");
    return new Stmt.Return(keyword, value);
  }

  private Stmt varDeclaration() {
    Token name = consume(IDENTIFIER, "Expect variable name.");
    Expr initializer = match(EQUAL) ? expression() : null;

    consume(SEMICOLON, "Expect ';' after variable declaration.");
    return new Stmt.Var(name, initializer);
  }

  private Stmt whileStatement() {
    consume(LEFT_PAREN, "Expect '(' after 'while'.");
    Expr condition = expression();
    consume(RIGHT_PAREN, "Expect ')' after condition.");

    return new Stmt.While(condition, statement());
  }

  private Stmt expressionStatement() {
    Expr value = expression();
    consume(SEMICOLON, "Expect ';' after expression.");
    return new Stmt.Expression(value);
  }

  /**
   * Parses a function declaration after its introducing keyword.
   *
   * @param kind word used in error messages to describe what is being
   *             declared, e.g. "function"
   */
  private Stmt.Function function(String kind) {
    Token name = consume(IDENTIFIER, "Expect " + kind + " name.");
    consume(LEFT_PAREN, "Expect '(' after " + kind + " name.");

    List<Token> parameters = new ArrayList<>();
    boolean moreParameters = !check(RIGHT_PAREN);
    while (moreParameters) {
      if (parameters.size() >= 8) {
        // Reported but not thrown: the parser is not confused, so keep going.
        error(peek(), "Can't have more than 8 parameters.");
      }

      parameters.add(consume(IDENTIFIER, "Expect parameter name."));
      moreParameters = match(COMMA);
    }
    consume(RIGHT_PAREN, "Expect ')' after parameters.");

    consume(LEFT_BRACE, "Expect '{' before " + kind + " body.");
    return new Stmt.Function(name, parameters, block());
  }

  /** Parses declarations up to the closing brace; the '{' is already consumed. */
  private List<Stmt> block() {
    List<Stmt> contents = new ArrayList<>();

    while (!(check(RIGHT_BRACE) || isAtEnd())) {
      contents.add(declaration());
    }

    consume(RIGHT_BRACE, "Expect '}' after block.");
    return contents;
  }

  private Expr assignment() {
    Expr target = or();
    if (!match(EQUAL)) {
      return target;
    }

    Token equals = previous();
    Expr value = assignment();

    if (target instanceof Expr.Variable) {
      return new Expr.Assign(((Expr.Variable) target).name, value);
    }

    // Reported but not thrown; the left-hand expression is returned as is.
    error(equals, "Invalid assignment target.");
    return target;
  }

  private Expr or() {
    Expr left = and();

    while (match(OR)) {
      Token op = previous();
      left = new Expr.Logical(left, op, and());
    }

    return left;
  }

  private Expr and() {
    Expr left = equality();

    while (match(AND)) {
      Token op = previous();
      left = new Expr.Logical(left, op, equality());
    }

    return left;
  }

  private Expr equality() {
    Expr left = comparison();

    while (match(BANG_EQUAL, EQUAL_EQUAL)) {
      Token op = previous();
      left = new Expr.Binary(left, op, comparison());
    }

    return left;
  }

  private Expr comparison() {
    Expr left = addition();

    while (match(GREATER, GREATER_EQUAL, LESS, LESS_EQUAL)) {
      Token op = previous();
      left = new Expr.Binary(left, op, addition());
    }

    return left;
  }

  private Expr addition() {
    Expr left = multiplication();

    while (match(MINUS, PLUS)) {
      Token op = previous();
      left = new Expr.Binary(left, op, multiplication());
    }

    return left;
  }

  private Expr multiplication() {
    Expr left = unary();

    while (match(SLASH, STAR)) {
      Token op = previous();
      left = new Expr.Binary(left, op, unary());
    }

    return left;
  }

  private Expr unary() {
    if (!match(BANG, MINUS)) {
      return call();
    }

    Token op = previous();
    return new Expr.Unary(op, unary());
  }

  /** Parses the argument list of a call whose '(' has already been consumed. */
  private Expr finishCall(Expr callee) {
    List<Expr> arguments = new ArrayList<>();
    boolean moreArguments = !check(RIGHT_PAREN);
    while (moreArguments) {
      if (arguments.size() >= 8) {
        error(peek(), "Can't have more than 8 arguments.");
      }
      arguments.add(expression());
      moreArguments = match(COMMA);
    }

    Token paren = consume(RIGHT_PAREN, "Expect ')' after arguments.");

    return new Expr.Call(callee, paren, arguments);
  }

  private Expr call() {
    Expr callee = primary();

    // Each '(' calls the result of everything parsed so far.
    while (match(LEFT_PAREN)) {
      callee = finishCall(callee);
    }

    return callee;
  }

  private Expr primary() {
    Token next = peek();
    switch (next.type) {
      case FALSE:
        advance();
        return new Expr.Literal(false);
      case TRUE:
        advance();
        return new Expr.Literal(true);
      case NIL:
        advance();
        return new Expr.Literal(null);
      case NUMBER:
      case STRING:
        advance();
        return new Expr.Literal(next.literal);
      case IDENTIFIER:
        advance();
        return new Expr.Variable(next);
      case LEFT_PAREN: {
        advance();
        Expr inner = expression();
        consume(RIGHT_PAREN, "Expect ')' after expression.");
        return new Expr.Grouping(inner);
      }
    }

    throw error(next, "Expect expression.");
  }

  /** Consumes the next token if it has any of the given types. */
  private boolean match(TokenType... types) {
    for (TokenType candidate : types) {
      if (!check(candidate)) continue;

      advance();
      return true;
    }

    return false;
  }

  /** Consumes the next token if it has the given type, else reports and throws. */
  private Token consume(TokenType type, String message) {
    if (!check(type)) {
      throw error(peek(), message);
    }
    return advance();
  }

  /** True if the next token has the given type; never true once at EOF. */
  private boolean check(TokenType tokenType) {
    return !isAtEnd() && peek().type == tokenType;
  }

  /** Steps past the next token (unless at EOF) and returns the one before the cursor. */
  private Token advance() {
    if (!isAtEnd()) {
      current++;
    }
    return previous();
  }

  private boolean isAtEnd() {
    return peek().type == EOF;
  }

  private Token peek() {
    return tokens.get(current);
  }

  private Token previous() {
    return tokens.get(current - 1);
  }

  /** Reports the error and returns (does not throw) the exception for the caller to use. */
  private ParseError error(Token token, String message) {
    Lox.error(token, message);
    return new ParseError();
  }

  /**
   * Discards tokens until just after a ';' or just before a keyword that
   * starts a statement, so parsing can resume after a syntax error.
   */
  private void synchronize() {
    for (advance(); !isAtEnd(); advance()) {
      if (previous().type == SEMICOLON) return;

      switch (peek().type) {
        case CLASS:
        case FUN:
        case VAR:
        case FOR:
        case IF:
        case WHILE:
        case PRINT:
        case RETURN:
          return;
        default:
          break;
      }
    }
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Resolver.java
================================================
package com.craftinginterpreters.lox;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

/**
 * Static pass that walks the syntax tree once before interpretation.
 *
 * For every variable reference or assignment that names a local, it tells the
 * interpreter how many scopes away the variable lives and which slot it has in
 * that scope. Slots are handed out in declaration order within a scope. It
 * also reports, through Lox.error(), reading a local in its own initializer,
 * redeclaring a name in the same local scope, and returning from top-level
 * code. Names that are not found in any local scope are left unresolved and
 * assumed to be global.
 */
class Resolver implements Expr.Visitor<Void>, Stmt.Visitor<Void> {
  private final Interpreter interpreter;
  // Local scopes only, innermost on top. An empty stack means global scope.
  private final Stack<Map<String, Variable>> scopes = new Stack<>();
  private FunctionType currentFunction = FunctionType.NONE;

  Resolver(Interpreter interpreter) {
    this.interpreter = interpreter;
  }

  /**
   * Book-keeping for one local variable. Static because it never needs the
   * enclosing Resolver instance.
   */
  private static class Variable {
    // False between declare() and define(), i.e. while the variable's own
    // initializer is being resolved.
    boolean isDefined = false;
    // Index of the variable within its scope, in declaration order.
    final int slot;

    private Variable(int slot) {
      this.slot = slot;
    }
  }

  /** Whether the code currently being resolved is inside a function body. */
  private enum FunctionType {
    NONE,
    FUNCTION
  }

  /** Resolves a list of statements in order, in the current scope. */
  void resolve(List<Stmt> statements) {
    for (Stmt statement : statements) {
      resolve(statement);
    }
  }

  @Override
  public Void visitBlockStmt(Stmt.Block stmt) {
    beginScope();
    resolve(stmt.statements);
    endScope();
    return null;
  }

  @Override
  public Void visitExpressionStmt(Stmt.Expression stmt) {
    resolve(stmt.expression);
    return null;
  }

  @Override
  public Void visitFunctionStmt(Stmt.Function stmt) {
    // Define the name before resolving the body so the function can refer to
    // itself recursively.
    declare(stmt.name);
    define(stmt.name);

    resolveFunction(stmt, FunctionType.FUNCTION);
    return null;
  }

  @Override
  public Void visitIfStmt(Stmt.If stmt) {
    resolve(stmt.condition);
    resolve(stmt.thenBranch);
    if (stmt.elseBranch != null) resolve(stmt.elseBranch);
    return null;
  }

  @Override
  public Void visitPrintStmt(Stmt.Print stmt) {
    resolve(stmt.expression);
    return null;
  }

  @Override
  public Void visitReturnStmt(Stmt.Return stmt) {
    if (currentFunction == FunctionType.NONE) {
      Lox.error(stmt.keyword, "Can't return from top-level code.");
    }

    if (stmt.value != null) {
      resolve(stmt.value);
    }

    return null;
  }

  @Override
  public Void visitVarStmt(Stmt.Var stmt) {
    // Declare first, define after the initializer, so that a reference to the
    // variable inside its own initializer can be detected.
    declare(stmt.name);
    if (stmt.initializer != null) {
      resolve(stmt.initializer);
    }
    define(stmt.name);
    return null;
  }

  @Override
  public Void visitWhileStmt(Stmt.While stmt) {
    resolve(stmt.condition);
    resolve(stmt.body);
    return null;
  }

  @Override
  public Void visitAssignExpr(Expr.Assign expr) {
    resolve(expr.value);
    resolveLocal(expr, expr.name);
    return null;
  }

  @Override
  public Void visitBinaryExpr(Expr.Binary expr) {
    resolve(expr.left);
    resolve(expr.right);
    return null;
  }

  @Override
  public Void visitCallExpr(Expr.Call expr) {
    resolve(expr.callee);

    for (Expr argument : expr.arguments) {
      resolve(argument);
    }

    return null;
  }

  @Override
  public Void visitGroupingExpr(Expr.Grouping expr) {
    resolve(expr.expression);
    return null;
  }

  @Override
  public Void visitLiteralExpr(Expr.Literal expr) {
    // Literals contain no variables or subexpressions.
    return null;
  }

  @Override
  public Void visitLogicalExpr(Expr.Logical expr) {
    resolve(expr.left);
    resolve(expr.right);
    return null;
  }

  @Override
  public Void visitUnaryExpr(Expr.Unary expr) {
    resolve(expr.right);
    return null;
  }

  @Override
  public Void visitVariableExpr(Expr.Variable expr) {
    if (!scopes.isEmpty()) {
      // Scope maps never hold null values, so a single lookup tells us both
      // whether the name is declared here and whether it is defined yet.
      Variable innermost = scopes.peek().get(expr.name.lexeme);
      if (innermost != null && !innermost.isDefined) {
        Lox.error(expr.name,
            "Can't read local variable in its own initializer.");
      }
    }

    resolveLocal(expr, expr.name);
    return null;
  }

  private void resolve(Stmt stmt) {
    stmt.accept(this);
  }

  private void resolve(Expr expr) {
    expr.accept(this);
  }

  /**
   * Resolves a function's parameters and body in one new scope, tracking that
   * we are inside a function for the duration.
   */
  private void resolveFunction(Stmt.Function function, FunctionType type) {
    FunctionType enclosingFunction = currentFunction;
    currentFunction = type;

    beginScope();
    for (Token param : function.parameters) {
      declare(param);
      define(param);
    }
    resolve(function.body);
    endScope();

    currentFunction = enclosingFunction;
  }

  private void beginScope() {
    scopes.push(new HashMap<>());
  }

  private void endScope() {
    scopes.pop();
  }

  /**
   * Adds the name to the innermost local scope as declared-but-not-defined and
   * assigns it the next slot. Does nothing at global scope.
   */
  private void declare(Token name) {
    if (scopes.isEmpty()) return;

    Map<String, Variable> scope = scopes.peek();
    if (scope.containsKey(name.lexeme)) {
      Lox.error(name,
          "Already variable with this name in this scope.");
    }

    scope.put(name.lexeme, new Variable(scope.size()));
  }

  /** Marks a previously declared name in the innermost local scope as usable. */
  private void define(Token name) {
    if (scopes.isEmpty()) return;
    scopes.peek().get(name.lexeme).isDefined = true;
  }

  /**
   * Searches the local scopes from innermost outward for the name and, if
   * found, reports its scope distance and slot to the interpreter.
   */
  private void resolveLocal(Expr expr, Token name) {
    for (int i = scopes.size() - 1; i >= 0; i--) {
      Variable variable = scopes.get(i).get(name.lexeme);
      if (variable != null) {
        interpreter.resolve(expr, scopes.size() - 1 - i, variable.slot);
        return;
      }
    }

    // Not found. Assume it is global.
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Return.java
================================================
package com.craftinginterpreters.lox;

/**
 * Exception used as control flow: it carries a function's return value.
 * LoxFunction.call() catches it and hands back the value.
 */
class Return extends RuntimeException {
  final Object value;

  Return(Object value) {
    // No message and no cause; the two false flags disable suppression and
    // the writable stack trace, neither of which is needed here.
    super(null, null, false, false);
    this.value = value;
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/RuntimeError.java
================================================
package com.craftinginterpreters.lox;

/**
 * An error raised while a Lox program is running. It keeps the token where
 * the error occurred so the line number can be reported alongside the message.
 */
class RuntimeError extends RuntimeException {
  // Token at which the error was detected; its line is used when reporting.
  final Token token;

  RuntimeError(Token token, String message) {
    super(message);
    this.token = token;
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Scanner.java
================================================
package com.craftinginterpreters.lox;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.craftinginterpreters.lox.TokenType.*; // [static-import]

/**
 * Converts Lox source text into a flat list of tokens.
 *
 * Whitespace and "//" line comments are dropped, reserved words get their own
 * token types, and lexical errors are reported through Lox.error() without
 * stopping the scan. The returned list always ends with an EOF token.
 */
class Scanner {
  // Reserved words and the token type each one produces.
  private static final Map<String, TokenType> keywords = new HashMap<>();

  static {
    keywords.put("and", AND);
    keywords.put("class", CLASS);
    keywords.put("else", ELSE);
    keywords.put("false", FALSE);
    keywords.put("for", FOR);
    keywords.put("fun", FUN);
    keywords.put("if", IF);
    keywords.put("nil", NIL);
    keywords.put("or", OR);
    keywords.put("print", PRINT);
    keywords.put("return", RETURN);
    keywords.put("super", SUPER);
    keywords.put("this", THIS);
    keywords.put("true", TRUE);
    keywords.put("var", VAR);
    keywords.put("while", WHILE);
  }

  private final String source;
  private final List<Token> tokens = new ArrayList<>();
  // Offset of the first character of the lexeme being scanned.
  private int start = 0;
  // Offset of the next character to be read.
  private int current = 0;
  // Line number of the character at 'current', starting from 1.
  private int line = 1;

  Scanner(String source) {
    this.source = source;
  }

  /** Scans the entire source and returns the tokens, terminated by EOF. */
  List<Token> scanTokens() {
    while (!isAtEnd()) {
      // Every pass through the loop begins a new lexeme.
      start = current;
      scanToken();
    }

    tokens.add(new Token(EOF, "", null, line));
    return tokens;
  }

  /** Scans one lexeme starting at 'start', adding at most one token. */
  private void scanToken() {
    char c = advance();
    switch (c) {
      // Tokens that are always a single character.
      case '(':
        addToken(LEFT_PAREN);
        break;
      case ')':
        addToken(RIGHT_PAREN);
        break;
      case '{':
        addToken(LEFT_BRACE);
        break;
      case '}':
        addToken(RIGHT_BRACE);
        break;
      case ',':
        addToken(COMMA);
        break;
      case '.':
        addToken(DOT);
        break;
      case '-':
        addToken(MINUS);
        break;
      case '+':
        addToken(PLUS);
        break;
      case ';':
        addToken(SEMICOLON);
        break;
      case '*':
        addToken(STAR);
        break;

      // Operators that may be followed by '='.
      case '!':
        addToken(match('=') ? BANG_EQUAL : BANG);
        break;
      case '=':
        addToken(match('=') ? EQUAL_EQUAL : EQUAL);
        break;
      case '<':
        addToken(match('=') ? LESS_EQUAL : LESS);
        break;
      case '>':
        addToken(match('=') ? GREATER_EQUAL : GREATER);
        break;

      case '/':
        if (!match('/')) {
          addToken(SLASH);
          break;
        }
        // "//" starts a comment that runs to the end of the line. The newline
        // itself is left for the next call so the line count is updated.
        while (!isAtEnd() && peek() != '\n') {
          advance();
        }
        break;

      // Whitespace produces no token.
      case ' ':
      case '\r':
      case '\t':
        break;

      case '\n':
        line++;
        break;

      case '"':
        string();
        break;

      default:
        if (isDigit(c)) {
          number();
        } else if (isAlpha(c)) {
          identifier();
        } else {
          Lox.error(line, "Unexpected character.");
        }
        break;
    }
  }

  /** Scans the rest of an identifier or reserved word. */
  private void identifier() {
    while (isAlphaNumeric(peek())) {
      advance();
    }

    // Reserved words get their own type; everything else is an identifier.
    String word = source.substring(start, current);
    TokenType reserved = keywords.get(word);
    addToken(reserved != null ? reserved : IDENTIFIER);
  }

  /** Scans the rest of a number literal, with an optional fractional part. */
  private void number() {
    while (isDigit(peek())) {
      advance();
    }

    // A '.' only belongs to the number when a digit follows it.
    boolean hasFraction = peek() == '.' && isDigit(peekNext());
    if (hasFraction) {
      advance(); // The '.' itself.

      while (isDigit(peek())) {
        advance();
      }
    }

    String digits = source.substring(start, current);
    addToken(NUMBER, Double.parseDouble(digits));
  }

  /** Scans a string literal whose opening quote has already been consumed. */
  private void string() {
    while (!isAtEnd() && peek() != '"') {
      // Strings may span lines; keep the line count in step.
      if (peek() == '\n') {
        line++;
      }
      advance();
    }

    // Ran off the end of the source without finding the closing quote.
    if (isAtEnd()) {
      Lox.error(line, "Unterminated string.");
      return;
    }

    advance(); // The closing quote.

    // The literal value is the text between the quotes.
    String contents = source.substring(start + 1, current - 1);
    addToken(STRING, contents);
  }

  /** Consumes the next character only if it is the expected one. */
  private boolean match(char expected) {
    if (isAtEnd() || source.charAt(current) != expected) {
      return false;
    }

    current++;
    return true;
  }

  /** Next character without consuming it, or '\0' at the end of the source. */
  private char peek() {
    return isAtEnd() ? '\0' : source.charAt(current);
  }

  /** Character after the next one, or '\0' if there is none. */
  private char peekNext() {
    int following = current + 1;
    return following >= source.length() ? '\0' : source.charAt(following);
  }

  private boolean isAlpha(char c) {
    if (c == '_') return true;
    if (c >= 'a' && c <= 'z') return true;
    return c >= 'A' && c <= 'Z';
  }

  private boolean isAlphaNumeric(char c) {
    return isAlpha(c) || isDigit(c);
  }

  private boolean isDigit(char c) {
    return '0' <= c && c <= '9';
  }

  private boolean isAtEnd() {
    return current >= source.length();
  }

  /** Consumes and returns the next character. */
  private char advance() {
    return source.charAt(current++);
  }

  private void addToken(TokenType type) {
    addToken(type, null);
  }

  /** Adds a token whose lexeme is the source text from 'start' to 'current'. */
  private void addToken(TokenType type, Object literal) {
    String lexeme = source.substring(start, current);
    tokens.add(new Token(type, lexeme, literal, line));
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Stmt.java
================================================
package com.craftinginterpreters.lox;

import java.util.List;

/**
 * Base class of the statement syntax tree nodes, with one nested class per
 * kind of statement and a visitor interface for operating on them.
 *
 * NOTE(review): the layout matches what tool/GenerateAst prints, so this file
 * appears to be generated; hand edits may be overwritten -- confirm.
 */
abstract class Stmt {
  // One visit method per statement class; R is the result type of the visit.
  interface Visitor<R> {
    R visitBlockStmt(Block stmt);
    R visitExpressionStmt(Expression stmt);
    R visitFunctionStmt(Function stmt);
    R visitIfStmt(If stmt);
    R visitPrintStmt(Print stmt);
    R visitReturnStmt(Return stmt);
    R visitVarStmt(Var stmt);
    R visitWhileStmt(While stmt);
  }

  // A list of statements grouped as one statement.
  static class Block extends Stmt {
    Block(List<Stmt> statements) {
      this.statements = statements;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitBlockStmt(this);
    }

    final List<Stmt> statements;
  }

  // An expression used as a statement.
  static class Expression extends Stmt {
    Expression(Expr expression) {
      this.expression = expression;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitExpressionStmt(this);
    }

    final Expr expression;
  }

  // A function declaration: its name, parameter names, and body statements.
  static class Function extends Stmt {
    Function(Token name, List<Token> parameters, List<Stmt> body) {
      this.name = name;
      this.parameters = parameters;
      this.body = body;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitFunctionStmt(this);
    }

    final Token name;
    final List<Token> parameters;
    final List<Stmt> body;
  }

  // An if statement; elseBranch is null when there is no else clause.
  static class If extends Stmt {
    If(Expr condition, Stmt thenBranch, Stmt elseBranch) {
      this.condition = condition;
      this.thenBranch = thenBranch;
      this.elseBranch = elseBranch;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitIfStmt(this);
    }

    final Expr condition;
    final Stmt thenBranch;
    final Stmt elseBranch;
  }

  // A print statement and the expression whose value it prints.
  static class Print extends Stmt {
    Print(Expr expression) {
      this.expression = expression;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitPrintStmt(this);
    }

    final Expr expression;
  }

  // A return statement; keyword is the 'return' token (kept for error
  // reporting) and value is null when no value is given.
  static class Return extends Stmt {
    Return(Token keyword, Expr value) {
      this.keyword = keyword;
      this.value = value;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitReturnStmt(this);
    }

    final Token keyword;
    final Expr value;
  }

  // A variable declaration; initializer is null when none is written.
  static class Var extends Stmt {
    Var(Token name, Expr initializer) {
      this.name = name;
      this.initializer = initializer;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitVarStmt(this);
    }

    final Token name;
    final Expr initializer;
  }

  // A while loop. The parser also builds these when desugaring for loops.
  static class While extends Stmt {
    While(Expr condition, Stmt body) {
      this.condition = condition;
      this.body = body;
    }

    <R> R accept(Visitor<R> visitor) {
      return visitor.visitWhileStmt(this);
    }

    final Expr condition;
    final Stmt body;
  }

  // Dispatches to the visit method that corresponds to the node's own class.
  abstract <R> R accept(Visitor<R> visitor);
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/Token.java
================================================
package com.craftinginterpreters.lox;

/**
 * One lexical token: its type, the exact source text it was scanned from, the
 * literal value for number and string tokens (null otherwise), and the source
 * line it was found on.
 */
class Token {
  final TokenType type;
  final String lexeme;
  final Object literal;
  final int line;

  Token(TokenType type, String lexeme, Object literal, int line) {
    this.type = type;
    this.lexeme = lexeme;
    this.literal = literal;
    this.line = line;
  }

  /** Returns the type, lexeme, and literal separated by single spaces. */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(type).append(" ").append(lexeme).append(" ").append(literal);
    return text.toString();
  }
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/lox/TokenType.java
================================================
package com.craftinginterpreters.lox;

/**
 * Every kind of token the scanner can produce, grouped by how it is
 * recognized. EOF marks the end of the token list.
 */
enum TokenType {
  // Single-character tokens.
  LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE,
  COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR,

  // One or two character tokens.
  BANG, BANG_EQUAL,
  EQUAL, EQUAL_EQUAL,
  GREATER, GREATER_EQUAL,
  LESS, LESS_EQUAL,

  // Literals.
  IDENTIFIER, STRING, NUMBER,

  // Keywords.
  AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR,
  PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,

  // End of input.
  EOF
}


================================================
FILE: note/answers/chapter11_resolving/4/com/craftinginterpreters/tool/GenerateAst.java
================================================
package com.craftinginterpreters.tool;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;

/**
 * Command-line tool that writes Expr.java and Stmt.java, the syntax tree
 * classes of the interpreter, from the compact descriptions listed in main().
 *
 * Each description has the form "ClassName : Type field, Type field, ...".
 * For every base class the tool emits a Visitor interface, one static nested
 * class per description (constructor, accept(), final fields), and the
 * abstract accept() method on the base class.
 */
public class GenerateAst {
  /**
   * Entry point. Expects exactly one argument, the directory to write the
   * generated files into; otherwise prints a usage line and exits with 1.
   *
   * @throws IOException if a generated file cannot be created or written
   */
  public static void main(String[] args) throws IOException {
    if (args.length != 1) {
      System.err.println("Usage: generate_ast <output directory>");
      System.exit(1);
    }
    String outputDir = args[0];

    defineAst(outputDir, "Expr", Arrays.asList(
      "Assign   : Token name, Expr value",
      "Binary   : Expr left, Token operator, Expr right",
      "Call     : Expr callee, Token paren, List<Expr> arguments",
      "Grouping : Expr expression",
      "Literal  : Object value",
      "Logical  : Expr left, Token operator, Expr right",
      "Unary    : Token operator, Expr right",
      "Variable : Token name"
    ));

    defineAst(outputDir, "Stmt", Arrays.asList(
      "Block      : List<Stmt> statements",
      "Expression : Expr expression",
      "Function   : Token name, List<Token> parameters, List<Stmt> body",
      "If         : Expr condition, Stmt thenBranch, Stmt elseBranch",
      "Print      : Expr expression",
      "Return     : Token keyword, Expr value",
      "Var        : Token name, Expr initializer",
      "While      : Expr condition, Stmt body"
    ));
  }

  /**
   * Writes outputDir/baseName.java containing the base class, its visitor
   * interface, and one nested class per entry of types.
   *
   * @throws IOException if the file cannot be created or a write fails
   */
  private static void defineAst(
      String outputDir, String baseName, List<String> types)
      throws IOException {
    String path = outputDir + "/" + baseName + ".java";

    // try-with-resources closes the file even if a malformed description
    // makes the generation code throw part-way through.
    try (PrintWriter writer = new PrintWriter(path, "UTF-8")) {
      writer.println("package com.craftinginterpreters.lox;");
      writer.println("");
      writer.println("import java.util.List;");
      writer.println("");
      writer.println("abstract class " + baseName + " {");

      defineVisitor(writer, baseName, types);

      // The AST classes.
      for (String type : types) {
        String[] parts = type.split(":");
        String className = parts[0].trim();
        String fields = parts[1].trim();
        defineType(writer, baseName, className, fields);
      }

      // The base accept() method.
      writer.println("");
      writer.println("  abstract <R> R accept(Visitor<R> visitor);");

      writer.println("}");

      // PrintWriter swallows I/O errors; surface them rather than leaving a
      // silently truncated source file behind.
      if (writer.checkError()) {
        throw new IOException("Failed to write " + path);
      }
    }
  }

  /** Emits the Visitor interface with one visit method per described class. */
  private static void defineVisitor(
      PrintWriter writer, String baseName, List<String> types) {
    writer.println("  interface Visitor<R> {");

    for (String type : types) {
      String typeName = type.split(":")[0].trim();
      writer.println("    R visit" + typeName + baseName + "(" +
          typeName + " " + baseName.toLowerCase() + ");");
    }

    writer.println("  }");
  }

  /**
   * Emits one nested class.
   *
   * @param fieldList comma-and-space separated "Type name" pairs; used
   *                  verbatim as the constructor's parameter list
   */
  private static void defineType(
      PrintWriter writer, String baseName,
      String className, String fieldList) {
    writer.println("");
    writer.println("  static class " + className + " extends " +
        baseName + " {");

    // Constructor.
    writer.println("    " + className + "(" + fieldList + ") {");

    // Store parameters in fields.
    String[] fields = fieldList.split(", ");
    for (String field : fields) {
      // Each field is "Type name"; the name is the second word.
      String name = field.split(" ")[1];
      writer.println("      this." + name + " = " + name + ";");
    }

    writer.println("    }");

    // Visitor pattern.
    writer.println();
    writer.println("    <R> R accept(Visitor<R> visitor) {");
    writer.println("      return visitor.visit" +
        className + baseName + "(this);");
    writer.println("    }");

    // Fields.
    writer.println();
    for (String field : fields) {
      writer.println("    final " + field + ";");
    }

    writer.println("  }");
  }

  // The pastry types below are a small hand-written illustration of the
  // visitor pattern. Nothing in the generator above refers to them.
  interface PastryVisitor {
    void visitBeignet(Beignet beignet);
    void visitCruller(Cruller cruller);
  }

  abstract class Pastry {
    abstract void accept(PastryVisitor visitor);
  }

  class Beignet extends Pastry {
    @Override
    void accept(PastryVisitor visitor) {
      visitor.visitBeignet(this);
    }
  }

  class Cruller extends Pastry {
    @Override
    void accept(PastryVisitor visitor) {
      visitor.visitCruller(this);
    }
  }
}


================================================
FILE: note/answers/chapter11_resolving/chapter11_resolving.md
================================================
1.  Consider:

    ```lox
    fun foo() {
      if (itsTuesday) foo();
    }
    ```

    The function does call itself inside its definition. But it relies on some
    initial outer call to kick off the recursion. Some outside code must refer
    to "foo" by name first. That can't happen until the function declaration
    statement itself has finished executing. By then, "foo" is fully defined
    and is safe to use.

2.  In C, the variable is put in scope before its initializer, which means that
    the initializer refers to the variable being initialized. Since C does not
    require any clearing of uninitialized memory, it means you could potentially
    access garbage data.

    Java does not allow one local variable to shadow another so it's an error
    because of that if the outer variable is also local. The outer variable
    could be a field on the surrounding class. In that case, like C, the local
    variable is in scope in its own initializer. However, Java makes it an error
    to refer to a variable that may not have been initialized, so this falls
    under that case and is an error.

    Obviously, C's approach is crazy talk. Java is fine and takes advantage of
    definite assignment analysis, which is useful for other things (like
    ensuring final fields are initialized before the constructor body
    completes). I like when languages get a lot of mileage out of a single
    concept.

3.  The basic idea is that instead of storing just a boolean state for each
    local variable as we resolve the code, we'll allow a variable to be in one
    of three states:

    1. It has been declared but not yet defined.
    2. It has been defined but not yet read.
    3. It has been read.

    Any variable that goes out of scope when in the defined-but-not-yet-read
    state is an error. The annoying part is that we can't detect the error until
    the variable goes out of scope, but we want to report it on the line that
    the variable was declared. So we also need to keep track of the token from
    the variable declaration. We'll bundle that and the three-state enum into
    a little class inside the Resolver class:

    ```java
      private static class Variable {
        final Token name;
        VariableState state;

        private Variable(Token name, VariableState state) {
          this.name = name;
          this.state = state;
        }
      }

      private enum VariableState {
        DECLARED,
        DEFINED,
        READ
      }
    ```

    Then we change the scope stack to use that instead of Boolean:

    ```java
      private final Stack<Map<String, Variable>> scopes = new Stack<>();
    ```

    When we resolve a local variable, we mark it used. However, we don't want
    to consider assigning to a local variable to be a "use". Writing to a
    variable that's never read is still pointless. So we change resolveLocal()
    to:

    ```java
      private void resolveLocal(Expr expr, Token name, boolean isRead) {
        for (int i = scopes.size() - 1; i >= 0; i--) {
          if (scopes.get(i).containsKey(name.lexeme)) {
            interpreter.resolve(expr, scopes.size() - 1 - i);

            // Mark it used.
            if (isRead) {
              scopes.get(i).get(name.lexeme).state = VariableState.READ;
            }
            return;
          }
        }

        // Not found. Assume it is global.
      }
    ```

    Every call to resolveLocal() needs to pass in that flag. In
    visitVariableExpr(), it's true:

    ```java
        resolveLocal(expr, expr.name, true);
    ```

    In visitAssignExpr(), it's false:

    ```java
        resolveLocal(expr, expr.name, false);
    ```

    Next, we update the existing code that touches scopes to use the new
    Variable class:

    ```java
      public Void visitVariableExpr(Expr.Variable expr) {
        if (!scopes.isEmpty() &&
            scopes.peek().containsKey(expr.name.lexeme) &&
            scopes.peek().get(expr.name.lexeme).state == VariableState.DECLARED) {
          Lox.error(expr.name,
              "Can't read local variable in its own initializer.");
        }

        resolveLocal(expr, expr.name, true);
        return null;
      }

      private void beginScope() {
        scopes.push(new HashMap<String, Variable>());
      }

      private void declare(Token name) {
        if (scopes.isEmpty()) return;

        Map<String, Variable> scope = scopes.peek();
        if (scope.containsKey(name.lexeme)) {
          Lox.error(name,
              "Already variable with this name in this scope.");
        }

        scope.put(name.lexeme, new Variable(name, VariableState.DECLARED));
      }

      private void define(Token name) {
        if (scopes.isEmpty()) return;
        scopes.peek().get(name.lexeme).state = VariableState.DEFINED;
      }
    ```

    Finally, when a scope is popped, we check its variables to see if any were
    not read:

    ```java
      private void endScope() {
        Map<String, Variable> scope = scopes.pop();

        for (Map.Entry<String, Variable> entry : scope.entrySet()) {
          if (entry.getValue().state == VariableState.DEFINED) {
            Lox.error(entry.getValue().name, "Local variable is not used.");
          }
        }
      }
    ```

4. This challenge is a real challenge and involves even more code changes.
   I went ahead and made a copy of the interpreter with the relevant changes
   in the "4" directory here.


================================================
FILE: note/answers/chapter12_classes.md
================================================
1.  Metaclasses are so cool, I almost wish the book itself discussed them
    properly, but there are only so many pages. The idea is that a class object
    is itself an instance, which means it must have its own class -- a
    metaclass. That metaclass defines the methods that are available on the
    class object -- what you'd think of as the "static" methods in a language
    like Java.

    Before we get to metaclasses, we need to push the new syntax through. In
    AstGenerator, add a new field to Stmt.Class:

    ```java
    "Class      : Token name, List<Stmt.Function> methods, List<Stmt.Function> classMethods",
    ```

    When parsing a class, we separate out the class methods (prefixed with
    "class") into a separate list:

    ```java
    private Stmt classDeclaration() {
      Token name = consume(IDENTIFIER, "Expect class name.");

      List<Stmt.Function> methods = new ArrayList<>();
      List<Stmt.Function> classMethods = new ArrayList<>();
      consume(LEFT_BRACE, "Expect '{' before class body.");

      while (!check(RIGHT_BRACE) && !isAtEnd()) {
        boolean isClassMethod = match(CLASS);
        (isClassMethod ? classMethods : methods).add(function("method"));
      }

      consume(RIGHT_BRACE, "Expect '}' after class body.");

      return new Stmt.Class(name, methods, classMethods);
    }
    ```

    In the resolver, we need to make sure to resolve the class methods too:

    ```java
    for (Stmt.Function method : stmt.classMethods) {
      beginScope();
      scopes.peek().put("this", true);
      resolveFunction(method, FunctionType.METHOD);
      endScope();
    }
    ```

    They are resolved mostly like methods. They even have a "this" variable,
    which will be the class itself.

    Now we're ready for metaclasses. Change the declaration of LoxClass to:

    ```java
    class LoxClass extends LoxInstance implements LoxCallable {
      final String name;
      private final Map<String, LoxFunction> methods;

      LoxClass(LoxClass metaclass, String name,
            Map<String, LoxFunction> methods) {
        super(metaclass);
        this.name = name;
        this.methods = methods;
      }

      // ...
    }
    ```

    LoxClass now extends LoxInstance. Every class object is also itself an
    instance of a class, its metaclass. When we interpret a class declaration,
    we create two LoxClasses:

    ```java
    @Override
    public Void visitClassStmt(Stmt.Class stmt) {
      environment.define(stmt.name.lexeme, null);
      Map<String, LoxFunction> classMethods = new HashMap<>();
      for (Stmt.Function method : stmt.classMethods) {
        LoxFunction function = new LoxFunction(method, environment, false);
        classMethods.put(method.name.lexeme, function);
      }

      LoxClass metaclass = new LoxClass(null,
          stmt.name.lexeme + " metaclass", classMethods);

      Map<String, LoxFunction> methods = new HashMap<>();
      for (Stmt.Function method : stmt.methods) {
        LoxFunction function = new LoxFunction(method, environment,
            method.name.lexeme.equals("init"));
        methods.put(method.name.lexeme, function);
      }

      LoxClass klass = new LoxClass(metaclass, stmt.name.lexeme, methods);
      environment.assign(stmt.name, klass);
      return null;
    }
    ```

    First, we create a metaclass containing all of the class methods. It has
    null for its metametaclass to stop the infinite regress. Then we create the
    main class like we did previously. The only difference is that we pass in
    the metaclass as its class.

    That's it. There are no other interpreter changes. Now that LoxClass is an
    instance of LoxInstance, the existing code for property gets now applies to
    class objects. On the last line of:

    ```lox
    class Math {
      class square(n) {
        return n * n;
      }
    }

    print Math.square(3); // Prints "9".
    ```

    The `.square` expression looks at the object on the left. It's a
    LoxInstance. We call `.get()` on that. That fails to find a field named
    "square" so it looks for a method on the object's class with that name. The
    object's class is the metaclass, and the method is found there. You can
    even put fields on classes now:

    ```lox
    Math.pi = 3.141592653;
    print Math.pi;
    ```

2.  The first implementation detail we have to figure out is how our AST
    distinguishes a getter declaration from the declaration of a method that
    takes no parameters. This is kind of cute, but we'll use a *null*
    parameter list to indicate the former and an *empty* one for the latter. So,
    when parsing a method (and only a method, there are no getter *functions*),
    we allow the parameter list to be omitted:

    ```java
    private Stmt.Function function(String kind) {
      Token name = consume(IDENTIFIER, "Expect " + kind + " name.");

      List<Token> parameters = null;

      // Allow omitting the parameter list entirely in method getters.
      if (!kind.equals("method") || check(LEFT_PAREN)) {
        consume(LEFT_PAREN, "Expect '(' after " + kind + " name.");
        parameters = new ArrayList<>();
        if (!check(RIGHT_PAREN)) {
          do {
            if (parameters.size() >= 255) {
              error(peek(), "Can't have more than 255 parameters.");
            }

            parameters.add(consume(IDENTIFIER, "Expect parameter name."));
          } while (match(COMMA));
        }
        consume(RIGHT_PAREN, "Expect ')' after parameters.");
      }

      consume(LEFT_BRACE, "Expect '{' before " + kind + " body.");
      List<Stmt> body = block();
      return new Stmt.Function(name, parameters, body);
    }
    ```

    Now we need to make sure the rest of the interpreter doesn't choke on a
    null parameter list. We check for it when resolving:

    ```java
    private void resolveFunction(Stmt.Function function, FunctionType type) {
      FunctionType enclosingFunction = currentFunction;
      currentFunction = type;

      beginScope();
      if (function.params != null) {
        for (Token param : function.params) {
          declare(param);
          define(param);
        }
      }
      resolve(function.body);
      endScope();
      currentFunction = enclosingFunction;
    }
    ```

    And when calling a LoxFunction:

    ```java
    @Override
    public Object call(Interpreter interpreter, List<Object> arguments) {
      Environment environment = new Environment(closure);
      if (declaration.params != null) {
        for (int i = 0; i < declaration.params.size(); i++) {
          environment.define(declaration.params.get(i).lexeme,
              arguments.get(i));
        }
      }

      // ...
    }
    ```

    Now all that's left is to interpret getters. The only difference compared to
    methods is that the getter body is executed eagerly as soon as the property
    is accessed instead of waiting for a later call expression to invoke it.

    This may not be the most elegant implementation, but it gets the job done:

    ```java
    @Override
    public Object visitGetExpr(Expr.Get expr) {
      Object object = evaluate(expr.object);
      if (object instanceof LoxInstance) {
        Object result = ((LoxInstance) object).get(expr.name);
        if (result instanceof LoxFunction &&
            ((LoxFunction) result).isGetter()) {
          result = ((LoxFunction) result).call(this, null);
        }

        return result;
      }

      throw new RuntimeError(expr.name,
          "Only instances have properties.");
    }
    ```

    After looking up the property, we see if the resulting object is a getter.
    If so, we invoke it right now and use the result of that. This relies on
    one little helper in LoxFunction:

    ```java
    public boolean isGetter() {
      return declaration.params == null;
    }
    ```

    And that's it.

3.  Python and JavaScript allow you to freely access the fields on an object
    from outside of the methods on that object. Ruby and Smalltalk encapsulate
    instance state. Only methods on the class can access the raw fields, and it
    is up to the class to decide which state is exposed using getters and
    setters. Most statically typed languages offer access control modifiers
    like `private` and `public` to explicitly control on a per-member basis
    which parts of a class are externally accessible.

    What are the trade-offs between these approaches and why might a language
    prefer one or the other?

    The decision to encapsulate at all or not is the classic
    trade-off between whether you want to make things easier for the class
    *consumer* or the class *maintainer*. By making everything public and
    freely externally visible and modifiable, a downstream user of a class has
    more freedom to pop the hood open and muck around in the class's internals.

    However, that access tends to increase coupling between the class and its
    users. That increased coupling makes the class itself more brittle, similar
    to the "fragile base class problem". If users are directly accessing
    properties that the class author considered implementation details, they
    lose the freedom to tweak that implementation without breaking those users.
    The class can end up harder to change. That's more painful for the
    maintainer, but also has a knock-on effect to the consumer -- if the class
    evolves more slowly, they get fewer new features for free from the
    upstream maintainer.

    On the other hand, free external access to class state is a simpler, easier
    user experience when the class maintainer and consumer are the same person.
    If you're banging out a small script, it's handy to be able to just push
    stuff around without having to go through a lot of ceremony and boilerplate.
    At small scales, most language features that build fences in the program are
    more annoying than they are useful.

    As the program scales up, though, those fences become increasingly important
    since no one person is able to hold the entire program in their head.
    Boundaries in the code let you make productive changes while only knowing a
    single region of the program.

    Assuming you do want some sort of access control over properties, the next
    question is how fine-grained. Java has four different access control levels.
    That's four concepts the user needs to understand. Every time you add a
    member to a class, you need to pick one of the four, and need to have the
    expertise and forethought to choose wisely. This adds to the cognitive load
    of the language and adds some mental friction when programming.

    However, at large scales, each of those access control levels (except maybe
    package private) has proven to be useful. Having a few options gives class
    maintainers precise control over what extension points the class user has
    access to. While the class author has to do the mental work to pick a
    modifier, the class *consumer* gets to benefit from that. The modifier
    chosen for each member clearly communicates to the class user how the class
    is intended to be used. If you're subclassing a class and looking at a sea
    of methods, trying to figure out which one to override, the fact that one
    is protected while the others are all private or public makes your choice
    much easier -- it's a clear sign that that method is for the subclass's
    use.


================================================
FILE: note/answers/chapter13_inheritance/1.md
================================================
I'm gonna pick traits, for no particular reason. "Traits" means slightly
different things in the various languages that implement them. For my purposes,
I'll say:

*   A trait is a set of reusable methods.

*   A class can include as many traits as it wants. When it does, all of the
    methods from the traits are copied into the class.

*   A trait is *not* a class. That means you can't construct one. A trait
    doesn't define a kind of object or any sort of identity.

*   Traits can be composed. A trait can include the methods of other traits.

*   Any method collision is an error. The fact that collisions are not silently
    treated like overrides or shadows is one of the defining characteristics of
    traits, compared to mixins or multiple inheritance. Sophisticated languages
    give you ways of renaming or hiding in order to fix method collisions. I'll
    just make it an error.

The syntax for defining a trait looks like a class but with a different keyword:

```lox
trait SomeStuff {
  method() {
    print "method";
  }

  another() {
    print "another";
  }
}
```

To include the methods from one trait into a class or another trait, add a
"with" clause followed by the list of traits after the declaration:

```lox
class UsesTrait < Superclass with ATrait, AnotherTrait { ... }

trait ComposesTraits with SomeTrait, AnotherTrait { ... }
```

We'll do the implementation front to back. First, a couple of new reserved words
in TokenType:

```java
  TRAIT, WITH,
```

In the scanner, we add the keywords for them:

```java
keywords.put("trait",  TRAIT);
keywords.put("with",   WITH);
```

In the AST generator, we add a new statement node for a trait declaration:

```java
"Trait      : Token name, List<Expr> traits," +
            " List<Stmt.Function> methods",
```

And we also need to extend the class declaration AST to store the list of traits
it applies:

```java
"Class      : Token name, Expr superclass," +
            " List<Expr> traits," +
            " List<Stmt.Function> methods",
```

Now to parse. A trait declaration looks much like a class declaration. We start
by recognizing its leading keyword in `declaration()`:

```java
if (match(TRAIT)) return traitDeclaration();
```

That calls:

```java
private Stmt traitDeclaration() {
  Token name = consume(IDENTIFIER, "Expect trait name.");

  List<Expr> traits = withClause();

  consume(LEFT_BRACE, "Expect '{' before trait body.");

  List<Stmt.Function> methods = new ArrayList<>();
  while (!check(RIGHT_BRACE) && !isAtEnd()) {
    methods.add(function("method"));
  }

  consume(RIGHT_BRACE, "Expect '}' after trait body.");

  return new Stmt.Trait(name, traits, methods);
}
```

I could probably refactor and reuse some code from `classDeclaration()`, but I'm
not gonna worry about that. We also need this helper for parsing the "with"
clause:

```java
private List<Expr> withClause() {
  List<Expr> traits = new ArrayList<>();
  if (match(WITH)) {
    do {
      consume(IDENTIFIER, "Expect trait name.");
      traits.add(new Expr.Variable(previous()));
    } while (match(COMMA));
  }

  return traits;
}
```

A class declaration can also apply traits, so we extend `classDeclaration()` by
parsing a with clause before the class body and then passing that to the AST
constructor:

```java
private Stmt classDeclaration() {
  Token name = consume(IDENTIFIER, "Expect class name.");

  Expr superclass = null;
  if (match(LESS)) {
    consume(IDENTIFIER, "Expect superclass name.");
    superclass = new Expr.Variable(previous());
  }

  List<Expr> traits = withClause(); // <-- Add this.

  consume(LEFT_BRACE, "Expect '{' before class body.");

  List<Stmt.Function> methods = new ArrayList<>();
  while (!check(RIGHT_BRACE) && !isAtEnd()) {
    methods.add(function("method"));
  }

  consume(RIGHT_BRACE, "Expect '}' after class body.");

  // Add this.                          --v
  return new Stmt.Class(name, superclass, traits, methods);
}
```

Next is the resolver. Traits are not like other classes (they can't contain
`super` calls, in particular), so we add another ClassType case for them:

```java
private enum ClassType {
  NONE,
  CLASS,
  SUBCLASS,
  TRAIT // <-- Add this.
}
```

And we need a visit method for trait declarations:

```java
@Override
public Void visitTraitStmt(Stmt.Trait stmt) {
  declare(stmt.name);
  define(stmt.name);
  ClassType enclosingClass = currentClass;
  currentClass = ClassType.TRAIT;

  for (Expr trait : stmt.traits) {
    resolve(trait);
  }

  beginScope();
  scopes.peek().put("this", true);

  for (Stmt.Function method : stmt.methods) {
    FunctionType declaration = FunctionType.METHOD;
    resolveFunction(method, declaration);
  }

  endScope();

  currentClass = enclosingClass;
  return null;
}
```

It's pretty similar to resolving a class. The main difference is we don't treat
initializers specially. (We probably should. This means if you apply a trait
that defines a method named `init()`, it will act like an initializer but won't
have been resolved as one. Forgive me.)

Also, when resolving a class declaration, we resolve its with clause:

```java
// Add right before beginScope() call.
for (Expr trait : stmt.traits) {
  resolve(trait);
}
```

One last resolution bit. We'll disallow super calls in trait methods since we
don't know if there will be a superclass when the trait is applied:

```java
@Override
public Void visitSuperExpr(Expr.Super expr) {
  if (currentClass == ClassType.NONE) {
    Lox.error(expr.keyword,
        "Can't use 'super' outside of a class.");
  } else if (currentClass == ClassType.TRAIT) { // <-- Add this.
    Lox.error(expr.keyword,                     // <-- Add this.
        "Can't use 'super' in a trait.");      // <-- Add this.
  } else if (currentClass != ClassType.SUBCLASS) {
    Lox.error(expr.keyword,
        "Can't use 'super' in a class with no superclass.");
  }

  resolveLocal(expr, expr.keyword);
  return null;
}
```

We're almost ready to interpret. First, we need a runtime representation for a
trait. I thought about reusing LoxClass, but that would let you construct
traits, which we don't want. Instead, let's define a new class:

```java
package com.craftinginterpreters.lox;

import java.util.Map;

class LoxTrait {
  final Token name;
  final Map<String, LoxFunction> methods;

  LoxTrait(Token name, Map<String, LoxFunction> methods) {
    this.name = name;
    this.methods = methods;
  }

  @Override
  public String toString() {
    return name.lexeme;
  }
}
```

Sort of like a stripped down class. To interpret a trait declaration:

```java
@Override
public Void visitTraitStmt(Stmt.Trait stmt) {
  environment.define(stmt.name.lexeme, null);

  Map<String, LoxFunction> methods = applyTraits(stmt.traits);

  for (Stmt.Function method : stmt.methods) {
    if (methods.containsKey(method.name.lexeme)) {
      throw new RuntimeError(method.name,
          "A previous trait declares a method named '" +
              method.name.lexeme + "'.");
    }

    LoxFunction function = new LoxFunction(
        method, environment, false);
    methods.put(method.name.lexeme, function);
  }

  LoxTrait trait = new LoxTrait(stmt.name, methods);

  environment.assign(stmt.name, trait);
  return null;
}
```

Pretty similar to a class. A cleaner implementation would refactor and reuse
some code. Since a trait can apply other traits, first we compose all of the
traits in its with clause together into a single method map. That's done by:

```java
private Map<String, LoxFunction> applyTraits(List<Expr> traits) {
  Map<String, LoxFunction> methods = new HashMap<>();

  for (Expr traitExpr : traits) {
    Object traitObject = evaluate(traitExpr);
    if (!(traitObject instanceof LoxTrait)) {
      Token name = ((Expr.Variable)traitExpr).name;
      throw new RuntimeError(name,
          "'" + name.lexeme + "' is not a trait.");
    }

    LoxTrait trait = (LoxTrait) traitObject;
    for (String name : trait.methods.keySet()) {
      if (methods.containsKey(name)) {
        throw new RuntimeError(trait.name,
            "A previous trait declares a method named '" +
                name + "'.");
      }

      methods.put(name, trait.methods.get(name));
    }
  }

  return methods;
}
```

It walks the list of traits, adding the methods for each one into a big map.
Note that unlike with subclassing and overriding, this explicitly checks for a
collision and makes it a runtime error. Assuming nothing collided, it returns
the new map. The trait declaration then adds its own methods into that, again
checking for collisions.

The end result is a single flattened set of methods, not a *chain* of inherited
ones. This is one of the key differences between traits and other forms of
reuse.

A class declaration can also apply traits, so we replace this line in
`visitClassStmt()`:

```java
Map<String, LoxFunction> methods = new HashMap<>();
```

with:

```java
Map<String, LoxFunction> methods = applyTraits(stmt.traits);
```

This implementation is a little rough, especially around things like super, but
it has the main features we want. Give it a try:

```lox
trait A {
  a() {
    print "a";
  }
}

trait B1 {
  b1() {
    print "b1";
  }
}

trait B2 {
  b2() {
    print "b2";
  }
}

trait B with B1, B2 {
  b() {
    this.b1();
    this.b2();
  }
}

class C with A, B {}

var c = C();
c.a();
c.b();
```


================================================
FILE: note/answers/chapter13_inheritance/2.md
================================================
Ideally, we'd make "inner" a reserved word, but that means changing the scanner
and adding a new AST node for it and stuff. Since this is just a challenge
answer, I'll skip that. That means users could technically shadow "inner", but
that's OK.

The implementation I have here is correct (I think) but not very fast. There are
only a couple of pieces. The most interesting one is the change to
LoxClass.findMethod(). It now looks like:

```java
LoxFunction findMethod(LoxInstance instance, String name) {
  LoxFunction method = null;
  LoxFunction inner = null;
  LoxClass klass = this;
  while (klass != null) {
    if (klass.methods.containsKey(name)) {
      inner = method;
      method = klass.methods.get(name);
    }

    klass = klass.superclass;
  }

  if (method != null) {
    return method.bind(instance, inner);
  }

  return null;
}
```

Unlike before, this does not shortcut walking the superclass chain when it finds
the method. Instead, it keeps going so that it can find the *first* (i.e.
super-most) implementation of the method. As it does, it also keeps track of the
previously found method. That is the next one down the inheritance chain, and is
the one "inner" will invoke.

Once that loop is done, it now knows the top method to return, as well as the
method that "inner" should call. (If there is no matching method in the
subclass, "inner" will be null.) It then passes the inner method into bind:

```java
LoxFunction bind(LoxInstance instance, LoxFunction inner) {
  Environment environment = new Environment(closure);
  environment.define("this", instance);
  environment.define("inner", inner);
  return new LoxFunction(declaration, environment, isInitializer);
}
```

Just like "this", we store the function that should be called in the method's
closure environment, bound to "inner". Now a call to "inner" will call the next
method down in the inheritance chain.

In order for uses of "inner" to work, it also needs to be in the resolver's
static scope chains, so we add that there too. In visitClassStmt(), we define
"inner" right after "this":

```java
beginScope();
scopes.peek().put("this", true);
scopes.peek().put("inner", true); // <-- Add.
```

The last piece of bookkeeping is in LoxClass's call() method:

```java
public Object call(Interpreter interpreter, List<Object> arguments) {
  LoxInstance instance = new LoxInstance(this);
  LoxFunction initializer = findMethod(instance, "init");
  if (initializer != null) {
    initializer.call(interpreter, arguments);
  }

  return instance;
}
```

Now that bind() takes two arguments, we also need to fix how initializers are
looked up. (This is also good because users may use "inner" in an initializer
too.) So we change the body of call() to use the above findMethod() method to
correctly find the initializer and bind it.

That's it!


================================================
FILE: note/answers/chapter13_inheritance/3.md
================================================
There's a bunch of small features I'd add to Lox to make it feel a little more
user-friendly. Things like getters, setters, and operator overloading would be
nice. Perhaps a better syntax than having to do "this." inside methods to refer
to properties on the current object.

But, to me, the biggest real missing feature is some form of arrays. You can
implement linked lists and lots of other data structures yourself in Lox, but
arrays are special. In order to have true constant-time access to any element in
the array, you need to be able to create a truly contiguous array. Lox's current
only data abstraction is objects with fields, which don't enable that.

So I'd add arrays. To make them really nice, I'd ideally do something like
growable lists, with literal syntax like `[1, 2, 3]` and a subscript operator
like `someArray[2]` to access and set elements. To keep this challenge simple,
I'll ignore the syntactic niceties and just do the bare minimum to expose the
semantics.

I'll add one new native function, "Array()". It creates a new array with the
given number of elements, all initialized to null:

```lox
var array = Array(3);
print array; // "[null, null, null]".
```

An array object has its own runtime representation. It exposes a few properties
and methods that are also implemented natively:

```lox
var array = Array(3);

// "length" returns the number of elements.
print array.length; // "3".

// "set" sets the element at the given index to the given value.
array.set(1, "new");

// "get" returns the element at a given index.
print array.get(1); // "new".
```

The implementation is pretty straightforward, though native "methods" look a
little funny since our natives up to this point have been top-level functions.
First, in the constructor for Interpreter, we add another native function:

```java
globals.define("Array", new LoxCallable() {
  @Override
  public int arity() {
    return 1;
  }

  @Override
  public Object call(Interpreter interpreter,
                     List<Object> arguments) {
    int size = (int)(double)arguments.get(0);
    return new LoxArray(size);
  }
});
```

That returns a new LoxArray object. It's defined like:

```java
package com.craftinginterpreters.lox;

import java.util.List;

class LoxArray extends LoxInstance {
  private final Object[] elements;

  LoxArray(int size) {
    super(null);
    elements = new Object[size];
  }

  @Override
  Object get(Token name) {
    if (name.lexeme.equals("get")) {
      return new LoxCallable() {
        @Override
        public int arity() {
          return 1;
        }

        @Override
        public Object call(Interpreter interpreter,
                           List<Object> arguments) {
          int index = (int)(double)arguments.get(0);
          return elements[index];
        }
      };
    } else if (name.lexeme.equals("set")) {
      return new LoxCallable() {
        @Override
        public int arity() {
          return 2;
        }

        @Override
        public Object call(Interpreter interpreter,
                           List<Object> arguments) {
          int index = (int)(double)arguments.get(0);
          Object value = arguments.get(1);
          return elements[index] = value;
        }
      };
    } else if (name.lexeme.equals("length")) {
      return (double) elements.length;
    }

    throw new RuntimeError(name, // [hidden]
        "Undefined property '" + name.lexeme + "'.");
  }

  @Override
  void set(Token name, Object value) {
    throw new RuntimeError(name, "Can't add properties to arrays.");
  }

  @Override
  public String toString() {
    StringBuffer buffer = new StringBuffer();
    buffer.append("[");
    for (int i = 0; i < elements.length; i++) {
      if (i != 0) buffer.append(", ");
      buffer.append(elements[i]);
    }
    buffer.append("]");
    return buffer.toString();
  }
}
```

And that's it. Fixed-size arrays are the only other data structure primitive we
really need in order to implement all of the other fancy data structures we take
for granted like hash tables, trees, etc.


================================================
FILE: note/answers/chapter14_chunks/1.md
================================================
In order to run-length encode the line information, we need a slightly smarter
data structure than just a flat array of integers. Instead, we'll define a
little struct:

```c
// chunk.h
typedef struct {
  int offset;
  int line;
} LineStart;
```

Each of these marks the beginning of a new source line in the code, and the
corresponding byte offset of the first instruction on that line. Any bytes after
that first one are understood to be on that same line, until we hit the next
LineStart.

In Chunk, we store an array of these:

```c
// chunk.h
typedef struct {
  int count;
  int capacity;
  uint8_t* code;
  ValueArray constants;
  int lineCount;
  int lineCapacity;
  LineStart* lines;
} Chunk;
```

Note also that we now need a separate lineCount and lineCapacity for this
dynamic array since its size will be different than code's (it should be much
shorter, that's the goal).

We've got to maintain that dynamic array now. When initializing:

```c
// chunk.c
void initChunk(Chunk* chunk) {
  chunk->count = 0;
  chunk->capacity = 0;
  chunk->code = NULL;
  chunk->lineCount = 0;    // <--
  chunk->lineCapacity = 0; // <--
  chunk->lines = NULL;
  initValueArray(&chunk->constants);
}
```

...and freeing...

```c
// chunk.c
void freeChunk(Chunk* chunk) {
  // ...
  FREE_ARRAY(LineStart, chunk->lines, chunk->lineCapacity);
}
```

Where it gets interesting is when writing a new byte:

```c
// chunk.c
// Appends [byte] to the chunk's code and records that it came from source
// line [line]. A new LineStart is only added when the line differs from the
// line of the most recent LineStart.
void writeChunk(Chunk* chunk, uint8_t byte, int line) {
  if (chunk->capacity < chunk->count + 1) {
    int oldCapacity = chunk->capacity;
    chunk->capacity = GROW_CAPACITY(oldCapacity);
    chunk->code = GROW_ARRAY(uint8_t, chunk->code,
        oldCapacity, chunk->capacity);
    // Don't grow line array here...
  }

  chunk->code[chunk->count] = byte;
  chunk->count++;

  // See if we're still on the same line.
  if (chunk->lineCount > 0 &&
      chunk->lines[chunk->lineCount - 1].line == line) {
    return;
  }

  // Append a new LineStart.
  if (chunk->lineCapacity < chunk->lineCount + 1) {
    int oldCapacity = chunk->lineCapacity;
    chunk->lineCapacity = GROW_CAPACITY(oldCapacity);
    chunk->lines = GROW_ARRAY(LineStart, chunk->lines,
                              oldCapacity, chunk->lineCapacity);
  }

  LineStart* lineStart = &chunk->lines[chunk->lineCount++];
  // The count was already incremented above, so the byte we just wrote lives
  // at count - 1.
  lineStart->offset = chunk->count - 1;
  lineStart->line = line;
}
```

There are three changes here. First, we *don't* implicitly grow the line array
when we grow the code array. Their sizes are decoupled now. Instead, we grow the
line array when appending a new LineStart, if needed.

The second `if` statement is where we take advantage of adjacent instructions on
the same line. If the byte we're writing is on the same line as the current line
start, we don't create a new one. This is the compression.

Otherwise, if this is the first byte of code, or it appears on a different line,
we begin a new LineStart and grow the array if needed.

This gives us a compressed array of LineStarts, where each one begins a new
line. Next, we have to use this data when showing line info.

Since the lookup process is a little more complex, we define a helper function:

```c
// chunk.h
int getLine(Chunk* chunk, int instruction);
```

It looks like this:

```c
// chunk.c
// Returns the source line of the byte at offset [instruction] in [chunk].
//
// Binary searches the LineStart array for the last entry whose offset is less
// than or equal to [instruction]. Returns -1 if no LineStart covers the offset
// (the chunk has no line information yet, or the offset is negative) instead
// of looping forever or reading from an unallocated array.
int getLine(Chunk* chunk, int instruction) {
  int start = 0;
  int end = chunk->lineCount - 1;

  while (start <= end) {
    // Written this way so the midpoint can't overflow.
    int mid = start + (end - start) / 2;
    LineStart* line = &chunk->lines[mid];
    if (instruction < line->offset) {
      end = mid - 1;
    } else if (mid == chunk->lineCount - 1 ||
        instruction < chunk->lines[mid + 1].offset) {
      // The instruction falls between this LineStart and the next one (or
      // this is the last LineStart), so this is its line.
      return line->line;
    } else {
      start = mid + 1;
    }
  }

  // Nothing covers this offset.
  return -1;
}
```

Given a byte offset for an instruction, it binary searches through the
LineStart array to find which LineStart -- and thus which line -- contains that
offset. Using binary search is much faster than walking the whole array, but
it does place a constraint on the compiler. It assumes line numbers for the
instructions always monotonically increase. Since we're going to have a
single-pass compiler, that should be doable.

Now we can use this function when we disassemble an instruction:

```c
// debug.c
int disassembleInstruction(Chunk* chunk, int offset) {
  printf("%04d ", offset);
  int line = getLine(chunk, offset);
  if (offset > 0 && line == getLine(chunk, offset - 1)) {
    printf("   | ");
  } else {
    printf("%4d ", line);
  }
  // ...
}
```


================================================
FILE: note/answers/chapter14_chunks/2.md
================================================
There's not too much to this challenge. We add another opcode:

```c
// chunk.h
typedef enum {
  OP_CONSTANT,
  // Loads a constant like OP_CONSTANT, but for constant indexes that don't
  // fit in a single byte.
  OP_CONSTANT_LONG, // <--
  OP_RETURN,
} OpCode;
```

Declare the new function:

```c
// chunk.h
void writeConstant(Chunk* chunk, Value value, int line);
```

And implement it:

```c
// chunk.c
// Adds [value] to the chunk's constant table and writes the instruction that
// loads it, picking the short or long form based on the constant's index.
void writeConstant(Chunk* chunk, Value value, int line) {
  int index = addConstant(chunk, value);

  // An index that fits in one byte uses the short instruction.
  if (index < 256) {
    writeChunk(chunk, OP_CONSTANT, line);
    writeChunk(chunk, (uint8_t)index, line);
    return;
  }

  // Otherwise, write the long instruction followed by the low three bytes of
  // the index, least significant byte first.
  writeChunk(chunk, OP_CONSTANT_LONG, line);
  for (int shift = 0; shift <= 16; shift += 8) {
    writeChunk(chunk, (uint8_t)((index >> shift) & 0xff), line);
  }
}
```

This is pretty straightforward. We add the constant to the array and get the
index back. If the index fits in one byte, we use the short opcode and just
write the single byte.

Otherwise, we write the long opcode. Then we need to split the value into
multiple bytes. It's up to us to pick an endianness -- do we put the most
significant byte first or last? For no particular reason, I went with
little-endian, the same order x86 uses.

We want to be able to disassemble it too, so we add another case:

```c
// debug.c
    case OP_CONSTANT_LONG:
      return longConstantInstruction("OP_CONSTANT_LONG", chunk, offset);
```

And that calls:

```c
// debug.c
// Disassembles an instruction whose operand is a three-byte constant index.
// Prints the instruction's [name], the index, and the constant's value, then
// returns the offset of the next instruction.
static int longConstantInstruction(const char* name, Chunk* chunk,
                                   int offset) {
  // Reassemble the index from its bytes, least significant byte first. This
  // must mirror the order writeConstant() emitted them in.
  uint32_t constant = (uint32_t)chunk->code[offset + 1] |
                      ((uint32_t)chunk->code[offset + 2] << 8) |
                      ((uint32_t)chunk->code[offset + 3] << 16);
  // The index is unsigned, so print it with an unsigned conversion.
  printf("%-16s %4u '", name, (unsigned int)constant);
  printValue(chunk->constants.values[constant]);
  printf("'\n");
  // One byte for the opcode plus three for the operand.
  return offset + 4;
}
```

Again, we need to worry about endianness and we need to make sure we decode
the bytes the same way we encoded them. (If we were interpreting these, we'd
need to do it right there too.)

This isn't a bad approach. The main trade-off is that it adds to the number of
instructions we have. That has a couple of downsides:

- It makes our interpreter more complex. This is pretty minor, though.

- It uses up an opcode. If we want all opcodes to fit in a single byte, we can
  only have 256 different ones. Our toy interpreter won't need anywhere near
  that many, but a full-featured bytecode VM like the JVM or CPython can end up
  using lots of them and we may not want to sacrifice another opcode for this.

- It *might* slightly slow down the interpreter. Machine code has to be loaded
  onto the CPU before it can be executed, so locality affects it too. The less
  code you have in your core bytecode execution loop, the fewer cache misses
  you'll have as it dispatches to different instructions.

  Having multiple instructions, each with their own code, for handling constants
  of different sizes increases the code size of the core interpreter loop and
  might cause a few more cache misses.

In practice, though, none of these is fatal and having multiple instructions
of different sizes isn't a terrible idea.


================================================
FILE: note/answers/chapter15_virtual/1.md
================================================

A helpful intermediate step is to explicitly parenthesize them so we can see
the operator precedence:

    (1 * 2) + 3
    1 + (2 * 3)
    (3 - 2) - 1
    (1 + (2 * 3)) - (4 / (-5))

From there, it's straightforward to mentally do a post-order traversal of the
syntax trees:

    // (1 * 2) + 3
    CONST 1
    CONST 2
    MULTIPLY
    CONST 3
    ADD

    // 1 + (2 * 3)
    CONST 1
    CONST 2
    CONST 3
    MULTIPLY
    ADD

    // (3 - 2) - 1
    CONST 3
    CONST 2
    SUBTRACT
    CONST 1
    SUBTRACT

    // (1 + (2 * 3)) - (4 / (-5))
    CONST 1
    CONST 2
    CONST 3
    MULTIPLY
    ADD
    CONST 4
    CONST 5
    NEGATE
    DIVIDE
    SUBTRACT


================================================
FILE: note/answers/chapter15_virtual/2.md
================================================
First, let's parenthesize:

    4 - (3 * (- 2))

That gives:

    CONST 4
    CONST 3
    CONST 2
    NEGATE
    MULTIPLY
    SUBTRACT

Without negation, we need to subtract a number from zero to negate it, so the
code conceptually becomes:

    4 - (3 * (0 - 2))

Which is:

    CONST 4
    CONST 3
    CONST 0 // <--
    CONST 2
    SUBTRACT // <--
    MULTIPLY
    SUBTRACT

Without subtraction, we add the negation of the subtrahend:

    4 + - (3 * (- 2))

Which is:

    CONST 4
    CONST 3
    CONST 2
    NEGATE
    MULTIPLY
    NEGATE // <--
    ADD // <--

I do think it makes sense to have both instructions. The overhead of dispatching
is pretty high, so you want instructions as high level as possible, you want to
fill your opcode space, and you want common operations to encode as a single
instruction when possible.

Given how common both negation and subtraction are, and given that we've got
plenty of room in our opcode set, it makes perfect sense to have instructions
for both.

I would also consider specialized instructions to load common number constants
like zero and one. It might be worth having instructions to increment and
decrement a number too.


================================================
FILE: note/answers/chapter15_virtual/3.md
================================================
There's nothing super algorithmically interesting about the change. We basically
turn it into a dynamic array like we've seen before. A side effect of this
change is that `stackTop` becomes `stackCount`, an int. Using a raw pointer to
the top makes it a little harder to tell if we've run out of capacity:

```c
typedef struct {
  Chunk* chunk;
  uint8_t* ip;
  Value* stack;      // Dynamic array of values, grown by push() as needed.
  int stackCount;    // Number of values currently on the stack.
  int stackCapacity; // Number of slots allocated for `stack`.
} VM;
```

When we first create the VM, we need to initialize the dynamic array fields:

```c
// Sets up the VM with an empty stack that has no storage allocated yet.
void initVM() {
  vm.stack = NULL;
  vm.stackCapacity = 0;
  resetStack();
}
```

Resetting is still pretty simple:

```c
// Empties the stack by resetting the count. The stack's storage and capacity
// are left untouched.
static void resetStack() {
  vm.stackCount = 0;
}
```

So is `pop()`:

```c
Value pop() {
  // Step back to the topmost value and hand it to the caller.
  return vm.stack[--vm.stackCount];
}
```

Where it gets interesting is `push()`:

```c
void push(Value value) {
  // Make sure there is room for one more value before storing it.
  if (vm.stackCount + 1 > vm.stackCapacity) {
    int previousCapacity = vm.stackCapacity;
    vm.stackCapacity = GROW_CAPACITY(previousCapacity);
    vm.stack = GROW_ARRAY(Value, vm.stack,
                          previousCapacity, vm.stackCapacity);
  }

  vm.stack[vm.stackCount++] = value;
}
```

We also have to change the way we debug the stack:

```c
for (Value *slot = vm.stack; slot < vm.stack + vm.stackCount; slot++) {
  printf("[ ");
  printValue(*slot);
  printf(" ]");
}
```

That `if` test needs to happen every single time we push a value. That happens
all the time while the VM is running, so this is a significant performance
problem.

We wouldn't want to have to do that. Fortunately, it turns out we won't need
to. If you're willing to limit the generated bytecode to fit within certain
constraints -- which happen to be implicitly true in a language with structured
control flow like Lox -- then you can *statically* determine the maximum amount
of stack space a chunk of bytecode could ever use.

During compilation, you always know how many stack slots are in use for locals
and temporaries at any point in time. So you just keep a running tally of the
highwater mark -- the greatest amount of stack space used at any point, and then
store that in with the resulting chunk.

So instead of checking on every single push, we check once before evaluating
the bytecode to see if the stack is big enough to cover the worst case.


================================================
FILE: note/answers/chapter16_scanning.md
================================================
## 1

I've implemented this in another language, Wren. You can see the code here:

https://github.com/munificent/wren/blob/8fae8e4f1e490888e2cc9b2ea6b8e0d0ff9dd60f/src/vm/wren_compiler.c#L118-L130

Poke around in that file for "interp" to see everything. The basic idea is you
have two token types. TOKEN_STRING is for uninterpolated string literals, and
the last segment of an interpolated string. Every piece of a string literal that
precedes an interpolated expression uses a different TOKEN_INTERPOLATION type.

This:

```lox
"Tea will be ready in ${steep + cool} minutes."
```

Gets scanned like:

```text
TOKEN_INTERPOLATION "Tea will be ready in "
TOKEN_IDENTIFIER    "steep"
TOKEN_PLUS          "+"
TOKEN_IDENTIFIER    "cool"
TOKEN_STRING        " minutes."
```

(The interpolation delimiters themselves are discarded.)

And this:

```lox
"Nested ${"interpolation?! Are you ${"mad?!"}"}"
```

Scans as:

```text
TOKEN_INTERPOLATION "Nested "
TOKEN_INTERPOLATION "interpolation?! Are you "
TOKEN_STRING        "mad?!"
TOKEN_STRING        ""
TOKEN_STRING        ""
```

The two empty TOKEN_STRING tokens are because the interpolation appears at the
very end of the string. They tell the parser that they've reached the end of
the interpolated expression.

## 2

As far as I can tell, Java and C# don't actually specify it correctly. Unless
the verbiage is hidden away somewhere in the specs, I believe that this:

```java
List<List<String>> nestedList;
```

Should technically be a syntax error in a fully spec-compliant implementation
of Java or C#. However, all practical implementations don't follow the letter
of the spec and instead do what users want.

C++, as of C++0x, does actually specify this:

http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1757.html

It states that if a `<` has been scanned and no closing `>` has been scanned
yet, and there are no other intervening bracket characters, then a subsequent
`>>` is scanned as two `>` tokens instead of a single shift.

As far as implementation, I think javac handles this by scanning the `>>` as a
single shift token. When the parser is looking for a `>` to close the type
argument, if it sees a shift token, it splits it into two `>` tokens right then,
consumes the first, and then keeps parsing.

Microsoft's C# parser takes the opposite approach. It always scans `>>` as two
separate `>` tokens. Then, when parsing an expression, if it sees two `>` tokens
next to each other with no whitespace between them, it parses them as a shift
operator.

## 3

I don't generally like contextual keywords. It's fairly easy to write a real
parser that can handle them gracefully, but:

*   Users are often confused by them. Many programmers don't even realize that
    contextual keywords exist. They assume all identifiers are either fully
    reserved by the language or fully available for use.

*   Once an identifier becomes a keyword in some context, it quickly takes on
    that meaning to readers and becomes *very* confusing if you use it for your
    own name outside of that context. Now that C# has async/await, you will
    just anger your fellow C# users if you name a variable `await` in some
    non-async method because they are so used to seeing `await` used for its
    keyword meaning.

    So even though it's *technically* usable elsewhere, it's effectively fully
    reserved.

That being said, sometimes you have no other option. Once your language is in
wide use, reserving a new keyword is a breaking change to any code that was
previously using that name. If you can only reserve it inside a new context that
didn't previously exist (for example, async functions in C#), or in a context
where an identifier can't appear, then you can reserve it only in that context
and be confident that you didn't break any previous code.

So they're sort of an inevitable compromise when evolving a language over time.

Implementing them is pretty easy. The scanner scans them like regular
identifiers, since it doesn't generally know the surrounding context. In the
parser, you recognize the keyword in that context by looking for an identifier
token and checking to see if its lexeme is the right string.


================================================
FILE: note/answers/chapter17_compiling.md
================================================
## 1

It's:

```
expression
| parsePrecedence(PREC_ASSIGNMENT)
| | grouping
| | | expression
| | | | parsePrecedence(PREC_ASSIGNMENT)
| | | | | unary // for "-"
| | | | | | parsePrecedence(PREC_UNARY)
| | | | | | | number
| | | | | binary // for "+"
| | | | | | parsePrecedence(PREC_FACTOR) // PREC_TERM + 1
| | | | | | | number
| | binary // for "*"
| | | parsePrecedence(PREC_UNARY) // PREC_FACTOR + 1
| | | | number
| | binary // for "-"
| | | parsePrecedence(PREC_FACTOR) // PREC_TERM + 1
| | | | unary // for "-"
| | | | | parsePrecedence(PREC_UNARY)
| | | | | | number
```

## 2

Lox only has one other: left parenthesis is used as a prefix expression for
grouping, and as an infix expression for invoking a function.

Several languages allow `+` as a prefix unary operator as a parallel to `-` and
then also of course use infix `+` for addition.

A number of languages use square brackets for list or array literals, which
makes `[` a prefix expression and then also use square brackets as a subscript
operator to access elements from a list.

C uses `*` as a prefix operator to dereference a pointer and as infix for
multiplication. Likewise, `&` is a prefix address-of operator and infix bitwise
and.

`*` and `&` aren't prefix *expressions* in Ruby, but they can appear in prefix
position before an argument in an argument list.

## 3

The `?:` operator has lower precedence than almost anything, so we add a new `PREC_CONDITIONAL` level between `PREC_ASSIGNMENT` and `PREC_OR`. I'll skip adding the new TokenType enums for `?` and `:`. That part is pretty obvious. In the new row in the table for the `?` token type, we call:

```c
// Infix parse function for the `?` token. Compiles the two branch operands of
// a conditional expression.
static void conditional() {
  // Compile the then branch.
  parsePrecedence(PREC_CONDITIONAL);

  consume(TOKEN_COLON,
          "Expect ':' after then branch of conditional operator.");

  // Compile the else branch. Its precedence is deliberately lower than that
  // of the conditional expression itself.
  parsePrecedence(PREC_ASSIGNMENT);
}
```

Of course a full implementation needs more code to actually do the conditional
evaluation, but that should compile the operands with the right precedence. Note
that the precedence of the operands is a little unusual. The precedence of the
last operand is *lower* than the conditional expression itself.

That might be surprising, but it's how C rolls.


================================================
FILE: note/answers/chapter18_types.md
================================================
## 1

Having both `OP_NEGATE` and `OP_SUBTRACT` is redundant. We can replace
subtraction with negate-then-add:

```c
// Emit the operator instruction.
switch (operatorType) {
  // ...
  case TOKEN_PLUS:          emitByte(OP_ADD); break;
  case TOKEN_MINUS:         emitBytes(OP_NEGATE, OP_ADD); break; // <--
  case TOKEN_STAR:          emitByte(OP_MULTIPLY); break;
  case TOKEN_SLASH:         emitByte(OP_DIVIDE); break;
  default:
    return; // Unreachable.
}
```

Or we can replace negation with:

1. Push zero.
2. Compile the negate operand.
3. Subtract.

It's also possible to simplify the comparison and equality instructions using
some stack juggling and a bitwise operator. Fundamentally, you only need a
single operation, an instruction that returns one of three values: "less",
"equal", or "greater". Similar to the `compareTo()` methods in many languages or
the `<=>` in Ruby. Once you have that, the other operators can be defined in
terms of it.

## 2

Many other instruction sets define dedicated instructions for common small
integer constants. 0, 1, 2, and -1 are good candidates.

A few arithmetic operations have common constant operands. For those cases, it
may be worth adding instructions for them: incrementing and decrementing by one
are the main ones. But maybe even doubling comes up enough to warrant it.

Likewise, comparisons to certain numbers are also common and can be encoded
directly in a single instruction instead of needing to load the number from a
constant and then use the comparison instruction. Many CPU instruction sets can
compare a number with zero in a single instruction.

There's been some research into "superinstructions" -- automated or manual
techniques for defining instructions that represent a sequence of common simpler
instructions. There is a point of diminishing returns because eventually you run
out of opcodes. You can use larger opcodes (16 bits, etc.), but then that slows
down dispatch overall because now your code is larger.


================================================
FILE: note/answers/chapter19_strings.md
================================================
## 1

This change is mostly mechanical and not too difficult. First, in the type
itself, we change the last field to use the C99 flexible array member syntax:

```c
struct sObjString {
  Obj obj;
  int length;
  // Was:
  // char* chars;
  // Now:
  // A flexible array member. It contributes no size of its own, so whoever
  // allocates an ObjString must add room for the characters after the struct.
  char chars[];
};
```

This means that, by default, `chars` is treated as having zero size, but still
of array type. It's up to us to allocate enough memory for the ObjString and
as many trailing bytes as we need. This means the little memory macros don't
work, so we'll manually call `reallocate()`.

First, replace `takeString()` and `copyString()` with:

```c
ObjString* makeString(int length) {
  ObjString* string = (ObjString*)allocateObject(
      sizeof(ObjString) + length + 1, OBJ_STRING);
  string->length = length;
  return string;
}

// Creates a new string object holding a terminated copy of the first [length]
// characters of [chars].
ObjString* copyString(const char* chars, int length) {
  ObjString* copy = makeString(length);
  char* dest = copy->chars;
  memcpy(dest, chars, length);
  dest[length] = '\0';
  return copy;
}
```

Now that the character buffer is part of the same allocation as the ObjString,
we can't take ownership of an existing character array. Instead, we need to
create the ObjString that the characters will be copied into.

The `makeString()` function allocates an ObjString with as many extra bytes at
the end as the string needs. It also sets the length, but doesn't initialize
the characters.

`copyString()` uses that to make a new string and copy in the given characters.
That's what string literals do. For concatenation, we do:

```c
// Pops two strings off the stack and pushes their concatenation.
static void concatenate() {
  ObjString* b = AS_STRING(pop());
  ObjString* a = AS_STRING(pop());

  // Create the result object first, then write both halves straight into its
  // character array.
  ObjString* result = makeString(a->length + b->length);
  char* dest = result->chars;
  memcpy(dest, a->chars, a->length);
  memcpy(dest + a->length, b->chars, b->length);
  dest[result->length] = '\0';

  push(OBJ_VAL(result));
}
```

Instead of creating the character array then the string object, we create the
string object first and then write the concatenated string right into it.

Here's how we free it:

```c
  switch (object->type) {
    case OBJ_STRING: {
      ObjString* string = (ObjString*)object;
      // Was:
      // FREE_ARRAY(char, string->chars, string->length + 1);
      // FREE(ObjString, object);
      // Now:
      reallocate(object, sizeof(ObjString) + string->length + 1, 0);
      break;
    }
  }
```

Note that we include the extra size, but also that now only a single
`reallocate()` call is needed.

## 2

This one's also not too bad. A more efficient solution would be to pack the
"is owned" bit into the type tag or as a bitfield next to the length. Of course,
since this is an optimization, the right way to go about it is to profile some
real-world programs and see if this optimization is worth doing.

But the simple implementation looks like this:

We add a field to the struct to track whether it owns the character array:

```c
struct sObjString {
  Obj obj;
  // Whether this string owns `chars` and must free it when the string itself
  // is freed.
  bool ownsChars; // <--
  int length;
  // Const because the characters may belong to someone else (for example, the
  // source text of a string literal).
  const char* chars; // <--
};

We replace `takeString()` and `copyString()` with:

```c
ObjString* makeString(bool ownsChars, char* chars, int length) {
  ObjString* string = ALLOCATE_OBJ(ObjString, OBJ_STRING);
  string->ownsChars = ownsChars;
  string->length = length;
  string->chars = chars;

  return string;
}
```

When we create a string from a literal, we call `makeString()` and have it not
own the characters:

```c
// Compiles a string literal into a constant that doesn't own its characters.
static void string() {
  // Point past the first character of the token and leave off the last one,
  // so the surrounding quotes aren't part of the string.
  char* chars = (char*)parser.previous.start + 1;
  int length = parser.previous.length - 2;
  emitConstant(OBJ_VAL(makeString(false, chars, length)));
}
```

And when we concatenate, it does:

```c
// Pops two strings off the stack and pushes their concatenation.
static void concatenate() {
  ObjString* b = AS_STRING(pop());
  ObjString* a = AS_STRING(pop());

  // Build the joined, terminated character array on the heap.
  int total = a->length + b->length;
  char* joined = ALLOCATE(char, total + 1);
  memcpy(joined, a->chars, a->length);
  memcpy(joined + a->length, b->chars, b->length);
  joined[total] = '\0';

  // The new string owns the array we just allocated.
  push(OBJ_VAL(makeString(true, joined, total))); // <--
}
```

We also need to fix `printObject()` since we can't assume strings are terminated
anymore:

```c
// Prints an object value. Strings are the only object type handled here.
void printObject(Value value) {
  switch (OBJ_TYPE(value)) {
    case OBJ_STRING:
      // Changed:
      // The precision limits the output to `length` characters, so the
      // character array doesn't need to be terminated.
      printf("%.*s", AS_STRING(value)->length, AS_CSTRING(value));
      break;
  }
}
```

Finally, when we free a string, we only free the character array if we own it:

```c
// Frees [object] along with any memory it owns.
static void freeObject(Obj* object) {
  switch (object->type) {
    case OBJ_STRING: {
      ObjString* string = (ObjString*)object;
      // Characters the string doesn't own (like those of a string literal)
      // are left alone.
      if (string->ownsChars) { // <--
        FREE_ARRAY(char, (char*)string->chars, string->length + 1);
      }
      FREE(ObjString, object);
      break;
    }
  }
}
```

## 3

My preference depends on the semantics of dispatching the "+" operator. My
general goals are:

* Do convert the other operand to a string and then concatenate when possible.
* Try to maintain symmetry of the operator.

In some languages, these two goals are in conflict.

In C++, you can do it by defining `+` to take two strings. Then, any type that
wants to allow itself to be a concatenated operand defines an implicit
conversion to string. This works whether the operand is on the left or right.

C# has similar behavior, but built in. If one operand of `+` is a string, the
other is converted to a string by calling `ToString()` on it and the results are
concatenated. I think that works fine.

In languages like Smalltalk where `+` is a method dynamically dispatched on the
left-hand operand, it's harder to make the behavior symmetric. It's easy to
define a `+` method on string that converts the right-hand operand to a string.
But it's harder to define a `+` on all types that converts the receiver to a
string if the right operand is a string.

In that case, I'm not as thrilled about overloading `+` to mean concatenation
and might prefer a different operator. (In Smalltalk, that operator is `,`.)

At a higher level, while I like `+` for concatenation because it's familiar, I
don't think it's a great way to build strings out of parts. I *much* prefer
having string interpolation built into the language.


================================================
FILE: note/answers/chapter20_hash/1.md
================================================
There's nothing mind-blowing about this exercise. It's mostly just replacing
`ObjString*` with `Value` in the places where keys are passed around. In a
couple of places, you need to wrap a string in a value or unwrap it.

The full diff is below.

There are two interesting parts. First, we can no longer use a `NULL` key to
represent an empty bucket. Keys are now Values, not pointers, so there is no
`NULL`. We could use `nil`, but remember, `nil` is a valid key now too! Instead,
I added a singleton value type, "empty":

```c
typedef enum {
  VAL_BOOL,
  VAL_NIL,
  VAL_NUMBER,
  VAL_OBJ,
  // Internal marker for an empty hash table bucket.
  VAL_EMPTY // <--
} ValueType;
```

Users can never produce or see a value of this type. It's only used internally
to identify empty buckets.

Second, we need to be able to generate a hash code for any kind of value, not
just strings. Because the other value types are small and fixed-size, I don't
think it's worth caching the hash code. Instead, it's calculated on the fly
as needed. The implementation looks like:

```
// Computes a hash code for a number from the bits of its representation.
static uint32_t hashDouble(double value) {
  // Lets us read the double's bytes as two 32-bit integers.
  // NOTE(review): assumes a double is exactly 64 bits wide -- confirm for the
  // target platform.
  union BitCast {
    double value;
    uint32_t ints[2];
  };

  union BitCast cast;
  // Hash value + 1.0 rather than the value itself. Since 0.0 + 1.0 and
  // -0.0 + 1.0 are both 1.0, the two zeroes end up with the same hash code.
  cast.value = (value) + 1.0;
  return cast.ints[0] + cast.ints[1];
}

// Returns the hash code for [value]. Except for strings, which store their
// hash, the code is calculated on the fly each time.
uint32_t hashValue(Value value) {
  switch (value.type) {
    // The singleton values get arbitrary, distinct, non-zero hash codes.
    case VAL_BOOL:   return AS_BOOL(value) ? 3 : 5;
    case VAL_NIL:    return 7;
    case VAL_NUMBER: return hashDouble(AS_NUMBER(value));
    // NOTE(review): treats every object as a string -- confirm that no other
    // object type can be used as a key.
    case VAL_OBJ:    return AS_STRING(value)->hash;
    case VAL_EMPTY:  return 0;
  }
}
```

There are some somewhat arbitrary choices here. I picked distinct constant
hash codes for the singleton values `true`, `false`, and `nil`. As long as they
aren't all zero, I don't think the value matters too much.

Generating a hash code for a double is harder and exposes some subtle issues.
Should two `NaN` values that have different underlying bit representations be
considered the same or not? Should `0.0` and `-0.0` have the same hash code?

I don't claim to be an expert on this, so I just borrowed the above
implementation from Lua. CPython has an interesting, very different approach.

Here's the whole thing:

```
diff --git a/c/object.c b/c/object.c
index 94f2bb5..c6f97f5 100644
--- a/c/object.c
+++ b/c/object.c
@@ -26,7 +26,7 @@ static ObjString* allocateString(char* chars, int length,
   string->chars = chars;
   string->hash = hash;

-  tableSet(&vm.strings, string, NIL_VAL);
+  tableSet(&vm.strings, OBJ_VAL(string), NIL_VAL);

   return string;
 }
diff --git a/c/table.c b/c/table.c
index 0082f46..78dd7ed 100644
--- a/c/table.c
+++ b/c/table.c
@@ -18,14 +18,14 @@ void freeTable(Table* table) {
   initTable(table);
 }
 static Entry* findEntry(Entry* entries, int capacity,
-                        ObjString* key) {
-  uint32_t index = key->hash % capacity;
+                        Value key) {
+  uint32_t index = hashValue(key) % capacity;
   Entry* tombstone = NULL;

   for (;;) {
     Entry* entry = &entries[index];

-    if (entry->key == NULL) {
+    if (IS_EMPTY(entry->key)) {
       if (IS_NIL(entry->value)) {
         // Empty entry.
         return tombstone != NULL ? tombstone : entry;
@@ -33,7 +33,7 @@ static Entry* findEntry(Entry* entries, int capacity,
         // We found a tombstone.
         if (tombstone == NULL) tombstone = entry;
       }
-    } else if (entry->key == key) {
+    } else if (valuesEqual(key, entry->key)) {
       // We found the key.
       return entry;
     }
@@ -41,11 +41,11 @@ static Entry* findEntry(Entry* entries, int capacity,
     index = (index + 1) % capacity;
   }
 }
-bool tableGet(Table* table, ObjString* key, Value* value) {
+bool tableGet(Table* table, Value key, Value* value) {
   if (table->entries == NULL) return false;

   Entry* entry = findEntry(table->entries, table->capacity, key);
-  if (entry->key == NULL) return false;
+  if (IS_EMPTY(entry->key)) return false;

   *value = entry->value;
   return true;
@@ -53,14 +53,14 @@ bool tableGet(Table* table, ObjString* key, Value* value) {
 static void adjustCapacity(Table* table, int capacity) {
   Entry* entries = ALLOCATE(Entry, capacity);
   for (int i = 0; i < capacity; i++) {
-    entries[i].key = NULL;
+    entries[i].key = EMPTY_VAL;
     entries[i].value = NIL_VAL;
   }

   table->count = 0;
   for (int i = 0; i < table->capacity; i++) {
     Entry* entry = &table->entries[i];
-    if (entry->key == NULL) continue;
+    if (IS_EMPTY(entry->key)) continue;

     Entry* dest = findEntry(entries, capacity, entry->key);
     dest->key = entry->key;
@@ -72,29 +72,29 @@ static void adjustCapacity(Table* table, int capacity) {
   table->entries = entries;
   table->capacity = capacity;
 }
-bool tableSet(Table* table, ObjString* key, Value value) {
+bool tableSet(Table* table, Value key, Value value) {
   if (table->count + 1 > table->capacity * TABLE_MAX_LOAD) {
     int capacity = GROW_CAPACITY(table->capacity);
     adjustCapacity(table, capacity);
   }

   Entry* entry = findEntry(table->entries, table->capacity, key);
-  bool isNewKey = entry->key == NULL;
+  bool isNewKey = IS_EMPTY(entry->key);
   entry->key = key;
   entry->value = value;

   if (isNewKey) table->count++;
   return isNewKey;
 }
-bool tableDelete(Table* table, ObjString* key) {
+bool tableDelete(Table* table, Value key) {
   if (table->count == 0) return false;

   // Find the entry.
   Entry* entry = findEntry(table->entries, table->capacity, key);
-  if (entry->key == NULL) return false;
+  if (IS_EMPTY(entry->key)) return false;

   // Place a tombstone in the entry.
-  entry->key = NULL;
+  entry->key = EMPTY_VAL;
   entry->value = BOOL_VAL(true);

   return true;
@@ -102,7 +102,7 @@ bool tableDelete(Table* table, ObjString* key) {
 void tableAddAll(Table* from, Table* to) {
   for (int i = 0; i < from->capacity; i++) {
     Entry* entry = &from->entries[i];
-    if (entry->key != NULL) {
+    if (!IS_EMPTY(entry->key)) {
       tableSet(to, entry->key, entry->value);
     }
   }
@@ -119,11 +119,13 @@ ObjString* tableFindString(Table* table, const char* chars, int length,
   for (;;) {
     Entry* entry = &table->entries[index];

-    if (entry->key == NULL) return NULL;
-    if (entry->key->length == length &&
-        memcmp(entry->key->chars, chars, length) == 0) {
+    if (IS_EMPTY(entry->key)) return NULL;
+
+    ObjString* string = AS_STRING(entry->key);
+    if (string->length == length &&
+        memcmp(string->chars, chars, length) == 0) {
       // We found it.
-      return entry->key;
+      return string;
     }

     // Try the next slot.
diff --git a/c/table.h b/c/table.h
index 4a51599..02c365d 100644
--- a/c/table.h
+++ b/c/table.h
@@ -5,7 +5,7 @@
 #include "value.h"

 typedef struct {
-  ObjString* key;
+  Value key;
   Value value;
 } Entry;

@@ -17,9 +17,9 @@ typedef struct {

 void initTable(Table* table);
 void freeTable(Table* table);
-bool tableGet(Table* table, ObjString* key, Value* value);
-bool tableSet(Table* table, ObjString* key, Value value);
-bool tableDelete(Table* table, ObjString* key);
+bool tableGet(Table* table, Value key, Value* value);
+bool tableSet(Table* table, Value key, Value value);
+bool tableDelete(Table* table, Value key);
 void tableAddAll(Table* from, Table* to);
 ObjString* tableFindString(Table* table, const char* chars, int length,
                            uint32_t hash);
diff --git a/c/value.c b/c/value.c
index bebcdb6..c139907 100644
--- a/c/value.c
+++ b/c/value.c
@@ -30,6 +30,7 @@ void printValue(Value value) {
     case VAL_NIL:    printf("nil"); break;
     case VAL_NUMBER: printf("%g", AS_NUMBER(value)); break;
     case VAL_OBJ:    printObject(value); break;
+    case VAL_EMPTY:  printf("<empty>"); break;
   }
 }
 bool valuesEqual(Value a, Value b) {
@@ -41,5 +42,27 @@ bool valuesEqual(Value a, Value b) {
     case VAL_NUMBER: return AS_NUMBER(a) == AS_NUMBER(b);
     case VAL_OBJ:
       return AS_OBJ(a) == AS_OBJ(b);
+    case VAL_EMPTY:  return true;
+  }
+}
+
+static uint32_t hashDouble(double value) {
+  union BitCast {
+    double value;
+    uint32_t ints[2];
+  };
+
+  union BitCast cast;
+  cast.value = (value) + 1.0;
+  return cast.ints[0] + cast.ints[1];
+}
+
+uint32_t hashValue(Value value) {
+  switch (value.type) {
+    case VAL_BOOL:   return AS_BOOL(value) ? 3 : 5;
+    case VAL_NIL:    return 7;
+    case VAL_NUMBER: return hashDouble(AS_NUMBER(value));
+    case VAL_OBJ:    return AS_STRING(value)->hash;
+    case VAL_EMPTY:  return 0;
   }
 }
diff --git a/c/value.h b/c/value.h
index a24af84..2ed3370 100644
--- a/c/value.h
+++ b/c/value.h
@@ -10,7 +10,8 @@ typedef enum {
   VAL_BOOL,
   VAL_NIL, // [user-types]
   VAL_NUMBER,
-  VAL_OBJ
+  VAL_OBJ,
+  VAL_EMPTY
 } ValueType;

 typedef struct {
@@ -26,6 +27,7 @@ typedef struct {
 #define IS_NIL(value)     ((value).type == VAL_NIL)
 #define IS_NUMBER(value)  ((value).type == VAL_NUMBER)
 #define IS_OBJ(value)     ((value).type == VAL_OBJ)
+#define IS_EMPTY(value)   ((value).type == VAL_EMPTY)

 #define AS_OBJ(value)     ((value).as.obj)
 #define AS_BOOL(value)    ((value).as.boolean)
@@ -35,6 +37,7 @@ typedef struct {
 #define NIL_VAL           ((Value){ VAL_NIL, { .number = 0 } })
 #define NUMBER_VAL(value) ((Value){ VAL_NUMBER, { .number = value } })
 #define OBJ_VAL(object)   ((Value){ VAL_OBJ, { .obj = (Obj*)object } })
+#define EMPTY_VAL         ((Value){ VAL_EMPTY, { .number = 0 } })

 typedef struct {
   int capacity;
@@ -47,5 +50,6 @@ void initValueArray(ValueArray* array);
 void writeValueArray(ValueArray* array, Value value);
 void freeValueArray(ValueArray* array);
 void printValue(Value value);
+uint32_t hashValue(Value value);

 #endif
```


================================================
FILE: note/answers/chapter21_global.md
================================================
## 1

The optimization is pretty straightforward. When adding a string constant, we
look in the constant table to see if that string is already in there. The
interesting question is how. The simplest implementation is a linear scan over
the existing constants.

But that means compilation time is quadratic in the number of unique identifiers
in the chunk. While that's fine for relatively small programs, users have a
habit of writing larger programs than we ever anticipated. Virtually every
algorithm in the compiler that isn't linear is potentially a performance
problem.

Fortunately, we have a way of looking up strings in constant time -- a hash
table. So, in the compiler, we add a hash table that keeps track of the
identifier constants that have already been added. Each key is an identifier,
and its value is the index of the identifier in the constant table.

In compiler.c, add a module variable:

```c
Table stringConstants;
```

In `compile()`, we initialize and tear it down:

```c
// Compiles `source` into `chunk`. Returns true when no compile error was
// reported. The `stringConstants` table is created and destroyed within a
// single call, so identifier de-duplication is scoped to one compilation.
bool compile(const char* source, Chunk* chunk) {
  initScanner(source);

  compilingChunk = chunk;
  parser.hadError = false;
  parser.panicMode = false;
  initTable(&stringConstants); // <--

  advance();

  while (!match(TOKEN_EOF)) {
    declaration();
  }

  endCompiler();
  freeTable(&stringConstants); // <--
  return !parser.hadError;
}
```

When adding an identifier constant, we look for it in the hash table first:

```c
// Returns the constant-table index holding the identifier in `name`. If the
// same identifier was already recorded in `stringConstants`, its existing
// index is reused; otherwise a new constant is added and remembered.
static uint8_t identifierConstant(Token* name) {
  // See if we already have it.
  ObjString* string = copyString(name->start, name->length);
  Value indexValue;
  if (tableGet(&stringConstants, string, &indexValue)) {
    // We do.
    return (uint8_t)AS_NUMBER(indexValue);
  }

  // First occurrence: add the constant and record its index, stored as a
  // number Value, under the identifier.
  uint8_t index = makeConstant(OBJ_VAL(string));
  tableSet(&stringConstants, string, NUMBER_VAL((double)index));
  return index;
}
```

That's pretty simple. Compiling an identifier is still (amortized) constant
time, though with slightly worse constant factors. In return, we use up fewer
constant table slots. We don't actually save memory from redundant strings
because clox already interns all strings. But the smaller table is nice.

*Note that we leak memory for the identifier string in `identifierConstant()`
if the name is already found. That's because we don't have a GC yet.*

## 2

There are a few ways to solve this. I'll do one that introduces another layer
of indirection, and a little information sharing between the compiler and VM.

In the VM, we remove the `globals` hash table and replace it with:

```c
  Table globalNames;
  ValueArray globalValues;
```

The value array is where the global variable values live. The hash table maps
the name of a global variable to its index in the value array. So, if the
program is:

```lox
var a = "value";
```

Then `globalNames` will contain a single entry, `"a" -> 0` and `globalValues`
will contain a single element, `"value"`. This association is all wired up at
compile time:

```c
// Returns the slot in `vm.globalValues` bound to the global named by `name`.
// The first time a name is seen, a new slot holding `UNDEFINED_VAL` is
// appended and the name -> index association is stored in `vm.globalNames`.
static uint8_t identifierConstant(Token* name) {
  Value index;
  ObjString* identifier = copyString(name->start, name->length);
  if (tableGet(&vm.globalNames, identifier, &index)) {
    return (uint8_t)AS_NUMBER(index);
  }

  // The instruction operand is a single byte, so only 256 slots are
  // addressable. Report the overflow instead of letting the cast below wrap
  // around and silently alias an existing global.
  if (vm.globalValues.count > UINT8_MAX) {
    error("Too many global variables.");
    return 0;
  }

  uint8_t newIndex = (uint8_t)vm.globalValues.count;
  writeValueArray(&vm.globalValues, UNDEFINED_VAL);

  tableSet(&vm.globalNames, identifier, NUMBER_VAL((double)newIndex));
  return newIndex;
}
```

When compiling a reference to a global variable, we see if we've ever
encountered its name before. If so, we know which index its value will occupy in
the `globalValues` array. Otherwise, we add a new empty undefined value in the
array and then store a new hash table entry binding the name to that index.

Even though these two fields live in the VM, the compiler creates them at
compile time. You can think of it sort of like statically allocating memory for
the globals. We actually store the values in the VM so that they persist across
multiple REPL entries. We need to store the name association there too so that
we can find existing global variables.

`UNDEFINED_VAL` is a new, separate singleton value like `nil`. It's used to
mark a global variable slot as not having been defined yet. We can't use `nil`
because `nil` is a valid value to store in a variable.

At runtime, the instructions work like so:

```c
      case OP_GET_GLOBAL: {
        Value value = vm.globalValues.values[READ_BYTE()];
        if (IS_UNDEFINED(value)) {
          runtimeError("Undefined variable.");
          return INTERPRET_RUNTIME_ERROR;
        }
        push(value);
        break;
      }

      case OP_DEFINE_GLOBAL: {
        vm.globalValues.values[READ_BYTE()] = pop();
        break;
      }

      case OP_SET_GLOBAL: {
        uint8_t index = READ_BYTE();
        if (IS_UNDEFINED(vm.globalValues.values[index])) {
          runtimeError("Undefined variable.");
          return INTERPRET_RUNTIME_ERROR;
        }
        vm.globalValues.values[index] = peek(0);
        break;
      }
```

The operand for the instructions is now the direct index of the global variable
in the `globalValues` array. We've looked up the slot at compile time and
bound the result, so at runtime we don't need to worry about the name at all.
This is much faster. The only perf hit we take now is the necessary check at
runtime to ensure the variable has been initialized.

## 3

This question is more subtle than it may seem.

The seemingly safe answer is to say that using a variable that is never defined
anywhere is clearly wrong code, so it should be an error. That's
a reasonable choice.

But when you're in the middle of refactoring a large program, you sometimes have
code in a known broken state. As long as the broken code isn't *called*, it
might be nice to let the user run the other parts of the program that are OK.

You could try to have your cake and eat it too by making a reference to an
undeclared variable be a *warning*. That usually means the language reports it
as an error but still allows the program to be run. That works too, but in
practice, having shades of gray in your error reporting tends to cause user
headaches.

Some teams will want things to be black and white by turning all warnings into
errors, which sacrifices the ability you were trying to provide. Meanwhile,
other teams have the bad habit of committing code containing unfixed warnings,
leading to gradually worsening code. You will likely end up in long arguments
about which diagnostics should be considered fatal errors and which mere
warnings. People have strangely strong opinions about this stuff.

Personally, I'm pretty error-prone and like tools and languages to help me catch
my mistakes, so I'd like it to tell me if there's a use of an undeclared
variable name. If I'm in the middle of refactoring a big codebase, I'm OK with
having to comment out large regions of it to temporarily silence errors. But
that's just me.


================================================
FILE: note/answers/chapter23_jumping/1.md
================================================

Add `TOKEN_CASE`, `TOKEN_COLON`, `TOKEN_DEFAULT`, and `TOKEN_SWITCH` to
TokenType and then implement scanning `:`, `case`, `default`, and `switch` in
the scanner. Not shown here because it's not very interesting.

Most of the work is in the compiler. In `statement()`, add:

```c
  } else if (match(TOKEN_SWITCH)) {
    switchStatement();
```

Then here's the main thing:

```c
#define MAX_CASES 256

// Compiles a `switch` statement. The switch value stays on the stack for the
// whole statement; each `case` duplicates it, compares it against the case
// value, and skips its body when the comparison is false. At the end of each
// case body an unconditional jump skips the remaining cases. At most
// MAX_CASES of those end-of-case jumps can be recorded.
static void switchStatement() {
  consume(TOKEN_LEFT_PAREN, "Expect '(' after 'switch'.");
  expression();
  consume(TOKEN_RIGHT_PAREN, "Expect ')' after value.");
  consume(TOKEN_LEFT_BRACE, "Expect '{' before switch cases.");

  int state = 0; // 0: before all cases, 1: before default, 2: after default.
  int caseEnds[MAX_CASES];
  int caseCount = 0;
  int previousCaseSkip = -1;

  while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) {
    if (match(TOKEN_CASE) || match(TOKEN_DEFAULT)) {
      TokenType caseType = parser.previous.type;

      if (state == 2) {
        error("Can't have another case or default after the default case.");
      }

      if (state == 1) {
        // At the end of the previous case, jump over the others. Only record
        // the jump if there is room, so a huge switch is a compile error
        // rather than a write past the end of `caseEnds`.
        int caseEnd = emitJump(OP_JUMP);
        if (caseCount < MAX_CASES) {
          caseEnds[caseCount++] = caseEnd;
        } else {
          error("Too many cases in switch statement.");
        }

        // Patch its condition to jump to the next case (this one).
        patchJump(previousCaseSkip);
        emitByte(OP_POP);
      }

      if (caseType == TOKEN_CASE) {
        state = 1;

        // See if the case is equal to the value.
        emitByte(OP_DUP);
        expression();

        consume(TOKEN_COLON, "Expect ':' after case value.");

        emitByte(OP_EQUAL);
        previousCaseSkip = emitJump(OP_JUMP_IF_FALSE);

        // Pop the comparison result.
        emitByte(OP_POP);
      } else {
        state = 2;
        consume(TOKEN_COLON, "Expect ':' after default.");
        previousCaseSkip = -1;
      }
    } else {
      // Otherwise, it's a statement inside the current case.
      if (state == 0) {
        error("Can't have statements before any case.");
      }
      statement();
    }
  }

  // Consuming the brace here (instead of matching it in the loop condition)
  // reports an error when the loop stopped at end of file.
  consume(TOKEN_RIGHT_BRACE, "Expect '}' after switch cases.");

  // If we ended without a default case, patch its condition jump.
  if (state == 1) {
    patchJump(previousCaseSkip);
    emitByte(OP_POP);
  }

  // Patch all the case jumps to the end.
  for (int i = 0; i < caseCount; i++) {
    patchJump(caseEnds[i]);
  }

  emitByte(OP_POP); // The switch value.
}
```

The `==` operator pops its operands. In order to repeatedly compare the switch
value to each case, we need to keep it around, so before each case comparison,
we push a copy of the switch value using a new `OP_DUP` (for "duplicate")
instruction.

Add `OP_DUP` to OpCode. In the VM, its implementation is simply:

```c
case OP_DUP: push(peek(0)); break;
```

Given all that, if you compile:

```lox
switch (2) {
case 1:
  print("one");
case 2:
  print("two");
case 3:
  print("three");
default:
  print("default");
}
print("after");
```

Then it generates:

```
    0000    1 OP_CONSTANT         0 '2'
    0002    2 OP_DUP
    0003    | OP_CONSTANT         1 '1'
    0005    | OP_EQUAL
.-- 0006    | OP_JUMP_IF_FALSE    6 -> 16
|   0009    | OP_POP
|   0010    3 OP_CONSTANT         2 'one'
|   0012    | OP_PRINT
|   0013    4 OP_JUMP            13 -> 50 ------.
'-> 0016    | OP_POP                            |
    0017    | OP_DUP                            |
    0018    | OP_CONSTANT         3 '2'         |
    0020    | OP_EQUAL                          |
.-- 0021    | OP_JUMP_IF_FALSE   21 -> 31       |
|   0024    | OP_POP                            |
|   0025    5 OP_CONSTANT         4 'two'       |
|   0027    | OP_PRINT                          |
|   0028    6 OP_JUMP            28 -> 50 ------|
'-> 0031    | OP_POP                            |
    0032    | OP_DUP                            |
    0033    | OP_CONSTANT         5 '3'         |
    0035    | OP_EQUAL                          |
.-- 0036    | OP_JUMP_IF_FALSE   36 -> 46       |
|   0039    | OP_POP                            |
|   0040    7 OP_CONSTANT         6 'three'     |
|   0042    | OP_PRINT                          |
|   0043    8 OP_JUMP            43 -> 50 ------|
'-> 0046    | OP_POP                            |
    0047    9 OP_CONSTANT         7 'default'   |
    0049    | OP_PRINT                          |
.-----------------------------------------------'
'-> 0050   10 OP_POP
    0051   11 OP_CONSTANT         8 'after'
    0053    | OP_PRINT
    0054   13 OP_RETURN
```

There are a couple of interesting design questions to think about:

*   Can you have declarations inside a case? If so, what is their scope? I said
    no. You can introduce a block if you want them.

*   Can you have a switch with no cases? I allow this.

*   Can you have a switch with only a default? I allow this too.

For all of these, I just picked the simplest-to-implement choice. In a real
implementation, I probably would allow variables, scoped to the current case. I
would forbid empty or default-only switches because they clearly aren't useful.


================================================
FILE: note/answers/chapter23_jumping/2.md
================================================
Add `TOKEN_CONTINUE` to TokenType and then implement scanning the `continue`
keyword. Not shown here because it's not very interesting.

Most of the work is in the compiler. First, we need two new global variables:

```c
int innermostLoopStart = -1;
int innermostLoopScopeDepth = 0;
```

These keep track of the point that a `continue` statement should jump to, and
the scope of the variables declared inside the loop.

We change `forStatement()` to keep track of those (and restore their previous
values in the case of a nested loop):

```c
// Compiles a `for` statement. While the loop is being compiled,
// `innermostLoopStart` and `innermostLoopScopeDepth` describe this loop so a
// `continue` inside the body can target it; the values of the surrounding
// loop (if any) are saved on entry and restored on exit.
static void forStatement() {
  beginScope();

  consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
  if (match(TOKEN_VAR)) {
    varDeclaration();
  } else if (match(TOKEN_SEMICOLON)) {
    // No initializer.
  } else {
    expressionStatement();
  }

  // Remember the enclosing loop's state, then make this loop the innermost.
  int surroundingLoopStart = innermostLoopStart; // <--
  int surroundingLoopScopeDepth = innermostLoopScopeDepth; // <--
  innermostLoopStart = currentChunk()->count; // <--
  innermostLoopScopeDepth = current->scopeDepth; // <--

  int exitJump = -1;
  if (!match(TOKEN_SEMICOLON)) {
    expression();
    consume(TOKEN_SEMICOLON, "Expect ';' after loop condition.");

    // Jump out of the loop if the condition is false.
    exitJump = emitJump(OP_JUMP_IF_FALSE);
    emitByte(OP_POP); // Condition.
  }

  if (!match(TOKEN_RIGHT_PAREN)) {
    int bodyJump = emitJump(OP_JUMP);

    int incrementStart = currentChunk()->count;
    expression();
    emitByte(OP_POP);
    consume(TOKEN_RIGHT_PAREN, "Expect ')' after for clauses.");

    emitLoop(innermostLoopStart); // <--
    // From here on, looping back (including via `continue`) goes to the
    // increment clause rather than straight to the condition.
    innermostLoopStart = incrementStart; // <--
    patchJump(bodyJump);
  }

  statement();

  emitLoop(innermostLoopStart); // <--

  if (exitJump != -1) {
    patchJump(exitJump);
    emitByte(OP_POP); // Condition.
  }

  // Leaving this loop: the surrounding loop becomes the innermost again.
  innermostLoopStart = surroundingLoopStart; // <--
  innermostLoopScopeDepth = surroundingLoopScopeDepth; // <--

  endScope();
}
```

Now we're ready to implement `continue`. In `statement()`, add:

```c
  } else if (match(TOKEN_CONTINUE)) {
    continueStatement();
```

That calls:

```c
// Compiles a `continue` statement: reports an error when no loop is being
// compiled, pops the locals declared deeper than the loop's scope, and emits
// a backward jump to `innermostLoopStart`.
static void continueStatement() {
  // -1 is the value `innermostLoopStart` has when no loop is active.
  if (innermostLoopStart == -1) {
    error("Can't use 'continue' outside of a loop.");
  }

  consume(TOKEN_SEMICOLON, "Expect ';' after 'continue'.");

  // Discard any locals created inside the loop.
  // Only OP_POP instructions are emitted; `current->localCount` is left
  // untouched.
  for (int i = current->localCount - 1;
       i >= 0 && current->locals[i].depth > innermostLoopScopeDepth;
       i--) {
    emitByte(OP_POP);
  }

  // Jump to top of current innermost loop.
  emitLoop(innermostLoopStart);
}
```


================================================
FILE: note/answers/chapter23_jumping/3.md
================================================
Reusing [an old StackOverflow answer of mine][answer]:

[answer]: https://stackoverflow.com/a/4296080/9457

Most languages have built-in functions to cover the common cases, but
"fencepost" loops are always a chore: loops where you want to do something on
each iteration and also do something else between iterations. For example,
joining strings with a separator:

```
String result = "";
for (int i = 0; i < items.Count; i++) {
  result += items[i];
  if (i < items.Count - 1) result += ", "; // This is gross.
  // What if I can't access items by index?
  // I have off-by-one errors *every* time I do this.
}
```

I know folds can cover this case, but sometimes you want something imperative.
It would be cool if you could do:

```
String result = "";
for (var item in items) {
  result += item;
} between {
  result += ", ";
}
```


================================================
FILE: note/answers/chapter24_calls/1.md
================================================
Since our interpreter is so small, the change is pretty straightforward. First,
we declare a local variable for the `ip` of the current CallFrame:

```c
static InterpretResult run() {
  CallFrame* frame = &vm.frames[vm.frameCount - 1];
  register uint8_t* ip = frame->ip; // <-- Add.
```

We replace the macros to read from that:

```c
#define READ_BYTE() (*ip++)
#define READ_SHORT() \
    (ip += 2, (uint16_t)((ip[-2] << 8) | ip[-1]))
```

Then the jump instructions write to it:

```c
      case OP_JUMP: {
        uint16_t offset = READ_SHORT();
        ip += offset;
        break;
      }

      case OP_JUMP_IF_FALSE: {
        uint16_t offset = READ_SHORT();
        if (isFalsey(peek(0))) ip += offset;
        break;
      }

      case OP_LOOP: {
        uint16_t offset = READ_SHORT();
        ip -= offset;
        break;
      }
```

Cache invalidation is the harder part. Before a call, we store the `ip` back
into the frame in case the call pushes a new frame. Then we load the `ip` of
the new frame once the call has pushed it:

```c
      case OP_CALL: {
        int argCount = READ_BYTE();
        frame->ip = ip; // <-- Add.
        if (!callValue(peek(argCount), argCount)) {
          return INTERPRET_RUNTIME_ERROR;
        }
        frame = &vm.frames[vm.frameCount - 1];
        ip = frame->ip; // <-- Add.
        break;
      }
```

Likewise, on a return, we need to reload the `ip` of the CallFrame we're
returning to:

```c
        frame = &vm.frames[vm.frameCount - 1];
        ip = frame->ip; // <-- Add.
        break;
```

The last place that `ip` is used is in `runtimeError()`. We need to ensure
every code path that calls `runtimeError()` from `run()` stores the `ip` first.
The runtime errors that are the result of bad calls are handled already, so it's
just the other instructions:

```c
// Pops two numeric operands, applies `op` to them, and pushes the result
// wrapped by `valueType`. If either operand is not a number, the cached `ip`
// is stored back into the frame before the runtime error is reported.
//
// The marker on the added line is a block comment followed by a backslash:
// a `//` comment cannot be used there, because the line continuation would
// splice the next line of the macro into the comment.
#define BINARY_OP(valueType, op) \
    do { \
      if (!IS_NUMBER(peek(0)) || !IS_NUMBER(peek(1))) { \
        frame->ip = ip; /* <-- Add. */ \
        runtimeError("Operands must be numbers."); \
        return INTERPRET_RUNTIME_ERROR; \
      } \
      \
      double b = AS_NUMBER(pop()); \
      double a = AS_NUMBER(pop()); \
      push(valueType(a op b)); \
    } while (false)

// ...

      case OP_GET_GLOBAL: {
        ObjString* name = READ_STRING();
        Value value;
        if (!tableGet(&vm.globals, name, &value)) {
          frame->ip = ip; // <-- Add.
          runtimeError("Undefined variable '%s'.", name->chars);
          return INTERPRET_RUNTIME_ERROR;
        }
        push(value);
        break;
      }

// ...

      case OP_SET_GLOBAL: {
        ObjString* name = READ_STRING();
        if (tableSet(&vm.globals, name, peek(0))) {
          tableDelete(&vm.globals, name);
          frame->ip = ip; // <-- Add.
          runtimeError("Undefined variable '%s'.", name->chars);
          return INTERPRET_RUNTIME_ERROR;
        }
        break;
      }

// ...

      case OP_ADD: {
        if (IS_STRING(peek(0)) && IS_STRING(peek(1))) {
          concatenate();
        } else if (IS_NUMBER(peek(0)) && IS_NUMBER(peek(1))) {
          double b = AS_NUMBER(pop());
          double a = AS_NUMBER(pop());
          push(NUMBER_VAL(a + b));
        } else {
          frame->ip = ip; // <-- Add.
          runtimeError("Operands must be two numbers or two strings.");
          return INTERPRET_RUNTIME_ERROR;
        }
        break;
      }

// ...

      case OP_NEGATE:
        if (!IS_NUMBER(peek(0))) {
          frame->ip = ip; // <-- Add.
          runtimeError("Operand must be a number.");
          return INTERPRET_RUNTIME_ERROR;
        }

        push(NUMBER_VAL(-AS_NUMBER(pop())));
        break;
```

Note that in all of these cases, the code to store the `ip` is only executed
*after* we're sure a runtime error will occur. That avoids wasting cycles
storing it when not necessary.

On my machine, this reduces the execution time of a simple Fibonacci benchmark by
about 8.5%. That doesn't sound like a huge amount, but many language
implementers would be thrilled to find an optimization that juicy. If you run
the VM in a profiler, you'll see a good chunk of the execution time is spent
looking up `fib` in the global variable hash table, so speeding up calls is only
going to buy us so much.

I definitely think this is worth it.


================================================
FILE: note/answers/chapter24_calls/2.md
================================================
There are a few ways you can do this. The interesting part is that the native
C function needs to have sort of two signal paths to get data back to the VM:
it needs to be able to return a Value when successful, and it needs a separate
way to indicate a runtime error.

I think a clean way is to use the `args` array as both an input and output to
the native function. The function will read arguments from that and write the
result value to it when successful. Right now, `args` points to the first
argument. After a call completes, the return value is expected to be at the
slot just before that, which currently contains the function itself. So we'll
say that a native function is expected to store the return value in `args[-1]`.

Then the return value of the C function itself can be used to indicate success
or failure:

```c
typedef bool (*NativeFn)(int argCount, Value* args);
```

So the `clock()` native function becomes this:

```c
// Native `clock()`: stores the processor time used so far, in seconds, in the
// return slot `args[-1]` and returns true to signal success.
static bool clockNative(int argCount, Value* args) {
  args[-1] = NUMBER_VAL((double)clock() / CLOCKS_PER_SEC);
  return true;
}

If a native function does fail, it would be nice to print a runtime error, so
we'll let it store a string in `args[-1]` for an error message to print. Here's
one that always fails:

```c
// Example native that always fails: stores the error message string in
// `args[-1]` and returns false to signal a runtime error.
static bool errNative(int argCount, Value* args) {
  args[-1] = OBJ_VAL(copyString("Error!", 6));
  return false;
}

The VM needs to handle this new calling convention. In `callValue()`, the new
code looks like this:

```c
      // Calls a native function. On success the native has already written
      // its result into the slot just below the arguments, so dropping the
      // arguments leaves the result on top of the stack.
      case OBJ_NATIVE: {
        NativeFn native = AS_NATIVE(callee);
        if (native(argCount, vm.stackTop - argCount)) {
          vm.stackTop -= argCount;
          return true;
        } else {
          // On failure that same slot holds the error message. Pass it as an
          // argument, never as the format string, so a '%' in the message
          // is not interpreted as a conversion specifier.
          runtimeError("%s", AS_STRING(vm.stackTop[-argCount - 1])->chars);
          return false;
        }
      }
```

In some ways, the code is simpler. Instead of getting the return value from the
C function and pushing it onto the stack, this simply discards all but one of
the stack slots. Since the return value is already there at slot zero, that
leaves it right on top with no extra work.

But the `if` statement to see if the call succeeded is expensive. Inserting some
control flow on a critical path like this is always a performance hit. On my
laptop, this change makes the Fibonacci benchmark about 25% slower, even though
no actual runtime errors ever occur.

That's the price you pay for a robust VM, I guess.


================================================
FILE: note/answers/chapter25_closures/1.md
================================================
One could spend a lot of time tweaking this and optimizing. Here's a simple
implementation. First, in the compiler we need to not emit `OP_CLOSURE` and the
subsequent operands if there are no upvalues. Instead, we just emit an
`OP_CONSTANT` to load the function like we did before we had closures.

```c
  // Create the function object.
  ObjFunction* function = endCompiler();
  // Remove 7 lines and add:
  uint8_t functionConstant = makeConstant(OBJ_VAL(function));
  if (function->upvalueCount > 0) {
    emitBytes(OP_CLOSURE, functionConstant);

    // Emit arguments for each upvalue to know whether to capture a local
    // or an upvalue.
    for (int i = 0; i < function->upvalueCount; i++) {
      emitByte(compiler.upvalues[i].isLocal ? 1 : 0);
      emitByte(compiler.upvalues[i].index);
    }
  } else {
    // No need to create a closure.
    emitBytes(OP_CONSTANT, functionConstant);
  }
  // End.
}
```

In the VM, we first need to change CallFrame. We can't rely on the current
function always being an ObjClosure:

```c
typedef struct {
  // Remove 1 line and add:
  Obj* function;
  // End.
  uint8_t* ip;
  Value* slots;
} CallFrame;
```

We store it as an `Obj*` since it may be either an ObjClosure or ObjFunction.
Since Obj contains the object's type, we can use that at runtime to see which kind
of function we have.

Over in the implementation, add:

```c
// Returns the ObjFunction being executed by `frame`, whether the frame holds
// a bare ObjFunction or an ObjClosure wrapping one.
static inline ObjFunction* getFrameFunction(CallFrame* frame) {
  Obj* callee = frame->function;
  return callee->type == OBJ_FUNCTION
      ? (ObjFunction*)callee
      : ((ObjClosure*)callee)->function;
}
```

Accessing the underlying ObjFunction for a given CallFrame requires some
conditional logic. We need to do this in a couple of places, including macros,
so I wrapped it in a function that the compiler will hopefully inline for us.

In `runtimeError()`, replace:

```c
    ObjFunction* function = frame->closure->function;
```

With:

```c
    ObjFunction* function = getFrameFunction(frame);
```

In `callValue()`, we need to handle both kinds of callable objects. There are
a few ways to do this, but I split `call()` into two functions:

```c
      case OBJ_CLOSURE:
        return callClosure(AS_CLOSURE(callee), argCount);
      case OBJ_FUNCTION:
        return callFunction(AS_FUNCTION(callee), argCount);
```

Delete the old `call()` and replace it with:

```c
// Pushes a new CallFrame that runs `function` on behalf of `callee` (the
// ObjFunction itself or the ObjClosure wrapping it). Returns false after
// reporting a runtime error if the argument count is wrong or the frame
// stack is full.
static bool call(Obj* callee, ObjFunction* function, int argCount) {
  int expected = function->arity;
  if (argCount != expected) {
    runtimeError("Expected %d arguments but got %d.", expected, argCount);
    return false;
  }

  if (vm.frameCount == FRAMES_MAX) {
    runtimeError("Stack overflow.");
    return false;
  }

  // Claim the next free frame and point it at the callee's code and at the
  // stack window that starts at the callee slot, just below the arguments.
  CallFrame* frame = &vm.frames[vm.frameCount];
  vm.frameCount++;
  frame->slots = vm.stackTop - argCount - 1;
  frame->ip = function->chunk.code;
  frame->function = callee;
  return true;
}

// Calls a closure: the frame records the closure, and the code comes from
// the function it wraps.
static bool callClosure(ObjClosure* closure, int argCount) {
  return call((Obj*)closure, closure->function, argCount);
}

// Calls a bare function: the function is both the frame's callee and the
// source of the code to run.
static bool callFunction(ObjFunction* function, int argCount) {
  return call((Obj*)function, function, argCount);
}
```

Most of the code is the same, but we have to jump through a few hoops to handle
the level of indirection in ObjClosure.

I did a little benchmarking. On our old fib program that doesn't use any
closures, this change makes it a few percent slower. Unsurprising because
there's a little more conditional logic when accessing the function from a
CallFrame. I was actually surprised there wasn't a bigger performance cost.

Then I made a little synthetic benchmark to stress closure creation:

```
for (var i = 0; i < 10; i = i + 1) {
  var start = clock();
  var sum = 0;
  for (var j = 0; j < 1000000; j = j + 1) {
    fun outer(a, b, c) {
      fun inner() {
        return a + b + c;
      }
      return inner;
    }

    var closure = outer(j, j, j);
    sum = sum + closure();
  }

  print sum;
  print clock() - start;
}
```

This program is obviously pathological. Real code rarely creates so many
functions and closures. But on this program, there was a significant improvement
with the new code. About 24% faster. I think most of this is because we don't
have to create a closure for each declaration of `outer()`.

Overall, I'm not sure if this optimization is worth it. I'd want to try it on
real-world code that uses closures in an idiomatic way.


================================================
FILE: note/answers/chapter25_closures/2.md
================================================
This took me quite a while to get working, even though the end result is pretty
simple. I wandered down a few dead ends before I picked the right path.

The basic idea is pretty simple:

1.  Right before compiling the body of the loop, create a new scope with a local
    variable that shadows the loop variable. Initialize that variable with the
    loop variable's current value.

2.  Compile the loop body. This way, if a closure happens to reference the loop
    variable, it will resolve to that inner shadowed one.

3.  Store the current value of that inner variable back in the outer one it
    shadows. This is important so that any explicit modifications to the loop
    variable inside the body correctly affect the loop condition and increment
    clauses. Otherwise, this loop will never exit:

    ```lox
    for (var i = 0; i < 10; ) {
      i = i + 1;
    }
    ```

4.  After the body, end the scope where the inner variable is declared. If it
    got captured by the closure, this will close its upvalue and capture the
    current value of it.

Here's the entire resulting function, with comments marking the changes,
numbered by which point above they correspond to:

```c
// Compiles a `for` statement. Besides the usual initializer, condition, and
// increment clauses, when the initializer declares a variable with `var`, the
// body is compiled against a fresh per-iteration copy of that variable. A
// closure created in the body then resolves to (and captures) the copy instead
// of the single loop variable shared by every iteration.
static void forStatement() {
  // Outer scope: holds the loop variable declared by the initializer, if any.
  beginScope();

  // 1: Grab the name and slot of the loop variable so we can refer to it later.
  //    A slot of -1 means the loop does not declare a variable.
  int loopVariable = -1;
  Token loopVariableName;
  loopVariableName.start = NULL;
  // end.

  consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
  if (match(TOKEN_VAR)) {
    // 1: Grab the name of the loop variable. It is still the current token
    //    because varDeclaration() has not consumed it yet.
    loopVariableName = parser.current;
    // end.
    varDeclaration();
    // 1: And get its slot. It is the most recently added local.
    loopVariable = current->localCount - 1;
    // end.
  } else if (match(TOKEN_SEMICOLON)) {
    // No initializer.
  } else {
    expressionStatement();
  }

  // Where the end of the loop jumps back to. Initially the condition.
  int loopStart = currentChunk()->count;

  int exitJump = -1;
  if (!match(TOKEN_SEMICOLON)) {
    expression();
    consume(TOKEN_SEMICOLON, "Expect ';' after loop condition.");

    // Jump out of the loop if the condition is false.
    exitJump = emitJump(OP_JUMP_IF_FALSE);
    emitByte(OP_POP); // Condition.
  }

  if (!match(TOKEN_RIGHT_PAREN)) {
    // The increment clause is emitted before the body, so hop over it on the
    // way into the body.
    int bodyJump = emitJump(OP_JUMP);

    int incrementStart = currentChunk()->count;
    expression();
    emitByte(OP_POP);
    consume(TOKEN_RIGHT_PAREN, "Expect ')' after for clauses.");

    // After the increment, go back to the condition. From now on, the end of
    // the body loops back to the increment instead.
    emitLoop(loopStart);
    loopStart = incrementStart;
    patchJump(bodyJump);
  }

  // 1: If the loop declares a variable...
  int innerVariable = -1;
  if (loopVariable != -1) {
    // 1: Create a scope for the copy...
    beginScope();
    // 1: Define a new variable initialized with the current value of the loop
    //    variable. It reuses the loop variable's name so that it shadows it.
    emitBytes(OP_GET_LOCAL, (uint8_t)loopVariable);
    addLocal(loopVariableName);
    markInitialized();
    // 1: Keep track of its slot.
    innerVariable = current->localCount - 1;
  }
  // end.

  // 2: Compile the body. References to the loop variable resolve to the copy.
  statement();

  // 3: If the loop declares a variable...
  if (loopVariable != -1) {
    // 3: Store the inner variable back in the loop variable so that changes
    //    made by the body are seen by the condition and increment clauses.
    emitBytes(OP_GET_LOCAL, (uint8_t)innerVariable);
    emitBytes(OP_SET_LOCAL, (uint8_t)loopVariable);
    emitByte(OP_POP);

    // 4: Close the temporary scope for the copy of the loop variable.
    endScope();
  }

  emitLoop(loopStart);

  if (exitJump != -1) {
    patchJump(exitJump);
    emitByte(OP_POP); // Condition.
  }

  endScope();
}
```

================================================
FILE: note/answers/chapter25_closures/3.lox
================================================
// Here is the classic message-based pattern:
// Builds a vector "object": a closure over x and y that dispatches on the
// name of the message it is sent.
fun vector(x, y) {
  fun object(message) {
    // Field getters.
    if (message == "x") {
      return x;
    } else if (message == "y") {
      return y;
    }

    if (message == "add") {
      // The "add" method takes another vector and returns the sum as a new
      // vector.
      fun add(other) {
        return vector(x + other("x"), y + other("y"));
      }

      return add;
    }

    print "unknown message";
  }

  return object;
}

var a = vector(1, 2);
var b = vector(3, 4);
var c = a("add")(b);
print c("x");
print c("y");

// The constructor, "vector()" returns a closure that closes over the object's
// fields. In this case, it's the "x" and "y" parameters. The closure accepts a
// single argument which is the string name of the "method" to invoke on the
// object. It supports three methods:
//
// "x" returns the vector's X coordinate. Likewise "y". "add" returns a second
// function, which is the add method. That function in turn accepts an argument
// for the other vector to add to it.


================================================
FILE: note/answers/chapter26_garbage/1.md
================================================
On my 64-bit Mac laptop, it takes 16 bytes or 128 bits. That's quite a lot for a
pointer, a Boolean, and an enum with only eight cases (once we add the couple of
remaining ones for classes and instances).

In principle all we need is 64 bits for the pointer, 1 bit for the mark, and
3 bits for the type. And, in fact, most 64-bit operating systems don't give an
application a full 64 bits of address space. On x64 and ARM, a pointer will only
ever use 48 of those bits.

Also, the pointer in our Obj header points to another Obj whose first field is
also a pointer. When allocating memory for objects, the OS will align them to an
8-byte boundary. That implies the low three bits of the pointer will always be
zero and there's really only 45 meaningful bits of pointer data.

Thus, the minimum we really need is 49 bits: 45 for the pointer, 1 for the mark
bit, and 3 for the type enum. Because of alignment reasons, we won't be able to
get that all the way down, so we'll round it to 64 bits. If we leave the
pointer bits where they normally are in there, that leaves two empty bytes at the
top and a few empty bits at the bottom.

We'll store the type enum in the highest byte, the mark bit in the next byte,
and the next pointer in the remaining bits, like this:

```
00000000 00000000 01111111 11010110 01001111 01010000 00000000 01100000
Bit position:
66665555 55555544 44444444 33333333 33222222 22221111 11111100 00000000
32109876 54321098 76543210 98765432 10987654 32109876 54321098 76543210

Bits needed for pointer:
........ ........ |------- -------- -------- -------- -------- ----|...

Packing everything in:
.....TTT .......M NNNNNNNN NNNNNNNN NNNNNNNN NNNNNNNN NNNNNNNN NNNNNNNN

T = type enum, M = mark bit, N = next pointer.
```

To implement this, we'll replace the old fields in Obj with a single 64-bit int:

```c
// Object header packed into one 64-bit word. The accessor functions read the
// type from the top byte (bits 56-63), the mark flag from bit 48, and the next
// pointer from the low 48 bits.
struct sObj {
  uint64_t header;
};
```

Because the values are all bit-packed together, simple field access no longer
works. Instead, we'll write accessor functions to pull the right bits out and
convert them back to their desired representation:

```c
// Extracts the object type stored in the top byte (bits 56-63) of the header.
static inline ObjType objType(Obj* object) {
  uint64_t typeBits = (object->header >> 56) & 0xff;
  return (ObjType)typeBits;
}

// Reads the mark flag stored in bit 48 of the header.
static inline bool isMarked(Obj* object) {
  uint64_t markBit = (object->header >> 48) & 0x01;
  return (bool)markBit;
}

// Recovers the next-object pointer from the low 48 bits of the header. No
// shift is needed because the pointer bits stay in their natural position.
static inline Obj* objNext(Obj* object) {
  uint64_t pointerBits = object->header & 0x0000ffffffffffff;
  return (Obj*)pointerBits;
}
```

They're fairly straightforward. Each shifts the field's bits down to where they
belong for the desired type, then uses a bitwise and with a constant to mask off
and clear the bits for the *other* fields. For the next pointer, we don't need
to shift anything.

Setting the fields is a little more complex:

```c
// Replaces the mark byte (bits 48-55) of the header with the given flag while
// leaving the type and next-pointer bits untouched.
static inline void setIsMarked(Obj* object, bool isMarked) {
  uint64_t preserved = object->header & 0xff00ffffffffffff;
  uint64_t markBits = (uint64_t)isMarked << 48;
  object->header = preserved | markBits;
}

// Replaces the next pointer (low 48 bits) of the header while leaving the
// type and mark bits in the top two bytes untouched.
static inline void setObjNext(Obj* object, Obj* next) {
  uint64_t preserved = object->header & 0xffff000000000000;
  uint64_t pointerBits = (uint64_t)next;
  object->header = preserved | pointerBits;
}
```

We need to clear out the old value of the field and store the updated bits. But
we also need to preserve the bits for the *other* fields. So this time we mask
and clear only the bits we are updating and preserve the rest.

When an object is first created, the mark bit is clear, and we have a type and
next pointer, so we initialize it like:

```c
// Pack the next pointer into the low bits and the type into the top byte. The
// mark bit starts out clear. Use fixed-width casts: `unsigned long` is only 32
// bits on some 64-bit platforms (e.g. Windows), which would truncate the
// pointer and make the 56-bit shift undefined.
object->header = (uint64_t)(uintptr_t)vm.objects | ((uint64_t)type << 56);
```

All that remains is to replace every use of the old fields in the VM with calls
to the above utility functions. That's mechanical so I won't write them all out
here. The end result is that we've cut the size of the object header in half.

There is some runtime expense when accessing fields now because of the masking
and shifting. The next pointer and mark bits are only used during GC, so that's
likely not a large impact. Accessing the object's type is potentially more of an
issue since that happens frequently during runtime. One option we could take is
to store the type bits down in the least significant bits and shift the next
pointer up. That would let us access the type just by bitmasking without needing
a shift.


================================================
FILE: note/answers/chapter26_garbage/2.md
================================================
The basic idea is that instead of clearing the mark bit of every live object,
we simply redefine their current value to mean "not marked". In other words,
instead of "true" always meaning "marked", after each cycle, we toggle which
Boolean value represents the marked state. Since every live object will have
the previous version's mark value, toggling the definition of marked instantly
sets them all to unmarked.

The implementation is fairly straightforward. In the VM struct, we add a new
field to store the Boolean value that currently means "marked":

```c
  bool markValue;
```

In `initVM()`, we initialize that to some value (it doesn't matter which):

```c
  vm.markValue = true;
```

Over in `sObj`, we rename the mark field from `isMarked` to `mark` to make it
clearer that `true` doesn't necessarily mean it's marked:

```
  bool mark;
```

Then we go through all of the code that uses `isMarked` and update it to the
new semantics:


```diff
 static Obj* allocateObject(size_t size, ObjType type) {
   Obj* object = (Obj*)reallocate(NULL, 0, size);
   object->type = type;
-  object->isMarked = false;
+  object->mark = !vm.markValue;

   object->next = vm.objects;
   vm.objects = object;
```

A new object starts off unmarked, so we initialize `mark` to the opposite of
the value that means "marked".

```diff
 void markObject(Obj* object) {
   if (object == NULL) return;
-  if (object->isMarked) return;
+  if (object->mark == vm.markValue) return;
```

To see if an object is marked, we compare its mark value to the VM's. If they
are the same, the object is marked.

Otherwise, we mark it like so:

```diff
-  object->isMarked = true;
+  object->mark = vm.markValue;
```

When removing the weak references from the string table, we also check the mark
bit:

```diff
 void tableRemoveWhite(Table* table) {
   for (int i = 0; i < table->capacity; i++) {
     Entry* entry = &table->entries[i];
-    if (entry->key != NULL && !entry->key->obj.isMarked) {
+    if (entry->key != NULL && entry->key->obj.mark != vm.markValue) {
       tableDelete(table, entry->key);
     }
   }
```

Over in `sweep()`, we compare against the VM's mark value to check each object's
mark state:

```diff
   Obj* previous = NULL;
   Obj* object = vm.objects;
   while (object != NULL) {
-    if (object->isMarked) {
-      object->isMarked = false;
+    if (object->mark == vm.markValue) {
       previous = object;
       object = object->next;
     } else {
```

The whole point of this change is that other removed line. We no longer need to
clear the mark bit on each live object.

Finally, when `collectGarbage()` completes, we flip which value means "marked":

```diff
   sweep();

   vm.nextGC = vm.bytesAllocated * GC_HEAP_GROW_FACTOR;
-
+  vm.markValue = !vm.markValue;
```

This way, every object's current mark value now means "unmarked". OK, so what's
the performance gain here? On my laptop, with one little microbenchmark...
almost none. It was slightly faster, but small enough to be within the noise.
Does that mean this is a bad technique? It's hard to say. It might make a bigger
difference on other benchmarks or other machines.


================================================
FILE: note/answers/chapter27_classes/1.md
================================================
In Ruby, if you access an instance variable that you never defined, you silently
get `nil` in return. It's as if the object has all fields and they are
implicitly initialized to `nil` for you.

If you want to explicitly check to see if an instance variable is defined, you
can call a special `instance_variable_defined?()` method on the object, passing
in the name of the instance variable as a string or symbol:

```ruby
some_object.instance_variable_defined?("field_name")
```

JavaScript works somewhat like Ruby. If you access a property on an object that
was never set, you get an implicit sentinel value back. To make things more
confusing, JavaScript has *two* special "absent" values: `null` and `undefined`.
When you access an undefined field, you get `undefined` back. You can think of
`null` as the "application-level" absent value that users can define to mean
what they want in their program. `undefined` is more like a "system-level"
absent value that gets returned from some built-in language semantics like
accessing an undefined field.

To tell if a property is present on the object, you can call `hasOwnProperty()`
on it, passing in the name of the property as a string.

Python takes a stricter approach. Accessing a non-existent object attribute
throws an exception. You can catch this if you want to handle the absent field
directly. To determine whether a field exists *before* an exception gets thrown,
you can call a special top-level function `hasattr()`, passing in the object in
question and the name of the attribute.

In statically-typed languages, of course, it is a compile-time error to access
a field not defined for an object.

In other words, there are basically two dynamic approaches to handling accessing
undefined fields:

1. Return a special sentinel value like `nil`.
2. Produce a runtime error.

For Lox, the former feels too loose to me. Lox is generally stricter around
things like missing function arguments, and I think it should be strict here
too. At the same time, Lox lacks exceptions or a way for users to handle runtime
errors so we need to take that into account.

If users have a way to *detect* an absent field before trying to access it,
then it's fine for the language to abort on undefined field access -- users can
avoid that by checking beforehand. So I think that's the approach I'd take for
Lox.

We'll add a global `hasField()` native function that takes an instance and a
field name and returns `true` if the field is defined on the instance. Here is
an implementation:

```c
// Native `hasField(instance, name)`: reports whether a field called `name` is
// defined on `instance`. Returns false when the argument count or the argument
// types are wrong.
static Value hasFieldNative(int argCount, Value* args) {
  if (argCount != 2 || !IS_INSTANCE(args[0]) || !IS_STRING(args[1])) {
    return FALSE_VAL;
  }

  // Only the presence of the field matters; the looked-up value is discarded.
  Value ignored;
  ObjInstance* receiver = AS_INSTANCE(args[0]);
  return BOOL_VAL(tableGet(&receiver->fields, AS_STRING(args[1]), &ignored));
}
```

The error-checking at the top is lame. Right now, the VM doesn't support
native functions producing runtime errors, so it just returns `false` if you
pass invalid arguments. Ideally, those would be runtime errors.

We define it when the VM starts up by adding this to `initVM()`:

```c
  defineNative("hasField", hasFieldNative);
```


================================================
FILE: note/answers/chapter27_classes/2.md
================================================
I am actually iffy on whether a language should allow this, or at least whether
it should make accessing fields using imperatively-built strings
*easy*.

That's really something like a metaprogramming feature. Users are writing code
that builds almost a tiny piece of "code" -- a field name -- and then executing
that. Metaprogramming is useful, but I think it should be clear to users when
they are doing it.

JavaScript tried to merge instances and data structures into a single "object"
concept and the result was a mess. People would try to use normal JavaScript
objects as hash tables, which JS encourages by putting a `[]` operator right on
objects that lets you pass in strings for field names. Then they would get very
surprised when their "hash table" happened to contain "keys" like `toString`.

I think it's better to keep objects and data structures stratified, and likewise
to keep regular programming and metaprogramming clearly distinguished. That
said, I do think it's useful to offer metaprogramming.

A simple way to offer the functionality but make users go out of their way to
get it is by using a top-level function instead of hanging some kind of operator
syntax right off the instance. (An even more explicit approach is to put those
functions in a separate "reflection" module users have to import, but Lox
doesn't have any modularity story.)

So let's add two new functions `getField()` and `setField()`. The first takes
an instance and a field name string. The second takes those plus a value to
store.

They are implemented like so:

```c
// Native `getField(instance, name)`: returns the value of the field called
// `name` on `instance`, or nil if the instance has no such field. Returns
// false when the argument count or the argument types are wrong.
static Value getFieldNative(int argCount, Value* args) {
  if (argCount != 2) return FALSE_VAL;
  if (!IS_INSTANCE(args[0])) return FALSE_VAL;
  if (!IS_STRING(args[1])) return FALSE_VAL;

  ObjInstance* instance = AS_INSTANCE(args[0]);
  // Start from nil: tableGet() leaves the output untouched when the key is
  // absent, so without this a missing field would return an uninitialized
  // Value instead of nil.
  Value value = NIL_VAL;
  tableGet(&instance->fields, AS_STRING(args[1]), &value);
  return value;
}

// Native `setField(instance, name, value)`: stores `value` in the field called
// `name` on `instance` and returns the stored value. Returns false when the
// argument count or the argument types are wrong.
static Value setFieldNative(int argCount, Value* args) {
  if (argCount != 3 || !IS_INSTANCE(args[0]) || !IS_STRING(args[1])) {
    return FALSE_VAL;
  }

  Value newValue = args[2];
  ObjInstance* receiver = AS_INSTANCE(args[0]);
  tableSet(&receiver->fields, AS_STRING(args[1]), newValue);
  return newValue;
}
```

Like I said in answer #1, the error-handling in these is lame. Ideally, they
would abort with a runtime error if the arguments were incorrect.

Likewise, calling `getField()` when the instance doesn't have that field should
be a runtime error, but here it just returns `nil`.

These get declared as top level functions by adding this to `initVM()`:

```c
  defineNative("getField", getFieldNative);
  defineNative("setField", setFieldNative);
```


================================================
FILE: note/answers/chapter27_classes/3.md
================================================
Ruby provides a private method, `remove_instance_variable` that an object can
call on itself passing in the name of the instance variable to delete. Ruby is
interesting in that it has the model that accessing an undefined instance
variable returns `nil`. But it still makes a distinction between a deleted
instance variable and an instance variable whose value has been set to `nil`.
If you use `defined?` to tell if the instance variable exists, one whose value
is `nil` does exist, while a deleted one does not.

Lua has, I think, a more consistent model. Accessing a non-existent table key
-- Lua's rough analogue to fields -- returns `nil`. And there is no special way
to delete a table key. You just set its value to `nil`.

Python does not treat absent attributes as equivalent to `None`. Accessing an
attribute that does not exist throws an exception. To remove an attribute, you
can use the built-in `del` statement:

```python
del obj.some_attribute
```

In my answer for #1, I felt Lox should go with a stricter approach like Python.
That suggests we shouldn't use setting a field to `nil` to delete it. Instead,
following the previous two answers, we'll add another top level native function:

```c
// Native `deleteField(instance, name)`: removes the field called `name` from
// `instance`. Always returns nil, including when the argument count or the
// argument types are wrong.
static Value deleteFieldNative(int argCount, Value* args) {
  if (argCount != 2 || !IS_INSTANCE(args[0]) || !IS_STRING(args[1])) {
    return NIL_VAL;
  }

  ObjInstance* receiver = AS_INSTANCE(args[0]);
  tableDelete(&receiver->fields, AS_STRING(args[1]));
  return NIL_VAL;
}
```

And wire it up in `initVM()`:

```c
  defineNative("deleteField", deleteFieldNative);
```

Honestly, I don't think this is a great user experience. Lox makes it very
easy and natural to add a field, so it's weird to have to call a native function
and pass in the field as a string in order to remove one.

If I were making a full language, I would consider some built-in syntax for
removing a field. On the other hand, removing a field is a pretty strange, rare
operation. In most object-oriented programs, the set of fields an object has is
essentially fixed, even in dynamically-typed ones.


================================================
FILE: note/answers/chapter27_classes/4.md
================================================
I'll just point you to a resource. Look for the paper "An Efficient
Implementation of Self, a Dynamically-Typed Object-Oriented Language Based on
Prototypes".


================================================
FILE: note/answers/chapter28_methods/1.md
================================================
An easy optimization is to cache the initializer directly in the ObjClass to
avoid the hash table lookup:

```c
// Runtime representation of a class, extended with a cached initializer so
// that creating an instance does not need a method table lookup.
typedef struct ObjClass {
  Obj obj;
  ObjString* name;
  // Cached initializer method; nil when the class does not define one.
  Value initializer; // <--
  Table methods;
} ObjClass;
```

It starts out nil:

```c
// Allocates a new class called `name` with an empty method table and no
// cached initializer.
ObjClass* newClass(ObjString* name) {
  ObjClass* result = ALLOCATE_OBJ(ObjClass, OBJ_CLASS);
  initTable(&result->methods);
  result->initializer = NIL_VAL; // <--
  result->name = name;
  return result;
}
```

When a method is defined, if it's the initializer, then we also store it in
that field:

```c
// Binds the method on top of the stack to the class just below it under
// `name`, then pops the method. If `name` is the initializer name, the method
// is also cached directly on the class.
static void defineMethod(ObjString* name) {
  ObjClass* owner = AS_CLASS(peek(1));
  Value closure = peek(0);

  tableSet(&owner->methods, name, closure);

  // <-- Remember the initializer so instantiation can skip the table lookup.
  if (name == vm.initString) {
    owner->initializer = closure;
  }

  pop();
}
```

Then in `callValue()` we use that instead of looking for the initializer in the
method table:

```c
      case OBJ_CLASS: {
        ObjClass* klass = AS_CLASS(callee);
        vm.stackTop[-argCount - 1] = OBJ_VAL(newInstance(klass));
        if (!IS_NIL(klass->initializer)) {                       // <--
          return call(AS_CLOSURE(klass->initializer), argCount); // <--
        } else if (argCount != 0) {
          runtimeError("Expected 0 arguments but got %d.", argCount);
          return false;
        }
```

It's a reasonable little optimization. On my machine, it doesn't really affect
perf in a noticeable way. Even in a benchmark that stresses creating instances,
it's only a marginal improvement. That's because the heap allocation and GC of
the instances dominates the runtime.

However if we had a more sophisticated implementation with its own faster
memory allocator, then that might go down. At that point, looking up the
initializer could be a larger piece of the time to instantiate an object and
might be more important to speed up.


================================================
FILE: note/answers/chapter28_methods/2.md
================================================
The answer here is "inline caching". At each callsite, the VM inserts a little
space to store a cached reference to a class and a method. When the callsite is
first reached, the VM looks up the class of the receiver and then looks up the
method on that class. It stores that class and method in the cache next to that
callsite and then invokes the method as normal.

The next time that callsite executes, the VM checks to see if the receiver has
the same class as the cached one. If so, it knows the same method will be
resolved so it uses the cached method directly instead of looking it up again.


================================================
FILE: note/answers/chapter28_methods/3.md
================================================
I'm actually *not* a fan of this choice, though it is certainly a common one.
I like how Ruby uses a leading `@` to distinguish instance fields from methods
and getters on the object. In my own language Wren, I use a leading underscore
to similar effect.

This means that methods and fields never shadow one another since they are
textually distinct. With my language Wren, it also means that we can tell the
set of fields a class uses just by parsing the class body. We can thus avoid
the need for a hash table to store the instance's state. Instead, an instance
has a single inline array of fields. Field access is a simple array lookup with
an index determined at compile time. It is *much* faster than Lox.

But, for the book, I felt it made sense to stick with a more traditional
language choice. JavaScript, Python, Lua, and many other dynamically typed
languages all treat objects as hash tables under the hood, so I felt it was
worth showing how those languages work.


================================================
FILE: note/answers/chapter29_superclasses/1.md
================================================
I created a hobby language named Wren. The clox VM was actually based on Wren's
implementation, and the design of Lox borrows a lot from Wren. Lox is sort of
simplified slightly-less-weird Wren.

In Wren, fields have a leading underscore in their name. This solves the problem
in the previous chapter of fields shadowing methods, and it also helps address
this problem. Because the compiler can syntactically identify a field access,
and it knows the surrounding class, it effectively "renames" each field based
on the surrounding class.

So in a program like this (using more Lox-like syntax):

```
class A {
  init() {
    _field = "a field";
  }
}

class B < A {
  init() {
    super.init();
    _field = "b field";
  }
}
```

There is no collision here because the compiler treats `_field` inside methods
of class A as having a distinct name from `_field` inside class B. The main
downside is that fields become "private" instead of "protected". There's no way
for a subclass to directly access a field defined by a superclass, even on the
same instance.

I think that's a worthwhile trade-off.


================================================
FILE: note/answers/chapter29_superclasses/2.md
================================================
I can think of a few approaches:

## 1. Eagerly rebuild the subclass method tables

We could keep doing copy-down inheritance like we do here. But also give each
superclass references to the set of subclasses that inherit from it. When a
superclass's method table is modified, it walks the subclasses and also updates
or rebuilds their now-invalidated method tables. That sounds slow, and it would
be. However, meta-programming like this usually happens only a couple of times
near the beginning of the program's execution and then stops. It's unusual for a
class's set of methods to change frequently during a program's run or inside a
hot loop. So this likely doesn't need to be fast.

## 2. Lazily rebuild the subclass method tables

The downside of 1 is that the superclass needs to maintain a list of every
subclass. Every single time a method is touched, the entire tree of subclasses
must be updated. If it's common to change a number of methods in succession,
that's a lot of work. Also, maintaining the list of references from superclass
to subclass makes GC harder (they'll need to be weak references if you want to
be able to ever collect subclasses) and makes classes heavier-weight.

Another option is to have the subclass lazily rebuild its method table when it
sees the superclass has changed. In each class, we add a "version" integer
field. It starts out at zero and increments any time the class's set of methods
is modified. (In principle, this could overflow, but that's pretty unlikely.)

We also add an integer field to each class to track the version of its
*superclass*. This stores the version that the superclass was at when this
subclass inherited its methods.

When a subclass is declared, it copies the methods from its superclass, and
also records the superclass's current version number in its superclass version
field.

Whenever a class's method set changes after the declaration executes, we also
increment its version. If a subclass's own superclass version field is ever out
of sync with the version field on its actual superclass, then we know the
superclass has changed since the last time its methods were copied down.

When do we check that? The only real natural point in time is right before a
method call. Adding overhead to each method call is a drag, but it's a fairly
simple check between two numbers. If the two versions are out of sync, we
rebuild the subclass's method table and then re-sync the version numbers.

## 3. Lean on inline caching

This is probably the best approach (though I wouldn't put money on it). If the
VM already does some form of inline caching, then method lookup doesn't need to
be that fast. For a given callsite, you'll only do the lookup once and then rely on
the fast inline cache for most calls.

So in this case, we could keep something like jlox's slow approach where methods
are resolved by dynamically walking the inheritance chain. Then once we find the
method, we store it in the inline cache, and after that it's as fast as we could
want.

The only missing piece is handling the fact that the cache can now become
invalidated. If a class's method set cannot change, then the only way an inline
cache can become stale is if the class of the receiver changes. Now an inline
cache on the same receiver can become stale if a method changes and a lookup
would now produce a different method.

Inline caches usually track the receiver's class by having some kind of numeric
ID for each class. Each class stores its ID and in the inline cache, we store
the ID of the receiver's class that the method was called on. If those match,
the cache is valid.

We might be able to extend that by having a method change to a class change its
ID. It is as if metaprogramming a class produces a new class with a different
ID. Since the inline cache only stores the ID of the leaf-most class of the
receiver, we also have to ensure that metaprogramming a *superclass* also
affects the ID of the subclasses. So we'd want to do something like the approach
in 1 where changing a superclass means we traverse the tree of subclasses and
update their IDs too.

There are probably better solutions, but these are the first few that came to
mind.


================================================
FILE: note/answers/chapter29_superclasses/3.diff
================================================
diff --git a/c/chunk.h b/c/chunk.h
index 3fe9250..b035513 100644
--- a/c/chunk.h
+++ b/c/chunk.h
@@ -19,7 +19,6 @@ typedef enum {
   OP_SET_UPVALUE,
   OP_GET_PROPERTY,
   OP_SET_PROPERTY,
-  OP_GET_SUPER,
   OP_EQUAL,
   OP_GREATER,
   OP_LESS,
@@ -35,7 +34,7 @@ typedef enum {
   OP_LOOP,
   OP_CALL,
   OP_INVOKE,
-  OP_SUPER_INVOKE,
+  OP_INNER,
   OP_CLOSURE,
   OP_CLOSE_UPVALUE,
   OP_RETURN,
diff --git a/c/compiler.c b/c/compiler.c
index 78ce52d..125bf8c 100644
--- a/c/compiler.c
+++ b/c/compiler.c
@@ -69,7 +69,9 @@ typedef struct Compiler {
 
 typedef struct ClassCompiler {
   struct ClassCompiler* enclosing;
+  uint16_t id;
   Token name;
+  Token methodName;
   bool hasSuperclass;
 } ClassCompiler;
 
@@ -484,27 +486,27 @@ static Token syntheticToken(const char* text) {
   token.length = (int)strlen(text);
   return token;
 }
-static void super_(bool canAssign) {
+static void inner(bool canAssign) {
   if (currentClass == NULL) {
-    error("Cannot use 'super' outside of a class.");
-  } else if (!currentClass->hasSuperclass) {
-    error("Cannot use 'super' in a class with no superclass.");
+    error("Cannot use 'inner' outside of a class.");
   }
 
-  consume(TOKEN_DOT, "Expect '.' after 'super'.");
-  consume(TOKEN_IDENTIFIER, "Expect superclass method name.");
-  uint8_t name = identifierConstant(&parser.previous);
-
   namedVariable(syntheticToken("this"), false);
-  if (match(TOKEN_LEFT_PAREN)) {
-    uint8_t argCount = argumentList();
-    namedVariable(syntheticToken("super"), false);
-    emitBytes(OP_SUPER_INVOKE, name);
-    emitByte(argCount);
-  } else {
-    namedVariable(syntheticToken("super"), false);
-    emitBytes(OP_GET_SUPER, name);
+  consume(TOKEN_LEFT_PAREN, "Expect argument list after 'inner'.");
+  uint8_t argCount = argumentList();
+  
+  uint8_t constant = 0;
+  if (currentClass != NULL) {
+    char name[256];
+    sprintf(name, "%.*s@%x",
+        currentClass->methodName.length,
+        currentClass->methodName.start,
+        currentClass->id);
+    constant = makeConstant(OBJ_VAL(copyString(name, (int)strlen(name))));
   }
+
+  emitBytes(OP_INNER, constant);
+  emitByte(argCount);
 }
 static void this_(bool canAssign) {
   if (currentClass == NULL) {
@@ -561,7 +563,7 @@ ParseRule rules[] = {
   { NULL,     or_,     PREC_OR },         // TOKEN_OR
   { NULL,     NULL,    PREC_NONE },       // TOKEN_PRINT
   { NULL,     NULL,    PREC_NONE },       // TOKEN_RETURN
-  { super_,   NULL,    PREC_NONE },       // TOKEN_SUPER
+  { inner,    NULL,    PREC_NONE },       // TOKEN_INNER
   { this_,    NULL,    PREC_NONE },       // TOKEN_THIS
   { literal,  NULL,    PREC_NONE },       // TOKEN_TRUE
   { NULL,     NULL,    PREC_NONE },       // TOKEN_VAR
@@ -638,6 +640,7 @@ static void function(FunctionType type) {
 }
 static void method() {
   consume(TOKEN_IDENTIFIER, "Expect method name.");
+  currentClass->methodName = parser.previous;
   uint8_t constant = identifierConstant(&parser.previous);
 
   FunctionType type = TYPE_METHOD;
@@ -656,12 +659,17 @@ static void classDeclaration() {
   declareVariable();
 
   emitBytes(OP_CLASS, nameConstant);
+  uint16_t id = vm.nextClassID++;
+  emitByte((id >> 8) & 0xff);
+  emitByte(id & 0xff);
+  
   defineVariable(nameConstant);
 
   ClassCompiler classCompiler;
   classCompiler.name = parser.previous;
   classCompiler.hasSuperclass = false;
   classCompiler.enclosing = currentClass;
+  classCompiler.id = id;
   currentClass = &classCompiler;
 
   if (match(TOKEN_LESS)) {
@@ -672,10 +680,6 @@ static void classDeclaration() {
       error("A class cannot inherit from itself.");
     }
 
-    beginScope();
-    addLocal(syntheticToken("super"));
-    defineVariable(0);
-
     namedVariable(className, false);
     emitByte(OP_INHERIT);
     classCompiler.hasSuperclass = true;
@@ -689,10 +693,6 @@ static void classDeclaration() {
   consume(TOKEN_RIGHT_BRACE, "Expect '}' after class body.");
   emitByte(OP_POP);
 
-  if (classCompiler.hasSuperclass) {
-    endScope();
-  }
-
   currentClass = currentClass->enclosing;
 }
 static void funDeclaration() {
diff --git a/c/debug.c b/c/debug.c
index ce23cbc..321a133 100644
--- a/c/debug.c
+++ b/c/debug.c
@@ -82,8 +82,6 @@ int disassembleInstruction(Chunk* chunk, int offset) {
       return constantInstruction("OP_GET_PROPERTY", chunk, offset);
     case OP_SET_PROPERTY:
       return constantInstruction("OP_SET_PROPERTY", chunk, offset);
-    case OP_GET_SUPER:
-      return constantInstruction("OP_GET_SUPER", chunk, offset);
     case OP_EQUAL:
       return simpleInstruction("OP_EQUAL", offset);
     case OP_GREATER:
@@ -114,8 +112,8 @@ int disassembleInstruction(Chunk* chunk, int offset) {
       return byteInstruction("OP_CALL", chunk, offset);
     case OP_INVOKE:
       return invokeInstruction("OP_INVOKE", chunk, offset);
-    case OP_SUPER_INVOKE:
-      return invokeInstruction("OP_SUPER_INVOKE", chunk, offset);
+    case OP_INNER:
+      return invokeInstruction("OP_INNER", chunk, offset);
     case OP_CLOSURE: {
       offset++;
       uint8_t constant = chunk->code[offset++];
diff --git a/c/object.c b/c/object.c
index 4ba65f0..976fb4a 100644
--- a/c/object.c
+++ b/c/object.c
@@ -31,9 +31,10 @@ ObjBoundMethod* newBoundMethod(Value receiver, ObjClosure* method) {
   bound->method = method;
   return bound;
 }
-ObjClass* newClass(ObjString* name) {
+ObjClass* newClass(ObjString* name, uint16_t id) {
   ObjClass* klass = ALLOCATE_OBJ(ObjClass, OBJ_CLASS);
   klass->name = name; // [klass]
+  klass->id = id;
   initTable(&klass->methods);
   return klass;
 }
@@ -47,6 +48,7 @@ ObjClosure* newClosure(ObjFunction* function) {
   closure->function = function;
   closure->upvalues = upvalues;
   closure->upvalueCount = function->upvalueCount;
+  closure->classID = 0xffff;
   return closure;
 }
 ObjFunction* newFunction() {
diff --git a/c/object.h b/c/object.h
index dddcfe1..c560468 100644
--- a/c/object.h
+++ b/c/object.h
@@ -74,11 +74,14 @@ typedef struct {
   ObjFunction* function;
   ObjUpvalue** upvalues;
   int upvalueCount;
+  // If this closure is a method, the ID of the class that declares it.
+  uint16_t classID;
 } ObjClosure;
 
 typedef struct sObjClass {
   Obj obj;
   ObjString* name;
+  uint16_t id;
   Table methods;
 } ObjClass;
 
@@ -95,7 +98,7 @@ typedef struct {
 } ObjBoundMethod;
 
 ObjBoundMethod* newBoundMethod(Value receiver, ObjClosure* method);
-ObjClass* newClass(ObjString* name);
+ObjClass* newClass(ObjString* name, uint16_t id);
 ObjClosure* newClosure(ObjFunction* function);
 ObjFunction* newFunction();
 ObjInstance* newInstance(ObjClass* klass);
diff --git a/c/scanner.c b/c/scanner.c
index a577951..69b14ee 100644
--- a/c/scanner.c
+++ b/c/scanner.c
@@ -116,12 +116,18 @@ static TokenType identifierType()
         }
       }
       break;
-    case 'i': return checkKeyword(1, 1, "f", TOKEN_IF);
+    case 'i':
+      if (scanner.current - scanner.start > 1) {
+        switch (scanner.start[1]) {
+          case 'f': return TOKEN_IF;
+          case 'n': return checkKeyword(2, 3, "ner", TOKEN_INNER);
+        }
+      }
+      break;
     case 'n': return checkKeyword(1, 2, "il", TOKEN_NIL);
     case 'o': return checkKeyword(1, 1, "r", TOKEN_OR);
     case 'p': return checkKeyword(1, 4, "rint", TOKEN_PRINT);
     case 'r': return checkKeyword(1, 5, "eturn", TOKEN_RETURN);
-    case 's': return checkKeyword(1, 4, "uper", TOKEN_SUPER);
     case 't':
       if (scanner.current - scanner.start > 1) {
         switch (scanner.start[1]) {
diff --git a/c/scanner.h b/c/scanner.h
index 089c7c3..a8f82ec 100644
--- a/c/scanner.h
+++ b/c/scanner.h
@@ -20,7 +20,7 @@ typedef enum {
   // Keywords.
   TOKEN_AND, TOKEN_CLASS, TOKEN_ELSE, TOKEN_FALSE,
   TOKEN_FOR, TOKEN_FUN, TOKEN_IF, TOKEN_NIL, TOKEN_OR,
-  TOKEN_PRINT, TOKEN_RETURN, TOKEN_SUPER, TOKEN_THIS,
+  TOKEN_PRINT, TOKEN_RETURN, TOKEN_INNER, TOKEN_THIS,
   TOKEN_TRUE, TOKEN_VAR, TOKEN_WHILE,
 
   TOKEN_ERROR,
diff --git a/c/vm.c b/c/vm.c
index 626f8c2..e2431c6 100644
--- a/c/vm.c
+++ b/c/vm.c
@@ -60,6 +60,8 @@ void initVM() {
   vm.grayCount = 0;
   vm.grayCapacity = 0;
   vm.grayStack = NULL;
+  
+  vm.nextClassID = 0;
 
   initTable(&vm.globals);
   initTable(&vm.strings);
@@ -175,6 +177,20 @@ static bool invoke(ObjString* name, int argCount) {
 
   return invokeFromClass(instance->klass, name, argCount);
 }
+static bool invokeInner(ObjString* name, int argCount) {
+  Value receiver = peek(argCount);
+  ObjInstance* instance = AS_INSTANCE(receiver);
+
+  Value method;
+  if (!tableGet(&instance->klass->methods, name, &method)) {
+    // No inner method, so discard args and return nil.
+    vm.stackTop -= argCount + 1;
+    push(NIL_VAL);
+    return true;
+  }
+
+  return call(AS_CLOSURE(method), argCount);
+}
 static bool bindMethod(ObjClass* klass, ObjString* name) {
   Value method;
   if (!tableGet(&klass->methods, name, &method)) {
@@ -221,6 +237,18 @@ static void closeUpvalues(Value* last) {
 static void defineMethod(ObjString* name) {
   Value method = peek(0);
   ObjClass* klass = AS_CLASS(peek(1));
+  
+  AS_CLOSURE(method)->classID = klass->id;
+  
+  ObjString* originalName = name;
+  Value existing;
+  while (tableGet(&klass->methods, name, &existing)) {
+    ObjClosure* existingClosure = AS_CLOSURE(existing);
+    char newNameChars[256];
+    sprintf(newNameChars, "%s@%x", originalName->chars, existingClosure->classID);
+    name = copyString(newNameChars, (int)strlen(newNameChars));
+  }
+  
   tableSet(&klass->methods, name, method);
   pop();
 }
@@ -378,15 +406,6 @@ static InterpretResult run() {
         break;
       }
 
-      case OP_GET_SUPER: {
-        ObjString* name = READ_STRING();
-        ObjClass* superclass = AS_CLASS(pop());
-        if (!bindMethod(superclass, name)) {
-          return INTERPRET_RUNTIME_ERROR;
-        }
-        break;
-      }
-
       case OP_EQUAL: {
         Value b = pop();
         Value a = pop();
@@ -466,12 +485,11 @@ static InterpretResult run() {
         frame = &vm.frames[vm.frameCount - 1];
         break;
       }
-
-      case OP_SUPER_INVOKE: {
+      
+      case OP_INNER: {
         ObjString* method = READ_STRING();
         int argCount = READ_BYTE();
-        ObjClass* superclass = AS_CLASS(pop());
-        if (!invokeFromClass(superclass, method, argCount)) {
+        if (!invokeInner(method, argCount)) {
           return INTERPRET_RUNTIME_ERROR;
         }
         frame = &vm.frames[vm.frameCount - 1];
@@ -517,9 +535,12 @@ static InterpretResult run() {
         break;
       }
 
-      case OP_CLASS:
-        push(OBJ_VAL(newClass(READ_STRING())));
+      case OP_CLASS: {
+        ObjString* name = READ_STRING();
+        uint16_t id = READ_SHORT();
+        push(OBJ_VAL(newClass(name, id)));
         break;
+      }
 
       case OP_INHERIT: {
         Value superclass = peek(1);
diff --git a/c/vm.h b/c/vm.h
index 9ce5805..56c48a4 100644
--- a/c/vm.h
+++ b/c/vm.h
@@ -32,6 +32,8 @@ typedef struct {
   int grayCount;
   int grayCapacity;
   Obj** grayStack;
+  
+  uint16_t nextClassID;
 } VM;
 
 typedef enum {
diff --git a/test/inner/arguments.lox b/test/inner/arguments.lox
new file mode 100644
index 0000000..b8032b4
--- /dev/null
+++ b/test/inner/arguments.lox
@@ -0,0 +1,16 @@
+class A {
+  method(a, b) {
+    print "A.method " + a + " " + b;
+    inner(b, a);
+  }
+}
+
+class B < A {
+  method(a, b) {
+    print "B.method " + a + " " + b;
+  }
+}
+
+B().method("first", "second");
+// expect: A.method first second
+// expect: B.method second first
diff --git a/test/inner/inner_at_top_level.lox b/test/inner/inner_at_top_level.lox
new file mode 100644
index 0000000..9d3756f
--- /dev/null
+++ b/test/inner/inner_at_top_level.lox
@@ -0,0 +1 @@
+inner("bar"); // Error at 'inner': Cannot use 'inner' outside of a class.
diff --git a/test/inner/inner_in_top_level_function.lox b/test/inner/inner_in_top_level_function.lox
new file mode 100644
index 0000000..5201e64
--- /dev/null
+++ b/test/inner/inner_in_top_level_function.lox
@@ -0,0 +1,3 @@
+fun foo() {
+  inner("arg"); // Error at 'inner': Cannot use 'inner' outside of a class.
+}
diff --git a/test/inner/missing_argument_list.lox b/test/inner/missing_argument_list.lox
new file mode 100644
index 0000000..78e1b9a
--- /dev/null
+++ b/test/inner/missing_argument_list.lox
@@ -0,0 +1,5 @@
+class A {
+  method() {
+    inner; // Error at ';': Expect argument list after 'inner'.
+  }
+}
diff --git a/test/inner/no_inner.lox b/test/inner/no_inner.lox
new file mode 100644
index 0000000..7483e21
--- /dev/null
+++ b/test/inner/no_inner.lox
@@ -0,0 +1,14 @@
+class A {
+  method(a) {
+    print inner();
+    print inner(1, 2, 3);
+    print a;
+  }
+}
+
+class B < A {}
+
+B().method("arg");
+// expect: nil
+// expect: nil
+// expect: arg
diff --git a/test/inner/simple.lox b/test/inner/simple.lox
new file mode 100644
index 0000000..6801a61
--- /dev/null
+++ b/test/inner/simple.lox
@@ -0,0 +1,18 @@
+class A {
+  method() {
+    print "A.method() before";
+    inner();
+    print "A.method() after";
+  }
+}
+
+class B < A {
+  method() {
+    print "B.method()";
+  }
+}
+
+B().method();
+// expect: A.method() before
+// expect: B.method()
+// expect: A.method() after


================================================
FILE: note/answers/chapter29_superclasses/3.md
================================================
I have a solution that implements the right semantics and makes inner calls as
fast as any other method call. I won't walk through it in detail, but there is
a diff in this directory that you can apply to the result of chapter 29's code
to see the full thing.

The basic idea is that each `inner()` call gets compiled to a call to a method
whose name is a combination of the surrounding method name and a unique ID for
the containing class. So in:

```lox
class A {
  foo() {
    inner();
  }
}
```

The compiler desugars it to something like:

```lox
class A {
  foo() {
    this.foo@0();
  }
}
```

Here, "foo@0" is all part of the method name. "0" is the ID of the class A, and
we use "@" as a separator to ensure the generated name can't collide with a real
method name. At runtime, when a subclass inherits from a superclass, we copy
down all of the superclass's methods as before. That doesn't change. But when
the subclass then defines its *own* methods, we do some extra work.

Before storing the method in the subclass's method table, we look for an
existing method with that name. If we find one, it means an "outer" method with
that name already exists on some superclass. In that case, this subclass method
definition must become an inner method and thus we need to change its name. But
to what?

We know we need to append a class ID, but it's not clear which one. We extend
ObjClass to store its ID. We also extend ObjClosure to store the class ID of the
class where the method is declared. (We could make a separate ObjMethod type for
this, but I was lazy and put it in ObjClosure even though it's only used for
closures that are method bodies.)

When defining a new method in a subclass, if we see a method with that name in
the table already, then that method is the outermost method that the subclass's
method is an inner method for. So we look at the class ID of the method already
in the table, and then generate a new name for the new method that includes
that class ID. So in:

```lox
class A { // ID 0.
  foo() {
    inner();
  }
}

class B < A { // ID 1.
  foo() {}
}
```

When we execute the `OP_METHOD` for `foo()` in B, we have already copied the
definition of `foo()` from A into B's method table. We see that collision. So we
look up the class ID stored in that closure and find 0. So then we change the
name of the method we're defining to `foo@0` instead.

We can't stop there. There may be multiple levels of `inner()` methods in
superclasses, so we look up `foo@0` in the method table too. If we find *that*
as well, then we look for *its* class ID. We keep looping like that walking
down the inheritance hierarchy until we eventually find an open slot that
corresponds to `inner()` on the lowest class in the hierarchy and slot our new
method there.

That's basically it. Since we already compile `inner()` calls to be method calls
on `this` with a correctly synthesized name, they will route to the right
method definition and behave as they should using the existing runtime code we
have for method dispatch.

The only missing piece is what happens when you call `inner()` in a class where
there is no subclass that refines it. We don't want that to be a runtime error
since there's no way for a superclass to detect that. Instead, we treat it as if
there is an empty method that returns `nil`.

To implement that, I just made a new `OP_INNER` instruction to use instead of
`OP_INVOKE` for `inner()` calls. It behaves almost exactly like `OP_INVOKE`
except that in the case where no method could be found, instead of aborting, it
discards any argument stack slots and then pushes `nil`. Another option would
be to actually compile default empty methods into the class, but then we'd have
to take care not to incorrectly inherit those and have them get in the way of
real calls.

For all the details, apply the diff to the code and see how it looks.


================================================
FILE: note/blurb.txt
================================================
Software engineers use programming languages every day, but few of us understand how those languages are designed and implemented. Crafting Interpreters gives you that insight by implementing two complete interpreters from scratch. In the process, you'll learn parsing, compilation, garbage collection, and other fundamental computer science concepts. But don't be intimidated! Crafting Interpreters walks you through all of this one step at a time with an emphasis on having fun and getting your hands dirty.

---

Despite using them every day, most software engineers know little about how programming languages are designed and implemented. For many, their only experience with that corner of computer science was a terrifying "compilers" class that they suffered through in undergrad and tried to blot from their memory as soon as they had scribbled their last NFA to DFA conversion on the final exam.

That fearsome reputation belies a field that is rich with useful techniques and not so difficult as some of its practitioners might have you believe. A better understanding of how programming languages are built will make you a stronger software engineer and teach you concepts and data structures you'll use the rest of your coding days. You might even have fun.

This book teaches you everything you need to know to implement a full-featured, efficient scripting language. You'll learn both high-level concepts around parsing and semantics and gritty details like bytecode representation and garbage collection. Your brain will light up with new ideas, and your hands will get dirty and calloused.

Starting from main(), you will build a language that features rich syntax, dynamic typing, garbage collection, lexical scope, first-class functions, closures, classes, and inheritance. All packed into a few thousand lines of clean, fast code that you thoroughly understand because you wrote each one yourself.

---

Bob Nystrom is a senior software engineer at Google working on the Dart programming language. Before discovering a love of programming languages, he developed games at Electronic Arts. He is the author of the best-selling book "Game Programming Patterns".

---

We use programming languages every day, but few of us know how they are designed and implemented. Crafting Interpreters teaches you that by building two complete interpreters from scratch. You'll learn parsing, compilation, garbage collection, and other fundamental CS concepts. But don't be intimidated! You learn it one step at a time with an emphasis on having fun and getting your hands dirty.

================================================
FILE: note/contents.txt
================================================
high-level goal: a *small* book that builds a complete, efficient interpreter.
instead of a wide text about programming language*s*, it is a single path
through the language space. aim for 60k words.

possible mission: cover most of the topics needed to understand how mri, cpython, and lua work.

stuff *not* included:
- type systems
- ahead of time compilation
- machine code
- bottom up parsing
- parser generators
- ir
- context-sensitive analysis
- most compile-time optimizations

stuff to maybe include:
- your first language - simple stack-based language
- lexing
- recursive descent parsing
- scopes as dictionaries
- stack-based vm
- name binding of locals
- objects as dictionaries
- objects
- classes
- prototypes
- control flow
- functions
- first-class functions
- closures
- arithmetic
- primitive methods/functions
- external functions
- compiling to bytecode
- tree-walk interpreting
- mark-sweep collection
- copy collection
- lisp2 algorithm?
- bump-pointer allocation
- stack traces and line information
- lexer errors
- compile time errors
- runtime errors
- nan tagging
- object representation
- variables and assignment
- scope
- jitting
- internal representations
- roots
- fibers and coroutines
- passing arguments
- expression parsing
- aesthetics and usability of syntax design
- backjumping and infinite lookahead or context-sensitive grammars
- symbol tables and hash tables
- strings
- tail call optimization
- virtual machine
- stack frames
- stack based bytecode
- register based bytecode
- strings
- arrays
- hash tables (for internal use and as object in language)
- dynamic dispatch
- testing

- kinds of asides
  - historical context and people
  - further things to learn
  - omitted alternatives


================================================
FILE: note/design breaks.md
================================================
- The "novelty budget" and choosing which things to keep familiar and which to
  keep new.

- Learnability versus consistency. Being internally consistent leads to a
  simpler, more elegant language, but doesn't leverage what the user already
  knows.

- Building an entire ecosystem: implementation, spec, core libraries, docs, etc.

- Deciding how many things can be done at the library level versus special
  permissions only the language has.

- Choosing reserved words. Abbreviations to avoid name collisions.

- When to write a language spec and what it's for. Useful to help think
  precisely about semantics. Doesn't help improve usability of language. Need to
  actually play with an implementation for that. Very time consuming. Users
  don't need it. They need more user-friendly docs. Important when you have
  multiple competing implementations.

- When to introduce new operators. Very hard to read. If users don't have
  intuition about precedence, they can't even visually parse it until they know
  what it is. Tempting, but resist.

  Some language designers, when presented with a problem, think "I know, I'll
  add a new operator." Now you have %*$! problems.

  Note that overloading existing operators is different from defining new
  ones. With the former, readers can still parse the code.

- Syntactic novelty. First time design language and write lexer and parser,
  excited to be able to do things different from other languages because you
  can and because novelty is fun. Do that. Try all sorts of new things. Get it
  out of your system. In practice, novelty has high cost.

- Put a code sample on the front page of your site.

- Which language features can have user-defined behavior. For example, for
  loop in some languages works with user-defined sequence types. Some
  languages allow operators to be overloaded, even assignment. Some allow
  user-defined truthiness.

  Trade off is power versus concrete readability.

- Something about evolving a language after it's released.

---

Kinds of language docs.

- way you describe lang varies based on who description is for
- common kinds:
  - tutorial for beginners with language
    - example heavy
    - people learn by example
    - linear narrative
    - deliberately not comprehensive
  - guide
    - still informal
    - not as linear -- search friendly
    - still omit edge cases
  - reference
    - comprehensive
    - document all the things
    - final word for user
    - may be enough for implementer, assuming good faith
    - no narrative
  - specification
    - final word for implementer
    - legalese
    - if implementer follows all rules in spec, resulting code correctly
      implements lang
    - even if implementer was trying not to!
    - competing org
    - may be formal -- machine checkable

- if doc your own language, priority is in that order
- many langs never even get to spec

- when designing a language, have to capture and communicate design to
  implementers and users
  - need to have a language (mechanism) to do that
  - users and implementers have different needs!
- when implementing a language, need to consume design from designer
- even if designing and implementing your own language, still need to teach it
  to users
- also need to teach it to yourself
- writing it down clearly helps you organize design in your mind


================================================
FILE: note/images.md
================================================
Unscaled source images are scanned at 1200 DPI and stored at around that
resolution. (We'll want the extra pixels when it comes time to do the print
edition.)

Raw scans are saved grayscale unless scanned from graph paper drawings on blue
lines. Those are color, so we can extract the blue channel.

Images are drawn to scale so that text sizes and line thickness are consistent
across illustrations. Each two squares of quarter-ruled graph paper is one
column of the page.

Images are in one of three sizes:

- Normal images are the width of the text column.

- Aside images are the width of the sidebar.
  To avoid being too tall, aside images usually don't fill the entire width of
  the image. Instead, they are scaled to a smaller number of columns, then
  the canvas is grown to pad with pixels.

- Wide images cover the text column and sidebar.

## Web

Web images are scaled to two pixels per CSS pixel (to account for high DPI
screens). They are saved as 64-color PNGs.

The web site is organized into 48 pixel columns. At 2x for high DPI, that's
96 image pixels per column.

Final images are in one of three sizes:

- Normal:              12 columns = 1152 pixels = 24 graph paper squares
- Aside:                6 columns =  576 pixels
- Wide:   12 + 6 + 1 = 19 columns = 1824 pixels

## Print

Print images are 1200 DPI bitmaps saved as TIFF files. Sizes:

- Normal: 324 pts = 4.5  inches = 5400 pixels
- Aside:  126 pts = 1.75 inches = 2100 pixels
- Wide:   468 pts = 6.5  inches = 7800 pixels


================================================
FILE: note/indexing.md
================================================
## Acronyms

The expanded form gets most of the locators. The acronym gets a "See" cross-ref
to the expanded form:

    National Basketball Association 3, 5, 7, 9
    NBA See National Basketball Association

If the prose explains the acronym, the acronym also gets a locator.

## Subheadings

Subheadings are for attributes of the main heading, not a taxonomic grouping.
I wouldn't do:

    animals
      mammals
        bats

Nor:

    literal
      number
        decimal
        integer

Remember, indexes are mostly for looking things up by name, not defining a
hierarchy.

In cases where there are subtopics related to a main topic, prefer flattening
the subtopics and putting the locators there and then add "See also" cross-refs
to the main topic:

    double-precision 4, 5, 6
    floating point 1, 2, 3
    number 6, 7
      See also double-precision
      See also floating point

## Double-posting

Don't. Not because it's not useful but because it's too much of a headache.
Instead, use cross-links.

If the synonym is defined in the book, add an entry for the page and a separate
"See [also]" link for the main term. If the synonym is not defined in the book,
use a "See" link.

## Languages

Include references to programming languages. Prefer giving them subtopics since
there are so many.

Don't link to C and Java just because they are the implementation languages for
clox and jlox. Only mention them when there is something interesting about those
languages in particular.

## clox and jlox

Don't bother distinguishing entries by jlox and clox, like "error handling, in
jlox". The reader can tell pretty easily from the page number which interpreter
an entry is for.

## Other stuff

*   When indexing a design pattern, do "<Name> design pattern". Note the pattern
    name is capitalized.

*   Add entries for jlox classes named "<name> class". (Probably link the
    generated AST classes to the appendix.)

*   Add entries for jlox interfaces named "<name> interface".

*   Add entries for jlox and clox enum types named "<name> enum".

*   Add entries for clox structs named "<name> struct".

*   Add entries for each clox opcode. Link to the place where the opcode itself
    is defined in the enum.

*   Don't add entries for methods and functions.

## TODO at end

*   Go through and make sure I caught all the classes, structs, and enums that
    should have entries.

*   Make sure all opcodes have entries.

*   For topics with a lot of page numbers (like most language names), go through
    and see which ones can have subtopics. Or just remove some of them if they
    don't add value.

*   Look for topics that should be collapsed like "dynamic typing" and "dynamic
    types".


================================================
FILE: note/log.txt
================================================
2021/07/29 - *** launch! ***
2021/07/28 - work on blog post
2021/07/27 - work on blog post
2021/07/26 - responsive index page
2021/07/25 - index page photos
2021/07/24 - work on index page
2021/07/23 - work on index page, change cover colors
2021/07/22 - work on index page
2021/07/21 - work on index page
2021/07/20 - work on kindle version
2021/07/19 - work on kindle version
2021/07/18 - ebook covers, proof in calibre, kindle export
2021/07/17 - ebook styling
2021/07/16 - email
2021/07/15 - apply remaining proofreading fixes, justify challenges and notes
2021/07/14 - apply proofreading fixes up through 10
2021/07/13 - finish proofreading, sync styles and masters
2021/07/12 - proofread 26 through 28
2021/07/11 - proofread 19 through 25
2021/07/10 - proofread 14 through 18
2021/07/09 - proofread 7 through 13
2021/07/08 - finish proofreading 6, start 7
2021/07/07 - start proofreading 6
2021/07/06 - proofread 5
2021/07/05 - proofread 3 and 4
2021/07/04 - weekend
2021/07/03 - weekend
2021/07/02 - proofread 1 and 2
2021/07/01 - epub export
2021/06/30 - epub export
2021/06/29 - clean up code for ebook stuff
2021/06/28 - cover and ebook css
2021/06/27 - fix epub validation errors
2021/06/26 - epub export
2021/06/25 - github issues
2021/06/24 - work on ebook export
2021/06/23 - upload and order proof
2021/06/22 - cover design
2021/06/21 - cover lettering
2021/06/20 - cover illustration
2021/06/19 - cover illustration
2021/06/18 - cover illustration
2021/06/17 - cover illustration
2021/06/16 - cover illustration
2021/06/15 - cover design
2021/06/14 - copyright, isbn, etc.
2021/06/13 - acknowledgements and dedication
2021/06/12 - weekend
2021/06/11 - work on covers
2021/06/10 - update gpp, finish other todos
2021/06/09 - finish index
2021/06/08 - index jumping back and forth and calls and functions
2021/06/07 - index hash tables, global variables, and local variables
2021/06/06 - index compiling expressions, types of values, and strings
2021/06/05 - weekend
2021/06/04 - index scanning on demand
2021/06/03 - index classes, inheritance, chunks of bytecode, and a virtual machine
2021/06/02 - index functions and resolving and binding
2021/06/01 - index statements and state and control flow
2021/05/30 - weekend
2021/05/29 - weekend
2021/05/28 - indexing parsing expressions and evaluating expressions
2021/05/27 - indexing the lox language, scanning, representing code
2021/05/26 - indexing the lox language
2021/05/25 - indexing introduction and a map of the territory
2021/05/24 - indexing introduction and a map of the territory
2021/05/23 - design index
2021/05/22 - toc
2021/05/21 - fix running headers
2021/05/20 - typeset optimization and appendices
2021/05/19 - typeset superclasses
2021/05/18 - typeset methods and initializers
2021/05/17 - typeset methods and initializers
2021/05/16 - typeset classes and instances
2021/05/15 - typeset garbage collection
2021/05/14 - typeset closures
2021/05/13 - typeset closures
2021/05/12 - typeset calls and functions
2021/05/11 - typeset calls and functions
2021/05/10 - typeset calls and functions
2021/05/09 - weekend
2021/05/08 - typeset calls and functions
2021/05/07 - typeset jumping back and forth
2021/05/06 - typeset local variables
2021/05/05 - typeset global variables
2021/05/04 - typeset hash tables
2021/05/03 - typeset hash tables
2021/05/02 - typeset strings
2021/05/01 - typeset strings
2021/04/30 - issues and prs
2021/04/29 - typeset types of values
2021/04/28 - vaccine
2021/04/27 - typeset types of values
2021/04/26 - typeset compiling expressions
2021/04/25 - weekend
2021/04/24 - weekend
2021/04/23 - trim unneeded blank lines
2021/04/22 - start typesetting compiling expressions, trim lines
2021/04/21 - typeset scanning on demand
2021/04/20 - typeset a virtual machine
2021/04/19 - typeset chunks of bytecode
2021/04/12 to 2021/04/18 - spring break
2021/04/11 - typeset chunks of bytecode
2021/04/10 - typeset inheritance
2021/04/09 - typeset classes
2021/04/08 - typeset classes
2021/04/07 - typeset resolving and binding
2021/04/06 - typeset resolving and binding
2021/04/05 - typeset functions
2021/04/04 - weekend
2021/04/03 - typeset control flow
2021/04/02 - typeset statements and state
2021/04/01 - typeset evaluating expressions
2021/03/31 - finish typesetting parsing expressions
2021/03/30 - typeset parsing expressions
2021/03/29 - finish typesetting representing code, start parsing expressions
2021/03/28 - weekend
2021/03/27 - weekend
2021/03/26 - typeset representing code
2021/03/25 - table styles, typeset representing code
2021/03/24 - soft breaks for code output
2021/03/23 - typeset scanning
2021/03/22 - typeset scanning
2021/03/21 - typeset first three chapters
2021/03/20 - more xml tweaks, bug fixes
2021/03/19 - get aside js script working again
2021/03/18 - chapter start page
2021/03/17 - header design
2021/03/16 - rebuild styles
2021/03/15 - rebuild styles
2021/03/14 - xml output
2021/03/13 - xml output
2021/03/12 - xml output
2021/03/11 - xml output
2021/03/10 - xml output
2021/03/09 - page layout
2021/03/08 - page layout
2021/03/07 - weekend
2021/03/06 - weekend
2021/03/05 - page layout
2021/03/04 - page layout
2021/03/03 - page layout
2021/03/02 - page layout
2021/03/01 - page layout
2021/02/28 - weekend
2021/02/27 - weekend
2021/02/26 - work on layout
2021/02/25 - work on layout
2021/02/24 - work on layout
2021/02/23 - 5 issues
2021/02/22 - remove caching bit mask in optimization chapter
2021/02/21 - weekend
2021/02/20 - weekend
2021/02/19 - 8 issues and prs
2021/02/18 - copy edits for optimization
2021/02/17 - copy edits for methods and initializers and superclasses
2021/02/16 - copy edits for classes and instances
2021/02/15 - copy edits for garbage collection
2021/02/14 - weekend
2021/02/13 - weekend
2021/02/12 - copy edits for closures
2021/02/11 - copy edits for calls and functions
2021/02/10 - copy edits for local variables and jumping back and forth
2021/02/09 - copy edits for strings, hash tables, and global variables
2021/02/08 - copy edits for compiling expressions and types of values
2021/02/07 - weekend
2021/02/06 - weekend
2021/02/05 - copy edits for chunks of bytecode, a virtual machine, and scanning
2021/02/04 - copy edits for classes and inheritance
2021/02/03 - copy edits for resolving and binding
2021/02/02 - copy edits for control flow and functions
2021/02/01 - copy edits for evaluating expressions and statements and state
2021/01/31 - weekend
2021/01/30 - weekend
2021/01/29 - copy edits for parsing expressions
2021/01/28 - copy edits for representing code
2021/01/27 - copy edits for the lox language
2021/01/26 - change ellipsis formatting, copy edits for a map of the territory
2021/01/25 - copy edits for introduction, appendices, and section pages
2021/01/24 - weekend
2021/01/23 - weekend
2021/01/22 - make 8x10 layout
2021/01/21 - email
2021/01/20 - email
2021/01/19 - email
2021/01/18 - mlk
2021/01/17 - weekend
2021/01/16 - weekend
2021/01/15 - 4 issues and prs
2021/01/14 - fix illustrations, 6 issues
2021/01/13 - 3 issues
2021/01/12 - finish custom interpreters in test runner
2021/01/11 - 1 issue, work on custom interpreters in the test runner
2021/01/10 - weekend
2021/01/09 - weekend
2021/01/08 - 1 issue
2021/01/07 - 8 issues and prs
2021/01/06 - font research
2021/01/05 - sync changes with copy editor
2020/12/24 to 2021/01/04 - holiday break
2020/12/23 - 9585 words, edit optimization and appendices
2020/12/22 - 4602 words, edit superclasses
2020/12/21 - 8427 words, edit methods and initializers
2020/12/20 - weekend
2020/12/19 - weekend
2020/12/18 - 1559 words, finish classes and instances
2020/12/17 - 1025 words, classes and instances
2020/12/16 - 1254 words, classes and instances
2020/12/15 - 9286 words, edit garbage collection
2020/12/14 - 5833 words, finish closures
2020/12/13 - weekend
2020/12/12 - weekend
2020/12/11 - 4735 words, finish calls and functions
2020/12/10 - 5192 words, finish calls and functions
2020/12/09 - 4301 words, calls and functions
2020/12/08 - email
2020/12/07 - 2604 words, finish jumping back and forth
2020/12/06 - weekend
2020/12/05 - weekend
2020/12/04 - 1431 words, jumping back and forth
2020/12/03 - 2218 words, jumping back and forth
2020/12/02 - 4669 words, edit local variables
2020/12/01 - editing
2020/11/30 - 4756 words, edit global variables
2020/11/29 - weekend
2020/11/28 - weekend
2020/11/27 - 4523 words, finish hash tables
2020/11/26 - 3992 words, hash tables
2020/11/25 - email, 3 issues
2020/11/24 - email
2020/11/23 - email
2020/11/22 - weekend
2020/11/21 - weekend
2020/11/20 - email
2020/11/19 - 8 issues
2020/11/18 - update style guide, better word count
2020/11/17 - 5275 words, edit strings
2020/11/16 - email
2020/11/15 - weekend
2020/11/14 - weekend
2020/11/13 - 1949 words, finish types of values
2020/11/12 - 2691 words, types of values
2020/11/11 - 4178 words, finish compiling expressions
2020/11/10 - 446 words, compiling expressions
2020/11/09 - 1882 words, compiling expressions
2020/11/08 - weekend
2020/11/07 - weekend
2020/11/06 - 5374 words, edit scanning on demand
2020/11/05 - 1990 words, finish a virtual machine
2020/11/04 - 3302 words, a virtual machine
2020/11/03 - 759 words, a virtual machine
2020/11/02 - 5813 words, finish chunks of bytecode
2020/11/01 - weekend
2020/10/31 - weekend
2020/10/30 - 1883 words, chunks of bytecode
2020/10/29 - 160 words, a bytecode vm
2020/10/28 - 11 issues and prs
2020/10/27 - 2112 words, finish inheritance
2020/10/26 - 1966 words, inheritance
2020/10/25 - weekend
2020/10/24 - weekend
2020/10/23 - 2088 words, finish classes
2020/10/22 - 3677 words, classes
2020/10/21 - 742 words, classes
2020/10/20 - 1067 words, classes
2020/10/19 - 3731 words, finish resolving and binding
2020/10/18 - weekend
2020/10/17 - weekend
2020/10/16 - 2624 words, resolving and binding
2020/10/15 - 2878 words, finish functions
2020/10/14 - 2598 words, functions
2020/10/13 - 967 words, functions
2020/10/12 - 1887 words, finish control flow
2020/10/11 - weekend
2020/10/10 - weekend
2020/10/09 - 2545 words, control flow
2020/10/08 - 2771 words, finish statements and state
2020/10/07 - 3934 words, statements and state
2020/10/06 - 1749 words, statements and state
2020/10/05 - 1 issue
2020/10/04 - weekend
2020/10/03 - weekend
2020/10/02 - 4 issues
2020/10/01 - 5 issues
2020/09/30 - 2301 words, finish evaluating expressions
2020/09/29 - 1549 words, evaluating expressions
2020/09/28 - 613 words, evaluating expressions
2020/09/27 - weekend
2020/09/26 - weekend
2020/09/25 - finish long lines
2020/09/24 - work on long lines
2020/09/23 - 2926 words, finish parsing expressions
2020/09/22 - 1308 words, parsing expressions
2020/09/21 - 1820 words, parsing expressions
2020/09/20 - weekend
2020/09/19 - weekend
2020/09/18 - fix 8 issues
2020/09/17 - 3422 words, finish representing code
2020/09/16 - 2805 words, representing code
2020/09/15 - 2604 words, finish scanning
2020/09/14 - 596 words, scanning
2020/09/13 - weekend
2020/09/12 - weekend
2020/09/11 - 2273 words, scanning
2020/09/10 - ~5900 words, the lox language and a tree-walk interpreter
2020/09/09 - 1030 words, finish a map of the territory
2020/09/08 - 3795 words, a map of the territory
2020/09/07 - labor day
2020/09/06 - weekend
2020/09/05 - weekend
2020/09/04 - 1221 words, finish introduction
2020/09/03 - 2533 words, welcome and introduction
2020/09/02 - couple of issues
2020/09/01 - 12 issues and prs
2020/08/31 - weekend
2020/08/30 - weekend
2020/08/29 - design note styles, xml export
2020/08/28 - more xml export
2020/08/27 - more xml export
2020/08/26 - more xml export
2020/08/25 - work on lists
2020/08/24 - challenge styles
2020/08/23 - work on indesign styles, set up printer, test prints
2020/08/22 - weekend
2020/08/21 - weekend
2020/08/20 - work on indesign styles
2020/08/19 - work on indesign scripting
2020/08/18 - work on indesign scripting
2020/08/17 - work on indesign scripting
2020/08/17 - work on indesign scripting
2020/08/16 - weekend
2020/08/15 - weekend
2020/08/14 - work on indesign scripting
2020/08/13 - work on indesign scripting
2020/08/12 - work on indesign scripting
2020/08/11 - work on indesign scripting
2020/08/10 - more work on print design
2020/08/09 - more work on print design
2020/08/08 - weekend
2020/08/07 - more work on print design
2020/08/06 - more work on print design
2020/08/05 - work on code line length for print
2020/08/04 - more work on xml export and indesign styles
2020/08/03 - more work on xml export and indesign styles
2020/08/02 - weekend
2020/08/01 - weekend
2020/07/31 - vacation
2020/07/30 - vacation
2020/07/29 - vacation
2020/07/28 - vacation
2020/07/27 - vacation
2020/07/26 - more work on xml export and indesign styles
2020/07/25 - scan references, work on xml export
2020/07/24 - typos and email
2020/07/23 - work on page layout
2020/07/22 - fix #680
2020/07/21 - email
2020/07/20 - fix #683
2020/07/19 - weekend
2020/07/18 - weekend
2020/07/17 - fix #693
2020/07/16 - 3 issues
2020/07/15 - get rest of chapter snippets compiling
2020/07/14 - 1 issue
2020/07/13 - 6 issues
2020/07/12 - finish type punning, one other issue
2020/07/11 - weekend
2020/07/10 - work on type punning
2020/07/09 - email
2020/07/08 - 5 issues
2020/07/07 - 1 issue
2020/07/06 - 2 issues
2020/07/05 - weekend
2020/07/04 - weekend
2020/07/03 - fix #635
2020/07/02 - use code font for statement types
2020/07/01 - email and issues
2020/06/30 - email
2020/06/29 - three issues and prs
2020/06/28 - weekend
2020/06/27 - weekend
2020/06/26 - 404 page
2020/06/25 - 2 issues
2020/06/24 - bugs and email
2020/06/23 - bugs
2020/06/22 - fix a few issues, fix aside markers in context lines
2020/06/21 - weekend
2020/06/20 - weekend
2020/06/19 - email and issues
2020/06/18 - close a few issues
2020/06/17 - email
2020/06/16 - 3 issues
2020/06/15 - 9 prs and issues
2020/06/12 - email
2020/06/11 - email
2020/06/10 - fix trailing whitespace css issue
2020/06/09 - work on trailing whitespace css issue
2020/06/08 - work on trailing whitespace css issue
2020/06/07 - email and bug fixes
2020/06/06 - weekend
2020/06/05 - fix bugs
2020/06/04 - email
2020/06/03 - fix bugs
2020/06/02 - fix bugs
2020/06/01 - port benchmark.py to dart
2020/05/31 - more output clean up, ebnf grammar
2020/05/30 - bunch of clean up in html output
2020/05/29 - switch over to dart tools
2020/05/28 - port test.py to dart
2020/05/27 - port split_chapters and compile_snippets to dart
2020/05/26 - port build system to dart
2020/05/25 - port build system to dart
2020/05/24 - port build system to dart, fix missing context lines
2020/05/23 - port build system to dart
2020/05/22 - port build system to dart
2020/05/21 - port build system to dart
2020/05/20 - port build system to dart
2020/05/19 - port build system to dart
2020/05/18 - port build system to dart
2020/05/17 - port build system to dart
2020/05/16 - weekend
2020/05/15 - work on page layout
2020/05/14 - start work on page layout
2020/05/13 - email
2020/05/12 - email
2020/05/11 - email
2020/05/10 - weekend
2020/05/09 - weekend
2020/05/08 - email and issues
2020/05/07 - email and issues
2020/05/06 - email and issues
2020/05/05 - 17 prs and issues
2020/05/04 - email
2020/05/03 - weekend
2020/05/02 - weekend
2020/05/01 - email
2020/04/30 - talk to editor
2020/04/29 - email
2020/04/28 - file washington taxes, email
2020/04/27 - start catching up on email
*** nice long break ***
2020/04/05 - edit blog post, publish chapter, DONE!
2020/04/04 - write blog post
2020/04/03 - 9494 words, third draft "optimization"
2020/04/02 - 6670 words, finish second draft "optimization"
2020/04/01 - 3743 words, second draft "optimization"
2020/03/31 - one illustration
2020/03/30 - one illustration
2020/03/29 - ink and photoshop two illustrations
2020/03/28 - 1552 words, finish first draft "optimization"
             (bank 27) pencil 5 illustrations
             (bank 28) ink and photoshop 5 illustrations
2020/03/27 - 1157 words, first draft "optimization"
2020/03/26 - 1619 words, first draft "optimization"
2020/03/25 - 738 words, first draft "optimization"
             (bank 26) 1383 words, first draft "optimization"
2020/03/24 - 401 words, first draft "optimization"
2020/03/23 - 1370 words, first draft "optimization"
2020/03/22 - 1343 words, first draft "optimization"
2020/03/21 - 1518 words, finish outline "optimization"
2020/03/20 - 702 words, outline "optimization"
             (bank 25) 480 words, outline "optimization"
2020/03/19 - finish organizing snippets for "optimization"
             (bank 24) 1420 words, outline "optimization"
2020/03/18 - publish chapter
2020/03/17 - 5 issues
2020/03/16 - 14 issues
2020/03/15 - third draft of "superclasses"
             (bank 23) split snippets for "optimization"
2020/03/14 - one illustration
2020/03/13 - reword some prose around an illustration
2020/03/12 - photoshop and tweak illustration
2020/03/11 - pencil and ink illustration
2020/03/10 - one illustration
2020/03/09 - one illustration
2020/03/08 - taxes and email
2020/03/07 - one illustration
2020/03/06 - one illustration
2020/03/05 - 3138 words, finish second draft "superclasses"
2020/03/04 - 1156 words, second draft "superclasses"
2020/03/03 - finish writing up third challenge, fix bug, file gc issues
2020/03/02 - more work on third challenge
2020/03/01 - work on third challenge
2020/02/29 - first two challenge answers
2020/02/28 - 1429 words, finish first draft "superclasses"
2020/02/27 - 822 words, first draft "superclasses"
2020/02/26 - 802 words, first draft "superclasses"
2020/02/25 - 149 words, first draft "superclasses"
2020/02/24 - 686 words, first draft "superclasses"
2020/02/23 - 512 words, first draft "superclasses"
2020/02/22 - 1725 words, finish outline "superclasses"
2020/02/21 - 492 words, outline "superclasses"
2020/02/20 - split up and order snippets for "superclasses"
2020/02/19 - publish "methods and initializers"
2020/02/18 - file seattle taxes
2020/02/17 - use bank 21
2020/02/16 - email
2020/02/15 - 12 issues and prs
             (bank 22) email
2020/02/14 - 1372 words, finish third draft "methods and initializers"
2020/02/13 - 3159 words, third draft "methods and initializers"
2020/02/12 - 1336 words, third draft "methods and initializers"
2020/02/11 - fix python install
2020/02/10 - 1 illustration
2020/02/09 - 2503 words, third draft "methods and initializers"
2020/02/08 - photoshop illustration
2020/02/07 - ink two illustrations, photoshop one
2020/02/06 - pencil two illustrations
2020/02/05 - add aside
2020/02/04 - 1 illustration
2020/02/03 - 1 illustration
2020/02/02 - 1 illustration
2020/02/01 - 1 illustration
2020/01/31 - other challenge answers
2020/01/30 - 1 challenge answer
2020/01/29 - 1308 words, finish second draft "methods and initializers"
2020/01/28 - 1112 words, second draft "methods and initializers"
2020/01/27 - 1035 words, second draft "methods and initializers"
2020/01/26 - 610 words, second draft "methods and initializers"
2020/01/25 - 1461 words, second draft "methods and initializers"
2020/01/24 - 788 words, second draft "methods and initializers"
2020/01/23 - 328 words, second draft "methods and initializers"
2020/01/22 - 284 words, second draft "methods and initializers"
2020/01/21 - 1314 words, second draft "methods and initializers"
2020/01/20 - 1090 words, finish first draft "methods and initializers"
2020/01/19 - 781 words first draft "methods and initializers"
2020/01/18 - 391 words first draft "methods and initializers"
2020/01/17 - 1123 words first draft "methods and initializers"
2020/01/16 - 635 words first draft "methods and initializers"
2020/01/15 - 896 words first draft "methods and initializers"
2020/01/14 - 793 words first draft "methods and initializers"
2020/01/13 - 509 words first draft "methods and initializers"
2020/01/12 - 227 words first draft "methods and initializers"
2020/01/11 - 1063 words first draft "methods and initializers"
2020/01/10 - 1008 words first draft "methods and initializers"
2020/01/09 - 320 words first draft "methods and initializers"
2020/01/08 - 803 words finish outline "methods and initializers"
2020/01/07 - 891 words outline "methods and initializers"
2020/01/06 - 472 words outline "methods and initializers"
2020/01/05 - 520 words outline "methods and initializers"
2020/01/04 - 732 words outline "methods and initializers"
2020/01/03 - 415 words outline "methods and initializers"
2020/01/02 - reread part i classes chapter
2020/01/01 - couple of minor tweaks
2019/12/31 - publish chapter
2019/12/30 - more issues
2019/12/29 - issues and prs
2019/12/28 - email
2019/12/27 - order snippets for "methods and initializers"
2019/12/26 - figure out quote
2019/12/25 - split up snippets for "methods and initializers"
2019/12/24 - last two answers for "classes and instances"
2019/12/23 - use bank 20 (painting)
2019/12/22 - first two answers for "classes and instances"
2019/12/21 - third draft "classes and instances"
2019/12/20 - photoshop illustration
2019/12/19 - pencil and ink illustration
2019/12/18 - email
2019/12/17 - photoshop illustration
2019/12/16 - finish inking one illustration
2019/12/15 - pencil and half-ink one illustration
2019/12/14 - one illustration
2019/12/13 - 1451 words, finish second draft "classes and instances"
2019/12/12 - 2159 words, second draft "classes and instances"
2019/12/11 - 578 words, finish first draft "classes and instances"
2019/12/10 - 671 words, first draft "classes and instances"
2019/12/09 - 568 words, first draft "classes and instances"
2019/12/08 - 557 words, first draft "classes and instances"
2019/12/07 - 503 words, first draft "classes and instances"
2019/12/06 - quote
2019/12/05 - 508 words, first draft "classes and instances"
2019/12/04 - 667 words, finish outline "classes and instances"
2019/12/03 - 531 words, outline "classes and instances"
             (bank 21) 500 words, outline "classes and instances"
2019/12/02 - 5 issues, 1 pr
2019/12/01 - split up and order snippets for "classes and instances"
2019/11/30 - 2 prs, publish chapter
2019/11/29 - fix field name in illustrations
2019/11/28 - work on https://github.com/munificent/craftinginterpreters/pull/552
2019/11/27 - 3 issues
2019/11/26 - 5 issues
2019/11/25 - prs and issues, work on 531
2019/11/24 - 6 issues
2019/11/23 - 5 prs
2019/11/22 - 2495 words, finish third draft "garbage collection"
2019/11/21 - work on third draft "garbage collection"
2019/11/20 - 2161 words, third draft "garbage collection"
2019/11/19 - 2294 words, third draft "garbage collection"
2019/11/18 - 2108 words, third draft "garbage collection"
2019/11/17 - reorganize subheaders
2019/11/16 - baguette illustration
2019/11/15 - redo lines in latency illustration
2019/11/14 - one illustration
2019/11/13 - photoshop illustration, make bullet images
2019/11/12 - pencil and ink one illustration
2019/11/11 - use bank 19
2019/11/10 - use bank 18
2019/11/09 - one illustration
2019/11/08 - fix previous and pencil one large illustration
             (bank 20) ink and photoshop illustration
2019/11/07 - one illustration
             (bank 19) another illustration
2019/11/06 - one illustration
2019/11/02 - fix crash bug and other issues
2019/11/01 - look into crash bug
2019/10/31 - write first answer for "garbage collection"
2019/10/30 - code for first answer for "garbage collection"
2019/10/29 - 2166 words, finish second draft "garbage collection"
2019/10/28 - 767 words, second draft "garbage collection"
2019/10/27 - 2205 words, second draft "garbage collection"
2019/10/26 - 1049 words, second draft "garbage collection"
2019/10/25 - 1621 words, second draft "garbage collection"
2019/10/24 - 1142 words, second draft "garbage collection"
2019/10/23 - 587 words, finish first draft "garbage collection"
2019/10/22 - 2047 words, first draft "garbage collection"
2019/10/21 - 531 words, first draft "garbage collection"
2019/10/20 - 884 words, first draft "garbage collection"
2019/10/19 - 624 words, first draft "garbage collection"
2019/10/18 - 1502 words, first draft "garbage collection"
2019/10/17 - 779 words, first draft "garbage collection"
2019/10/16 - 1405 words, first draft "garbage collection"
2019/10/15 - 656 words, first draft "garbage collection"
2019/10/14 - 673 words, first draft "garbage collection"
2019/10/13 - 7 bug fixes, publish site
2019/10/12 - 8 bug fixes
2019/10/11 - 4 bug fixes
2019/10/10 - 6 prs, fix location for overloads
2019/10/09 - 1615 words, finish outline "garbage collection"
2019/10/08 - outline "garbage collection"
2019/10/07 - 675 words, outline "garbage collection"
2019/10/06 - 844 words, outline "garbage collection"
2019/10/05 - 800 words, outline "garbage collection"
2019/10/04 - 101 words, outline "garbage collection"
2019/10/03 - 223 words, outline "garbage collection"
2019/10/02 - finish organizing snippets
2019/10/01 - more splitting and organizing snippets
2019/09/30 - more splitting and organizing snippets
2019/09/29 - split up and order snippets
2019/09/28 - issues and pull requests
2019/09/27 - publish "closures"
2019/09/26 - split out snippets for "garbage collection"
2019/09/25 - 2142 words, finish third draft "closures"
2019/09/24 - 1173 words, third draft "closures"
2019/09/23 - 2484 words, third draft "closures"
2019/09/22 - 2633 words, third draft "closures"
2019/09/21 - 2084 words, third draft "closures"
2019/09/20 - tease apart and commit changes
2019/09/19 - 1 illustration
2019/09/18 - 1 illustration, fix positions of asides in chrome
2019/09/17 - 1 illustration
2019/09/16 - 1 illustration
2019/09/15 - 1 illustration
2019/09/14 - 1 illustration
2019/09/13 - 1 illustration
2019/09/12 - 1 illustration
2019/09/11 - 2176 words, second draft "closures"
2019/09/10 - 759 words, second draft "closures"
2019/09/09 - 1788 words, second draft "closures"
2019/09/08 - 1210 words, second draft "closures"
2019/09/07 - 2603 words, second draft "closures"
2019/09/06 - 1395 words, second draft "closures"
2019/09/05 - 699 words, second draft "closures"
2019/09/04 - fix an issue and publish site
2019/09/03 - email and bug fixes
2019/09/02 - six issues
2019/09/01 - three issues
2019/08/31 - use bank 17
2019/08/30 - answer 3
2019/08/29 - finish answer 2
2019/08/28 - work on answer 2
2019/08/27 - work on answer 2
2019/08/26 - write up text for answer 1
             (bank 18) work on answer 2
2019/08/25 - code for answer 1
2019/08/24 - 17 emails
             (bank 17) 950 words, finish first draft "closures"
2019/08/23 - first draft "closures"
2019/08/22 - 749 words, first draft "closures"
2019/08/21 - 1017 words, first draft "closures"
2019/08/20 - 1327 words, first draft "closures"
2019/08/19 - 1207 words, first draft "closures"
2019/08/18 - 1016 words, first draft "closures"
2019/08/17 - 1401 words, first draft "closures"
2019/08/16 - 419 words, first draft "closures"
2019/08/15 - email
2019/08/14 - 401 words, first draft "closures"
2019/08/13 - 1271 words, first draft "closures"
2019/08/12 - outline design note for "closures"
2019/08/11 - code samples for "closures" design note
2019/08/10 - more outline "closures"
2019/08/09 - more outline "closures"
2019/08/08 - more outline "closures"
2019/08/07 - 408 words outline "closures"
2019/08/06 - 447 words outline "closures"
2019/08/05 - 515 words outline "closures"
2019/08/04 - more outline "closures"
2019/08/03 - research, 283 words outline "closures"
2019/08/02 - 4 prs and issues
2019/08/01 - survey about (void), 6 prs and issues
2019/07/31 - 7 prs and issues
2019/07/30 - get "calls and functions" compiling partway through
2019/07/29 - finish snippet test script
2019/07/28 - set up test snippets for more chapters
2019/07/27 - set up test snippets for more chapters
2019/07/26 - set up test snippets for more chapters
2019/07/25 - start building system to test snippets in middle of chapters
2019/07/24 - close 2 prs and 6 issues
2019/07/23 - answers for "calls and functions"
2019/07/22 - 1812 words, finish third draft "calls and functions"
2019/07/21 - 2145 words, third draft "calls and functions"
2019/07/20 - 1658 words, third draft "calls and functions"
2019/07/19 - 3530 words, third draft "calls and functions"
2019/07/18 - photoshop illustration
2019/07/17 - ink illustration
2019/07/16 - 178 words outline closures
2019/07/15 - re-read jlox chapters around closures
2019/07/14 - merge calls branch into closures
2019/07/13 - pencil illustration
2019/07/12 - photoshop four illustrations
2019/07/11 - redraw three illustrations
2019/07/10 - fix more bugs
2019/07/09 - track down bugs, fix stack handling of script
2019/07/08 - finish ordering snippets for "closures"
2019/07/07 - ordering snippets for "closures"
2019/07/06 - ordering snippets for "closures"
2019/07/05 - outline "closures"
2019/07/04 - split snippets for "closures"
2019/07/03 - email and bug fixes
2019/07/02 - another illustration for "calls"
2019/07/01 - ink and photoshop 3 illustrations for "calls"
2019/06/30 - pencil 3 illustrations for "calls"
2019/06/29 - another illustration for "calls"
2019/06/28 - ink and photoshop illustration for "calls"
2019/06/27 - pencil illustration for "calls"
2019/06/26 - 2169 words finish second draft "calls"
2019/06/25 - 1676 words second draft "calls"
2019/06/24 - 1065 words second draft "calls"
2019/06/23 - 2167 words second draft "calls"
2019/06/22 - 1793 words second draft "calls"
2019/06/21 - 1124 words finish first draft "calls"
2019/06/20 - 1943 words first draft "calls"
2019/06/19 - 758 words first draft "calls"
2019/06/18 - 1050 words first draft "calls"
2019/06/17 - 744 words first draft "calls"
2019/06/16 - 75 words first draft "calls" (raccoon)
2019/06/15 - 490 words first draft "calls"
2019/06/14 - 865 words first draft "calls"
2019/06/13 - 472 words first draft "calls"
2019/06/12 - 550 words first draft "calls"
2019/06/11 - 840 words first draft "calls"
2019/06/10 - challenges and finish outline "calls"
2019/06/09 - 160 words outline "calls"
2019/06/08 - 1126 words outline "calls"
2019/06/07 - 585 words outline "calls"
2019/06/06 - 168 words outline "calls", change max arg count
2019/06/05 - simplify call, invoke, and super instructions
2019/06/04 - 544 words outline "calls"
2019/06/03 - 331 words outline "calls", quote, fix build script
2019/06/02 - 853 words outline "calls"
2019/06/01 - 571 words outline "calls"
2019/05/31 - 267 words outline "calls"
2019/05/30 - sign polish translation contract, 90 words outline "calls"
2019/05/29 - fix illustration bugs
2019/05/28 - email
2019/05/27 - email
2019/05/26 - email
2019/05/25 - bugs
2019/05/24 - email
2019/05/23 - merge pr
2019/05/22 - 10 issues, 3 pull requests
2019/05/21 - tweak some code
2019/05/20 - fix bugs
2019/05/19 - publish chapter
2019/05/18 - finish organizing snippets for "calls and functions"
2019/05/17 - still more work on "calls and functions" snippets
2019/05/16 - more work on "calls and functions" snippets
2019/05/15 - a little work on "calls and functions" snippets
2019/05/14 - a little work on "calls and functions" snippets
2019/05/13 - fix issues, merge branches, work on "calls and functions" snippets
2019/05/12 - fix/close 10 issues
2019/05/11 - 3414 words, finish third draft "jumping"
2019/05/10 - set up venv for python stuff, update markdown
2019/05/09 - work on snippets for "calls and functions"
2019/05/08 - work on snippets for "calls and functions"
2019/05/07 - work on snippets for "calls and functions"
2019/05/06 - split up snippets for "calls and functions"
2019/05/05 - 1734 words, third draft "jumping"
2019/05/04 - 1046 words, third draft "jumping"
2019/05/03 - another illustration
2019/05/02 - more illustrations
2019/05/01 - one illustration
2019/04/30 - email
2019/04/29 - photoshop two illustrations
2019/04/28 - photoshop three illustrations
2019/04/27 - ink two illustrations
2019/04/26 - ink two illustrations
2019/04/25 - ink two illustrations
2019/04/24 - draw and photoshop one illustration
2019/04/23 - pencil illustrations
2019/04/22 - pencil illustrations
2019/04/21 - pencil illustrations
2019/04/20 - answers 2 and 3 for "jumping"
2019/04/19 - answer 1 for "jumping"
2019/04/18 - fix rest of grammar examples in "representing code"
2019/04/17 - 1528 words, finish first draft "jumping"
2019/04/16 - 318 words, first draft "jumping"
2019/04/15 - 1049 words, first draft "jumping"
2019/04/14 - 559 words, first draft "jumping"
2019/04/13 - 661 words, first draft "jumping"
2019/04/12 - fix overlapping chapter number, quotes
2019/04/11 - 878 words, first draft "jumping"
2019/04/10 - 828 words, first draft "jumping"
2019/04/09 - use bank 16
2019/04/08 - fix five issues
             (bank 16) 463 words, first draft "jumping"
2019/04/07 - redo illustration for #378
2019/04/06 - work on #378
2019/04/05 - fix two issues
2019/04/04 - 863 words, design note
2019/04/03 - illustrations
2019/04/02 - outline design note
2019/04/01 - research goto considered harmful
2019/03/31 - 155 words outline "jumping back and forth"
2019/03/30 - 829 words outline "jumping back and forth"
2019/03/29 - 409 words outline "jumping back and forth"
2019/03/28 - 457 words outline "jumping back and forth"
2019/03/27 - 278 words outline "jumping back and forth"
2019/03/26 - rename chapter, 309 words outline "jumping back and forth"
2019/03/25 - split and order snippets for "jumping forward and back"
2019/03/24 - publish chapter
2019/03/23 - email
2019/03/22 - email
2019/03/21 - email
2019/03/20 - email
2019/03/19 - email
2019/03/18 - 1921 words, finish third draft "local variables"
2019/03/17 - 3029 words, third draft "local variables"
2019/03/16 - tweak illustration and add caption
2019/03/15 - another image
2019/03/14 - photoshop illustration
2019/03/13 - draw illustration
2019/03/12 - ink and photoshop illustration
2019/03/11 - pencil illustration
2019/03/10 - another illustration
2019/03/09 - ink and photoshop illustration
2019/03/08 - pencil and start inking illustration
2019/03/07 - 2131 words, finish second draft "local variables"
2019/03/06 - 944 words, second draft "local variables"
2019/03/05 - finish off #394
2019/03/04 - more work on #394
2019/03/03 - work on #394
2019/03/02 - work on #394
2019/03/01 - work on #394
2019/02/28 - work on #394
2019/02/27 - 59 words, second draft "local variables" (ginny :( )
2019/02/26 - 554 words, second draft "local variables"
2019/02/25 - 649 words, second draft "local variables"
2019/02/24 - 659 words, second draft "local variables"
2019/02/23 - 3 issues and prs
2019/02/22 - email
2019/02/21 - fix #389
2019/02/20 - 5 bugs
2019/02/19 - 8 bugs
2019/02/18 - 385 words, finish first draft "local variables"
2019/02/17 - 877 words, first draft "local variables"
2019/02/16 - 698 words, first draft "local variables"
2019/02/15 - more work on first draft "local variables"
2019/02/14 - 1224 words, first draft "local variables"
2019/02/13 - 169 words, first draft "local variables"
2019/02/12 - 634 words, first draft "local variables"
2019/02/11 - 395 words, first draft "local variables"
2019/02/10 - 217 words, first draft "local variables"
2019/02/09 - 835 words, finish outline "local variables"
2019/02/08 - 394 words, outline "local variables"
2019/02/07 - 472 words, outline "local variables"
2019/02/06 - email
2019/02/05 - 1 more issue
2019/02/04 - 10 issues
2019/02/03 - 67 words, outline "local variables"
2019/02/02 - 446 words, outline "local variables"
2019/02/01 - fix broken repo, look into broken payhip
2019/01/31 - simplify how "in its own initializer" error is reported, finish ordering snippets
2019/01/30 - work on ordering snippets
2019/01/29 - put chapter online, work on snippets for "local variables"
2019/01/28 - fix 5 issues
2019/01/27 - fix 17 issues
2019/01/26 - fix 5 issues, work on one more
2019/01/25 - fix a few issues, work on #327
2019/01/24 - fix issues
2019/01/23 - 1880 words, finish third draft "global variables", last answer
2019/01/22 - 2733 words, third draft "global variables"
2019/01/21 - photoshop illustration
2019/01/20 - one illustration
2019/01/19 - two illustrations
2019/01/18 - photoshop illustration
2019/01/17 - draw illustration
2019/01/16 - 6 emails
2019/01/15 - edit and tweak "global variables"
2019/01/14 - two answers for challenges in "global variables"
2019/01/13 - finish second draft of "global variables"
2019/01/12 - redo intro to "global variables"
2019/01/11 - 1030 words, second draft "global variables"
2019/01/10 - 800 words, second draft "global variables"
2019/01/09 - 17 emails
2019/01/08 - 10 emails
2019/01/07 - 376 words, second draft "global variables"
2019/01/06 - taxes
2019/01/05 - 361 words, second draft "global variables"
2019/01/04 - 181 words, second draft "global variables"
2019/01/03 - 1811 words, finish first draft "global variables"
2019/01/02 - 1070 words first draft "global variables"
2019/01/01 - 657 words first draft "global variables"
2018/12/31 - 740 words first draft "global variables"
2018/12/30 - challenges
2018/12/29 - titles for quotes
2018/12/28 - 526 words outline "global variables"
2018/12/27 - 230 words outline "global variables"
2018/12/26 - 563 words outline "global variables"
2018/12/25 - 561 words outline "global variables"
2018/12/24 - outlining and notes for "global variables"
2018/12/23 - order snippets for "global variables"
2018/12/22 - start outlining and notes for "global variables"
2018/12/21 - publish chapter, split up snippets for "global variables"
2018/12/20 - get email ready
2018/12/19 - 8 issues
2018/12/18 - 6 issues
2018/12/17 - 2 prs, 4 issues
2018/12/16 - pay taxes
2018/12/15 - write up answer 1, upgrade markdown package, issues, prs
2018/12/14 - work on answer 1
2018/12/13 - 402 words, finish third draft "hash tables"
2018/12/12 - 3135 words, third draft "hash tables"
2018/12/11 - 860 words, third draft "hash tables"
2018/12/10 - 2338 words, third draft "hash tables"
2018/12/09 - 1627 words, third draft "hash tables"
2018/12/08 - fix tombstone illustration text
2018/12/07 - rework prose for delete illustrations
2018/12/06 - three more illustrations
2018/12/05 - photoshop illustration, then do tombstone illustration
2018/12/04 - draw and ink illustration
2018/12/03 - photoshop illustration
2018/12/02 - draw and ink illustration
2018/12/01 - prose for insert sequence
2018/11/30 - photoshop insert illustrations
2018/11/29 - ink and scan insert illustrations
2018/11/28 - pencil illustrations
2018/11/27 - photoshop pigeons
2018/11/26 - pigeon illustration
2018/11/25 - draw pigeons, pencil one illustration
2018/11/24 - draw one illustration
2018/11/23 - couple more prs and bugs
2018/11/22 - merge a few prs
2018/11/21 - 965 words, second draft of "hash tables"
2018/11/20 - 549 words, second draft of "hash tables"
2018/11/19 - 1840 words, second draft of "hash tables"
2018/11/18 - 179 words, second draft of "hash tables"
2018/11/17 - 866 words, second draft of "hash tables"
2018/11/16 - 1074 words, second draft of "hash tables"
2018/11/15 - 257 words, second draft of "hash tables"
2018/11/14 - 796 words, second draft of "hash tables"
2018/11/13 - 1506 words, second draft of "hash tables"
2018/11/12 - 415 words, first draft of "hash tables"
2018/11/11 - 590 words, first draft of "hash tables"
2018/11/10 - 76 words, first draft of "hash tables"
2018/11/09 - 423 words, first draft of "hash tables"
2018/11/08 - 481 words, first draft of "hash tables"
2018/11/07 - 727 words, first draft of "hash tables"
2018/11/06 - 455 words, first draft of "hash tables"
2018/11/05 - 439 words, first draft of "hash tables"
2018/11/04 - 459 words, first draft of "hash tables"
2018/11/03 - 343 words, first draft of "hash tables"
2018/11/02 - 84 words, first draft of "hash tables" (sick)
2018/11/01 - 331 words, first draft of "hash tables"
2018/10/31 - 208 words, first draft of "hash tables"
2018/10/30 - 323 words, first draft of "hash tables"
2018/10/29 - 810 words, first draft of "hash tables"
2018/10/28 - 617 words, first draft of "hash tables"
2018/10/27 - first draft of "hash tables"
2018/10/26 - 642 words, first draft of "hash tables"
2018/10/25 - 733 words, new first draft of "hash tables"
2018/10/24 - outline more on "hash tables"
2018/10/23 - try to figure out what order to introduce concepts
2018/10/22 - rewrite some of "hash tables"
2018/10/21 - 344 words, first draft "hash tables"
2018/10/20 - 186 words, first draft "hash tables"
2018/10/19 - 401 words, first draft "hash tables"
2018/10/18 - 384 words, first draft "hash tables"
2018/10/17 - 221 words, finish outline "hash tables"
2018/10/16 - 351 words, outline "hash tables"
2018/10/15 - finish outlining deletion
2018/10/14 - token amount of work on deletion :(
2018/10/13 - work on deletion a bit
2018/10/12 - use tombstones in hash table
2018/10/11 - benchmark hash table deletion
2018/10/10 - research deleting from hash tables
2018/10/09 - more outlining "hash tables"
2018/10/08 - 327 words, outline "hash tables"
2018/10/07 - 390 words, outline "hash tables"
2018/10/06 - little more outlining "hash tables"
2018/10/05 - more outline "hash tables"
2018/10/04 - 470 words, outline "hash tables"
2018/10/03 - more outlining "hash tables"
2018/10/02 - 603 words, outline "hash tables"
2018/10/01 - 246 words, outline "hash tables"
2018/09/30 - 165 words, outline "hash tables"
2018/09/29 - 148 words, outline "hash tables"
2018/09/28 - finish ordering snippets
2018/09/27 - more ordering snippets
2018/09/26 - start ordering snippets
2018/09/25 - outlining on "hash tables"
2018/09/24 - put strings online
2018/09/23 - slice up snippets for "hash tables"
2018/09/22 - 4288 words, finish third draft "strings"
2018/09/21 - ink and photoshop illustration
2018/09/20 - one illustration, pencil another
2018/09/19 - redo illustration to fix 265
2018/09/18 - fix 282
2018/09/17 - fix two illustrations
2018/09/16 - close two issues
2018/09/15 - fix 269
2018/09/14 - merge 2 prs
2018/09/13 - finish illustration
2018/09/12 - start illustration
2018/09/11 - photoshop one illustration
2018/09/10 - draw and ink one illustration
2018/09/09 - 922 words, third draft "strings"
2018/09/08 - taxes
2018/09/07 - 2695 words, finish second draft "strings"
2018/09/06 - 1972 words, second draft "strings"
2018/09/05 - 516 words, second draft "strings"
2018/09/04 - answers for "strings" challenges
2018/09/03 - 330 words, finish first draft "strings"
2018/09/02 - 754 words, first draft "strings"
2018/09/01 - 1187 words, first draft "strings"
2018/08/31 - 1363 words, first draft "strings"
2018/08/30 - 1101 words, first draft "strings"
2018/08/29 - 505 words, first draft "strings"
2018/08/28 - 485 words, outline "strings"
2018/08/27 - 638 words, outline "strings"
2018/08/26 - 506 words, outline "strings"
2018/08/25 - 680 words, outline "strings"
2018/08/24 - 195 words, outline "strings"
2018/08/23 - 344 words, outline "strings"
2018/08/22 - start outlining
2018/08/21 - finish ordering snippets
2018/08/20 - more ordering snippets
2018/08/19 - 4 prs, 1 issue
2018/08/18 - more ordering snippets, fix aside comments in snippets
2018/08/17 - start ordering snippets
2018/08/16 - finish splitting up snippets, start outlining "strings"
2018/08/15 - 10 emails
2018/08/14 - start slicing up snippets for "strings"
2018/08/13 - put chapter online
2018/08/12 - fix three issues
2018/08/11 - answers for "types of values"
2018/08/10 - 1257 words, finish third draft "types of values"
2018/08/09 - 3203 words, third draft "types of values"
2018/08/08 - photoshop one illustration
2018/08/07 - draw one illustration
2018/08/06 - one illustration
2018/08/05 - redo prose around value size
2018/08/04 - use bank 15
2018/08/03 - three illustrations
2018/08/02 - redo location code in build script
2018/08/01 - handle trailing commas in snippets better
2018/07/31 - 5 issues
2018/07/30 - finish fixing horizontal code scrolling and long lines
2018/07/29 - work on fixing horizontal code scrolling
2018/07/28 - 1 pull request, 2 issues, try to fix another
2018/07/27 - use bank 14
2018/07/26 - 1766 words, finish second first draft "types of values"
2018/07/25 - 933 words, second first draft "types of values"
2018/07/24 - 1000 words, second draft "types of values"
             (bank 15) 1399 words, second draft "types of values"
2018/07/23 - 940 words, finish first draft "types of values"
2018/07/22 - 324 words, first draft "types of values"
2018/07/21 - use bank 13 (hauberk hackathon)
2018/07/20 - 516 words, first draft design note for "compiling expressions"
2018/07/19 - 892 words, first draft "types of values"
2018/07/18 - 824 words, first draft "types of values"
2018/07/17 - 100 words, first draft "types of values"
             (bank 14) 439 words, first draft "types of values"
2018/07/16 - 14 emails
2018/07/15 - 203 words, first draft "types of values"
2018/07/14 - 810 words, outline "types of values"
2018/07/13 - 793 words, outline "types of values"
2018/07/12 - 608 words, outline "types of values"
2018/07/11 - order snippets, start rough outline
2018/07/10 - split out snippets and start organizing "types of values"
2018/07/09 - put chapter online
2018/07/08 - write email
2018/07/07 - email, typos, and bug reports
2018/07/06 - 3388 words, finish third draft "compiling expressions"
2018/07/05 - 446 words, third draft "compiling expressions"
2018/07/04 - 1955 words, third draft "compiling expressions"
2018/07/03 - 7 emails
2018/07/02 - 9 emails
2018/07/01 - one illustration
2018/06/30 - one illustration
2018/06/29 - one illustration (that didn't work out)
2018/06/28 - ink and photoshop illustration
2018/06/27 - pencil illustration
2018/06/26 - research and close one issue
2018/06/25 - one question and answer for "compiling expressions", 5 issues
             (bank 13) one illustration
2018/06/24 - 772 words, finish second draft "compiling expressions", two answers
2018/06/23 - 1499 words, second draft "compiling expressions"
2018/06/22 - one illustration
2018/06/21 - quotes
2018/06/20 - 1543 words, second draft "compiling expressions"
2018/06/19 - 835 words, second draft "compiling expressions"
2018/06/18 - 1242 words, second draft "compiling expressions"
2018/06/17 - fix two more bugs
2018/06/16 - fix #238, other tweaks
2018/06/15 - 14 emails
2018/06/14 - work on #238
2018/06/13 - fix todos, 1 pr, 1 issue
2018/06/12 - 883 words, first draft "compiling expressions"
2018/06/11 - 177 words, first draft "compiling expressions"
2018/06/10 - 636 words, first draft "compiling expressions"
2018/06/09 - 750 words, first draft "compiling expressions"
2018/06/08 - 710 words, first draft "compiling expressions"
2018/06/07 - 827 words, first draft "compiling expressions"
2018/06/06 - 410 words, first draft "compiling expressions"
2018/06/05 - 488 words, first draft "compiling expressions"
2018/06/04 - 931 words, first draft "compiling expressions"
2018/06/03 - 367 words, first draft "compiling expressions"
2018/06/02 - finish outline "compiling expressions"
2018/06/01 - 379 words outline "compiling expressions"
2018/05/31 - 342 words outline "compiling expressions"
2018/05/30 - more outlining "compiling expressions"
2018/05/29 - more outlining "compiling expressions"
2018/05/28 - taxes, fix footer css on toc
2018/05/27 - more outlining "compiling expressions"
2018/05/26 - 284 words, outline "compiling expressions"
2018/05/25 - finish slicing and ordering snippets for "compiling expressions"
2018/05/24 - fix early return from init() in jlox
2018/05/23 - 2 pull requests, 4 issues
2018/05/22 - more slicing snippets for "compiling expressions"
2018/05/21 - more slicing snippets for "compiling expressions"
2018/05/20 - start slicing snippets for "compiling expressions"
2018/05/19 - use bank 12
2018/05/18 - post chapter
2018/05/17 - email
2018/05/16 - email and a couple of issues
2018/05/15 - email
2018/05/14 - 2208 words, finish third draft "scanning on demand"
2018/05/13 - 1748 words, third draft "scanning on demand"
2018/05/12 - 1330 words, third draft "scanning on demand"
2018/05/11 - 1 pr, better snippet locations inside c typedefs
2018/05/10 - better snippet locations inside c typedefs
2018/05/09 - 5 bugs
2018/05/08 - 3 prs, 3 bugs
2018/05/07 - 1606 words, finish second draft "scanning on demand"
2018/05/06 - 943 words, second draft "scanning on demand"
2018/05/05 - 1174 words, second draft "scanning on demand"
             (bank 12) 1022 words, second draft "scanning on demand"
2018/05/04 - 920 words, second draft "scanning on demand"
2018/05/03 - axolotl illustration
2018/05/02 - draw and photoshop one illustration
2018/05/01 - draw and photoshop one illustration
2018/04/30 - photoshop two illustrations
2018/04/29 - draw another illustration
2018/04/28 - draw one illustration
2018/04/27 - challenges and answers for "scanning on demand"
2018/04/26 - 889 words, first draft "scanning on demand"
2018/04/25 - 649 words, first draft "scanning on demand"
2018/04/24 - tweak em dashes
2018/04/23 - 994 words, first draft "scanning on demand"
2018/04/22 - use bank 11
2018/04/21 - 651 words, first draft "scanning on demand"
2018/04/20 - 725 words, first draft "scanning on demand"
2018/04/19 - 213 words, first draft "scanning on demand"
2018/04/18 - 857 words, first draft "scanning on demand"
2018/04/17 - 406 words, first draft "scanning on demand"
2018/04/16 - 879 words, finish outline "scanning on demand"
2018/04/15 - 1038 words outline "scanning on demand"
2018/04/14 - 540 words outline "scanning on demand"
2018/04/13 - use bank 10
2018/04/12 - switch-based keyword recognizer
2018/04/11 - finish ordering snippets
2018/04/10 - outlining and ordering snippets
2018/04/09 - use bank 9
2018/04/08 - finish slicing up snippets
2018/04/07 - fix typos
             (bank 11) start slicing up "scanning on demand"
2018/04/06 - publish "a virtual machine", fix typos
2018/04/05 - redo reallocate()
2018/04/04 - 5 pull requests, 10 bugs
2018/04/03 - 2000 words, finish third draft "a virtual machine"
             (bank 10) 2034 words, third draft "a virtual machine"
2018/04/02 - 1937 words, third draft "a virtual machine"
2018/04/01 - ink and photoshop two illustrations
2018/03/30 - pencil two illustrations
2018/03/29 - pancakes
2018/03/28 - photoshop three illustrations
2018/03/27 - ink more illustrations
2018/03/26 - one more illustration
2018/03/25 - record screencast, edit video
2018/03/24 - shoot video, start editing
             (bank 9) edit
2018/03/23 - sketch another illustration
2018/03/22 - one illustration
2018/03/21 - two illustrations
2018/03/20 - illustratin'
2018/03/19 - practice illustration
2018/03/18 - work on script and set
2018/03/17 - answer challenge three
2018/03/16 - answer two challenges
2018/03/15 - test video
2018/03/14 - 1763 words, second draft "a virtual machine"
2018/03/13 - 923 words, second draft "a virtual machine"
2018/03/12 - 1762 words, second draft "a virtual machine"
2018/03/11 - 727 words, second draft "a virtual machine"
2018/03/10 - 624 words, second draft "a virtual machine"
2018/03/09 - first draft of note for "a virtual machine"
2018/03/08 - outline note for "a virtual machine"
2018/03/07 - 402 words, challenges and finish first draft "a virtual machine"
2018/03/06 - 844 words, first draft "a virtual machine"
2018/03/05 - 506 words, first draft "a virtual machine"
2018/03/04 - 615 words, first draft "a virtual machine"
2018/03/03 - 922 words, first draft "a virtual machine"
2018/03/02 - 691 words, first draft "a virtual machine"
2018/03/01 - 362 words, first draft "a virtual machine"
2018/02/28 - 319 words, first draft "a virtual machine"
2018/02/27 - 585 words, first draft "a virtual machine"
2018/02/26 - 535 words, finish main outline "a virtual machine"
2018/02/25 - 824 words outline "a virtual machine"
2018/02/24 - 932 words outline "a virtual machine"
2018/02/23 - finish ordering snippets
2018/02/22 - order snippets
2018/02/21 - finish slicing "a virtual machine", start outlining
2018/02/20 - mostly finish slicing "a virtual machine" snippets
2018/02/19 - post chapter online
2018/02/18 - print style pr, 3 bugs, other stuff
2018/02/17 - 7 pull requests
2018/02/16 - second answer for "chunks of bytecode"
2018/02/15 - one answer for "chunks of bytecode"
2018/02/14 - 1921 words, finish third draft of "chunks of bytecode"
2018/02/13 - 2110 words, third draft of "chunks of bytecode"
2018/02/12 - 923 words, third draft of "chunks of bytecode"
2018/02/11 - 246 words, third draft of "chunks of bytecode" (service :( )
2018/02/10 - 514 words, third draft of "chunks of bytecode"
2018/02/09 - 1887 words, third draft of "chunks of bytecode"
2018/02/08 - more slicing "a virtual machine" snippets
2018/02/07 - start slicing "a virtual machine" snippets
2018/02/06 - more quotes, 7 emails
2018/02/05 - 2 emails, quote research
2018/02/04 - one more illustration
2018/02/03 - table for realloc()
2018/02/02 - photoshop 2 illustrations
2018/02/01 - ink 1 1/2 illustrations
2018/01/31 - pencil 2 illustrations for "chunks of bytecode", ink 1/2
2018/01/30 - illustration for "chunks of bytecode"
2018/01/29 - taxes
2018/01/28 - 1941 words, finish second draft "chunks of bytecode"
2018/01/27 - 2192 words, second draft "chunks of bytecode"
2018/01/26 - 1386 words, second draft "chunks of bytecode"
2018/01/25 - 499 words, second draft "chunks of bytecode" (flying)
2018/01/24 - 768 words, second draft "chunks of bytecode"
2018/01/23 - use bank 8
2018/01/22 - ink and photoshop ast illustration
             (bank 8) 1056 words, second draft "chunks of bytecode"
2018/01/21 - pencil ast illustration
2018/01/20 - figure out how to illustrate chunks
2018/01/19 - design note for "chunks of bytecode"
2018/01/18 - outline design note for "chunks of bytecode"
2018/01/17 - challenges for "chunks of bytecode"
2018/01/16 - 919 words, first draft "chunks of bytecode"
2018/01/15 - implement run-length encoding of line info
2018/01/14 - 775 words, first draft "chunks of bytecode"
2018/01/13 - 737 words, first draft "chunks of bytecode"
2018/01/12 - 186 words, first draft "chunks of bytecode"
2018/01/11 - 250 words, first draft "chunks of bytecode"
2018/01/10 - 254 words, first draft "chunks of bytecode" (4 :( )
2018/01/09 - 130 words, first draft "chunks of bytecode" (traveling)
2018/01/08 - 434 words, first draft "chunks of bytecode"
2018/01/07 - 653 words, first draft "chunks of bytecode"
2018/01/06 - 201 words, first draft "chunks of bytecode"
2018/01/05 - 216 words, first draft "chunks of bytecode"
2018/01/04 - 190 words, first draft "chunks of bytecode"
2018/01/03 - 147 words, first draft "chunks of bytecode", try storing ip on stack
2018/01/02 - 852 words, first draft "chunks of bytecode"
2018/01/01 - 623 words, first draft "chunks of bytecode"
2017/12/31 - work on optimizing clox
2017/12/30 - 537 words, first draft "chunks of bytecode"
2017/12/29 - finish outlining "chunks of bytecode"
2017/12/28 - more outlining "chunks of bytecode"
2017/12/27 - more outlining "chunks of bytecode"
2017/12/26 - 233 words outline "chunks of bytecode"
2017/12/25 - more work outlining "chunks of bytecode"
2017/12/24 - more work organizing "chunks of bytecode"
2017/12/23 - 149 words outline "chunks of bytecode"
2017/12/22 - 164 words outline "chunks of bytecode"
2017/12/21 - finish splitting snippets for "chunks of bytecode"
2017/12/20 - split up and organize snippets for "chunks of bytecode"
2017/12/19 - more outline "chunks of bytecode"
2017/12/18 - 186 words, outline "chunks of bytecode"
2017/12/17 - add generated ast appendix and link to chapters
2017/12/16 - add appendices and grammar appendix
2017/12/15 - 9 emails
2017/12/14 - 10 emails
2017/12/13 - 3 emails
2017/12/12 - 11 emails
2017/12/11 - put "inheritance" online
2017/12/10 - one more illustration for "inheritance"
2017/12/09 - merge 6 prs and close 7 bugs
2017/12/08 - 2723 words, finish third draft of "inheritance"
2017/12/07 - 1401 words, third draft of "inheritance"
2017/12/06 - incorporate illustration into text
2017/12/05 - process illustration
2017/12/04 - draw one illustration
2017/12/05 - draw and process one illustration
2017/12/03 - draw and process two illustrations
2017/12/02 - more quote digging
2017/12/01 - look for quotes
2017/11/30 - more splitting up code for "bytecode", outlining
2017/11/29 - start splitting up code for "bytecode"
2017/11/28 - prose for challenge 1 answer in inheritance
2017/11/27 - code for challenge 1 answer in inheritance
2017/11/26 - research c3 linearization
2017/11/25 - answer challenge 2 in inheritance
2017/11/24 - prose for challenge 3 answer in inheritance
2017/11/23 - code for challenge 3 answer in inheritance
2017/11/22 - 1105 words, finish second draft "inheritance"
2017/11/21 - 485 words, second draft "inheritance"
2017/11/20 - 580 words, second draft "inheritance"
2017/11/19 - 212 words, second draft "inheritance"
2017/11/18 - 751 words, second draft "inheritance"
2017/11/17 - 759 words, second draft "inheritance" (still sick)
2017/11/16 - 203 words, second draft "inheritance" (still sick)
2017/11/15 - tweaks and asides
2017/11/14 - 1526 words, finish first draft "inheritance"
2017/11/13 - broke the chain, sick and forgot, made up on 11/14
2017/11/12 - 303 words, first draft "inheritance" (sick)
2017/11/11 - 659 words, first draft "inheritance"
2017/11/10 - 453 words, first draft "inheritance"
2017/11/09 - 594 words, first draft "inheritance"
2017/11/08 - long aside on "sub-"
2017/11/07 - 535 words, first draft "inheritance"
2017/11/06 - finish challenges and outline for "inheritance"
2017/11/05 - outline conclusion
2017/11/04 - mostly done with outline, challenges
2017/11/03 - work on outline
2017/11/02 - research oop history
2017/11/01 - start outline and taking notes for "inheritance"
2017/10/31 - email, split up snippets for "inheritance"
2017/10/30 - post new chapter
2017/10/29 - write email
2017/10/28 - 1139 words, finish third draft of "classes"
2017/10/27 - fix #156
2017/10/26 - one more illustration for "classes"
2017/10/25 - 859 words, third draft of "classes"
2017/10/24 - 1579 words, third draft of "classes"
2017/10/23 - 1865 words, third draft of "classes"
2017/10/22 - 1147 words, third draft of "classes"
2017/10/21 - 769 words, third draft of "classes"
2017/10/20 - fix #147, #153, #131
2017/10/19 - 7 pull requests and a few bugs
2017/10/18 - redo section around this to take illustrations into account
2017/10/17 - photoshop two illustrations and work into text
2017/10/16 - draw two more illustrations for "classes"
2017/10/15 - finish fourth illustration for "classes"
2017/10/14 - start working on illustration four for "classes"
2017/10/13 - third illustration for "classes"
2017/10/12 - second illustration for "classes"
2017/10/11 - first illustration for "classes"
2017/10/10 - 968 words, finish second draft of "classes"
2017/10/09 - 1460 words, second draft of "classes"
2017/10/08 - 1318 words, second draft of "classes"
2017/10/07 - 1481 words, second draft of "classes"
2017/10/06 - 1138 words, second draft of "classes"
2017/10/05 - 902 words, second draft of "classes"
2017/10/04 - finish answers for "classes"
2017/10/03 - work on answers for "classes"
2017/10/02 - 897 words, design note for "classes", finish first draft
2017/10/01 - work on challenges for "classes"
2017/09/30 - 179 words, outline design note for "classes"
2017/09/29 - 2155 words across two sessions, first draft "classes"
2017/09/28 - 828 words, first draft "classes"
2017/09/27 - 835 words, first draft "classes"
2017/09/26 - broke the chain, busy in aarhus and forgot :(
             made up on 09/29
2017/09/25 - 417 words, first draft "classes"
2017/09/24 - 712 words, first draft "classes"
2017/09/23 - 891 words, first draft "classes"
2017/09/22 - 344 words, first draft "classes"
2017/09/21 - 327 words, first draft "classes"
2017/09/20 - finish rough outline "classes"
2017/09/19 - 773 words outline "classes"
2017/09/18 - 278 words outline "classes"
2017/09/17 - finish ordering and slicing snippets
2017/09/16 - more ordering and slicing up snippets
2017/09/15 - more ordering and slicing up snippets
2017/09/14 - last answers for chapter 11
2017/09/13 - first three answers for chapter 11
2017/09/12 - split up snippets and start outlining chapter 12
2017/09/11 - publish chapter
2017/09/10 - fix 1 bug, prep email
2017/09/09 - 4 prs and 5 bugs
2017/09/08 - 1853 words, finish third draft chapter 11
2017/09/07 - 1428 words, third draft chapter 11
2017/09/06 - 1101 words, third draft chapter 11
2017/09/05 - ~1870 words, third draft chapter 11
2017/09/04 - 1 more illustration
2017/09/03 - work illustrations into chapter
2017/09/02 - ~517 words, third draft chapter 11
2017/09/01 - ~500 words, third draft chapter 11
2017/08/31 - photoshop 4 illustrations
2017/08/30 - ink 4 illustrations
2017/08/29 - 1724 words, third draft chapter 11
2017/08/28 - 409 words, third draft chapter 11
2017/08/27 - sketch illustrations
2017/08/26 - explain semantic analysis
2017/08/25 - 1450 words, finish second draft chapter 11, delete ~50
2017/08/24 - 945 words, second draft chapter 11
2017/08/23 - 1322 words, second draft chapter 11, delete ~120
2017/08/22 - 849 words, second draft chapter 11, delete ~130
2017/08/21 - use bank 7
2017/08/20 - 760 words, second draft chapter 11, delete ~120
2017/08/19 - 828 words, second draft chapter 11, delete 140
2017/08/18 - 1000 words, finish first draft chapter 11
             (bank 7) 955 words first draft chapter 11
2017/08/17 - 874 words, first draft chapter 11
2017/08/16 - 958 words, first draft chapter 11
2017/08/15 - 592 words, first draft chapter 11
2017/08/14 - 546 words, first draft chapter 11
2017/08/13 - 829 words, first draft chapter 11
2017/08/12 - 315 words, first draft chapter 11
2017/08/11 - 490 words, first draft chapter 11
2017/08/10 - 142 words, first draft chapter 11 (camping, sick :( )
2017/08/09 - finish merging old and new outline
2017/08/08 - redo 321 words outline chapter 11
2017/08/07 - finish outline and code snippet splitting
2017/08/06 - work on reorganizing outline
2017/08/05 - 806 words outline chapter 11
2017/08/04 - 400 words outline chapter 11
2017/08/03 - 539 words outline chapter 11
2017/08/02 - 876 words outline chapter 11
2017/08/01 - label code snippets for chapter 11
2017/07/31 - publish chapter
2017/07/30 - prep email, fix #122
2017/07/29 - email
2017/07/28 - redo direction illustration, fix bugs
2017/07/27 - address 3 prs and work on 1 bug
2017/07/26 - 2092 words, finish third draft "functions"
2017/07/25 - 2917 words, third draft "functions"
2017/07/24 - fix #123
2017/07/23 - 1521 words, third draft "functions"
2017/07/22 - integrate illustrations into text
2017/07/21 - photoshop five illustrations
2017/07/20 - draw three illustrations
2017/07/19 - draw two illustrations
2017/07/18 - photoshop two illustrations, other chapter tweaks
2017/07/17 - draw two illustrations
2017/07/16 - 1120 words, finish second draft chapter 10, write answers
2017/07/15 - 953 words, second draft chapter 10
2017/07/14 - 1230 words, second draft chapter 10
2017/07/13 - 3007 words, second draft chapter 10
2017/07/12 - 1785 words, finish draft chapter 10
2017/07/11 - 885 words, first draft chapter 10
2017/07/10 - 564 words, first draft chapter 10
2017/07/09 - 1152 words, first draft chapter 10
2017/07/08 - 1073 words, first draft chapter 10
2017/07/07 - 631 words, first draft chapter 10
2017/07/06 - 547 words, finish outline chapter 10, first draft challenges
2017/07/05 - 169 words, outline chapter 10 :(
2017/07/04 - 948 words, outline chapter 10
2017/07/03 - 796 words, outline chapter 10
2017/07/02 - 526 words, outline chapter 10
2017/07/01 - finish organizing code snippets for chapter 10
2017/06/30 - more slicing and organizing code snippets for chapter 10
2017/06/29 - code snippets for chapter 10
2017/06/28 - email and bug fixes
2017/06/27 - address 3 prs and 4 bugs
2017/06/26 - publish "control flow"
2017/06/25 - address 2 prs and 4 bugs
2017/06/24 - answers for chapter 9
2017/06/23 - third draft of chapter 9, all 5837 words
2017/06/22 - illustrate dangling else
2017/06/21 - add support for aside images above the text
2017/06/20 - illustrate turing machine
2017/06/19 - 2499 words, finish second draft chapter 9 (cut ~80)
2017/06/18 - 1818 words, second draft chapter 9 (cut ~250)
2017/06/17 - answers for chapter 8
2017/06/16 - 688 words, finish first draft chapter 9
2017/06/15 - redo turing machine part
2017/06/14 - 1666 words first draft chapter 9
2017/06/13 - ~1440 words first draft chapter 9
2017/06/12 - 854 words first draft chapter 9
2017/06/11 - hack script to estimate completion date
2017/06/10 - 897 words outline chapter 9
2017/06/09 - 700 words outline chapter 9
2017/06/08 - 648 words outline chapter 9
2017/06/07 - split up snippets for control flow, start outlining
2017/06/06 - fix snippet labeling (#97) and 3 other bugs
2017/06/05 - work on fixing snippet labeling (#97)
2017/06/04 - paperwork, 1 pr, 5 bugs
2017/06/03 - 6 prs, 4 bugs
2017/06/02 - 12 emails
2017/06/01 - put chapter 8 online
2017/05/31 - one pr, two bugs
2017/05/30 - one pr, two bugs
2017/05/29 - show date range in copyright, one pr
2017/05/28 - resolve 3 bugs
2017/05/27 - merge 2 prs, fix 6 bugs
2017/05/26 - 1484 words, finish third draft chapter 8
2017/05/25 - 1412 words, third draft chapter 8
2017/05/24 - 2392 words, third draft chapter 8 (cut ~80)
2017/05/23 - 1219 words, third draft chapter 8
2017/05/22 - 1198 words, third draft chapter 8 (cut ~60)
2017/05/21 - 579 words, third draft chapter 8 (cut ~50)
2017/05/20 - scan and process illustrations
2017/05/19 - cactus illustration
2017/05/18 - environment illustrations
2017/05/17 - letter and scan brain illustration
2017/05/16 - brain illustration 2
2017/05/15 - brain illustration
2017/05/14 - 1663 words, finish second draft chapter 8 (cut ~100)
2017/05/13 - 1051 words, second draft chapter 8 (cut ~50)
2017/05/12 - 1348 words, second draft chapter 8 (cut ~40)
2017/05/11 - 1342 words, second draft chapter 8 (cut ~100)
2017/05/10 - 1236 words, second draft chapter 8
2017/05/09 - 1090 words, second draft chapter 8 (cut ~100)
2017/05/08 - 786 words, second draft chapter 8
2017/05/07 - tinker with splitting chapter 8 in two, replace quote
2017/05/06 - 1621 words, finish first draft chapter 8
2017/05/05 - 907 words, first draft chapter 8
2017/05/04 - 648 words, first draft chapter 8
2017/05/03 - 708 words, first draft chapter 8
2017/05/02 - 736 words, first draft chapter 8
2017/05/01 - 1147 words, first draft chapter 8
2017/04/30 - 1027 words, first draft chapter 8
2017/04/29 - 245 words, first draft chapter 8
2017/04/28 - 558 words, first draft chapter 8
2017/04/27 - 377 words and quote, first draft chapter 8
2017/04/26 - 803 words, first draft chapter 8
2017/04/25 - 362 words, first draft chapter 8
2017/04/24 - finish design note outline, full outline for chapter 8
2017/04/23 - 371 words, sketch outline for chapter 8 design note
2017/04/22 - 1248 words, outline chapter 8
2017/04/21 - 958 words, outline chapter 8
2017/04/20 - 917 words, outline chapter 8
2017/04/19 - 904 words, outline chapter 8
2017/04/18 - ~200 words, outline chapter 8
2017/04/17 - use bank 6
2017/04/16 - use bank 5
2017/04/15 - 413 words, outline chapter 8
2017/04/14 - use bank 4
2017/04/13 - use bank 3
2017/04/12 - use bank 2
2017/04/11 - use bank 1
2017/04/10 - finish splitting snippets for chapter 8, rough outline
2017/04/09 - start splitting up snippets for chapter 8
2017/04/08 - email
2017/04/07 - put chapter 7 online
2017/04/06 - prep email, resolve 2 bugs
2017/04/05 - 2000 words, third draft chapter 7
           - (bank 6) 2209 words, finish third draft chapter 7
2017/04/04 - skeleton illustration
2017/04/03 - muffin illustration, sketch skeleton
2017/04/02 - lightning illustration for chapter 7
2017/04/01 - 5 pull requests, start working on glossary
2017/03/31 - write design note, 1200 words second draft chapter 7
             (bank 5) 1511 words, finish second draft chapter 7
2017/03/30 - 1405 words, second draft chapter 7
2017/03/29 - 10 emails
2017/03/28 - answers to chapter 7 questions
2017/03/27 - 354 words, finish first draft chapter 7
2017/03/26 - 600 words, first draft chapter 7
           - (bank 4) 609 words, first draft chapter 7
2017/03/25 - 1000 words, first draft chapter 7
           - (bank 3) 690 words, first draft chapter 7
2017/03/24 - 623 words, first draft chapter 7
2017/03/23 - tweak outline, challenges chapter 7
2017/03/22 - finish outline chapter 7
2017/03/21 - ~400 words outline chapter 7
2017/03/20 - put chapter 6 online
2017/03/19 - order snippets
2017/03/18 - write email, slice chapter 7 code into snippets
2017/03/17 - fix bugs and merge prs
2017/03/16 - 2000 words, third draft chapter 6
           - (bank 2) 2136 words, finish third draft chapter 6
2017/03/15 - 1609 words, third draft chapter 6
2017/03/14 - 1 more illustration for chapter 6
2017/03/13 - 1 illustration for chapter 6
2017/03/12 - 2 illustrations for chapter 6
           - (bank 1) 1 1/2 illustrations for chapter 6
2017/03/11 - 1031 words, finish second draft chapter 6
2017/03/10 - 780 words, second draft chapter 6
2017/03/09 - 569 words, first draft design note for chapter 6
2017/03/08 - outline design note for chapter 6
2017/03/07 - 313 words, second draft chapter 6
2017/03/06 - 3514 words, second draft chapter 6
2017/03/05 - 488 words, finish first draft of chapter 6, answers for challenges
2017/03/04 - 1017 words, first draft chapter 6
2017/03/03 - 1014 words, first draft chapter 6
2017/03/02 - 708 words, first draft chapter 6
2017/03/01 - rework part of first draft chapter 6
2017/02/28 - 590 words, first draft chapter 6
2017/02/27 - precedence table and css for tables
2017/02/26 - 688 words, first draft chapter 6
2017/02/25 - 576 words, first draft chapter 6
2017/02/24 - finish outlining chapter 6 (except design note)
2017/02/23 - 447 words outline chapter 6
2017/02/22 - finish redoing panic mode recovery
2017/02/21 - more panic mode hacking
2017/02/20 - revisit panic mode synchronization
2017/02/19 - more outlining, look into error recovery
2017/02/18 - 188 words outline parsing expressions, split up code
2017/02/17 - email and bug fixes, start working on chapter 6
2017/02/16 - remove topics from toc, publish chapter 5
2017/02/15 - 2499 words, finish third draft representing code
2017/02/14 - 1819 words, third draft representing code
2017/02/13 - 1592 words, third draft representing code
2017/02/12 - 2533 words, finish second draft representing code
2017/02/11 - 2737 words, second draft representing code
2017/02/10 - 793 words, second draft representing code
2017/02/09 - last two illustrations
2017/02/08 - rows and columns illustrations
2017/02/07 - table illustration
2017/02/06 - play grammar illustration
2017/02/05 - evaluate tree illustration
2017/02/04 - 519 words, finish first draft representing code
2017/02/03 - 319 words, allow hiding snippet location in build script
2017/02/02 - 1354 words, first draft representing code
2017/02/01 - 352 words, first draft representing code (sick :( )
2017/01/31 - close 6 issues and 2 pull requests
2017/01/30 - 1250 words, first draft representing code
2017/01/29 - 315 words, first draft representing code (sick :( )
2017/01/28 - 800 words, first draft representing code
2017/01/27 - 1076 words, first draft representing code
2017/01/26 - 348 words, first draft representing code
2017/01/25 - first draft and answers for challenges representing code
2017/01/24 - finish outline representing code
2017/01/23 - >1000 words, outline representing code
2017/01/22 - 738 words, outline representing code
2017/01/21 - 579 words, outline representing code
2017/01/20 - 736 words, outline representing code
2017/01/19 - more work on README, fix #24, start on chapter 5
2017/01/18 - fix bug accessing "this" in super calls (#20), README
2017/01/17 - fix 11 issues, lots more email
2017/01/16 - email and bug fixes
2017/01/15 - go live!
2017/01/14 - fourth draft of scanning, tweak styles, copyright image
2017/01/13 - link to next chapter in footer, tweak code styles
2017/01/12 - 3004 words, finish third draft, write answers to challenges
2017/01/11 - 2244 words, third draft scanning
2017/01/10 - lexigator illustration
2017/01/09 - lexeme illustration
2017/01/08 - 2449 words, finish second draft scanning
2017/01/07 - 1445 words, second draft scanning
2017/01/06 - 1556 words, second draft scanning
2017/01/05 - redo scanning headers, third draft part intro, explain snippet in intro
2017/01/04 - second draft part intro, up nav links, rename parts
2017/01/03 - first draft part ii intro, hunt down quotes
2017/01/02 - figure out a license
2017/01/01 - 772 words design note for scanner, aside markers in code
2016/12/31 - 1081 words first draft scanner, mostly done
2016/12/30 - 1085 words first draft scanner
2016/12/29 - 722 words first draft scanner
2016/12/28 - 561 words first draft scanner
2016/12/27 - 1127 words first draft scanner
2016/12/26 - finish outlining and splitting, reallow multiline strings
2016/12/25 - fix some bugs in chapter splitting, make multiline strings an error
2016/12/24 - slice up more scanning code into snippets
2016/12/23 - allow named snippets
2016/12/22 - handle surrounding context in code snippets
2016/12/21 - simplify error reporting
2016/12/20 - 1063 words, outline scanning
2016/12/19 - fix "lox language" to make print not a function
2016/12/18 - optimize refreshing in build server
2016/12/17 - better validation of transclusion
2016/12/16 - finish transclusion code
2016/12/15 - work on code to transclude code chunks
2016/12/14 - prototype lookup illustration
2016/12/13 - finish class lookup illustration
2016/12/12 - work on class lookup illustration
2016/12/11 - one more draft, read out loud, of lox chapter
2016/12/10 - 1445 words, finish third draft lox chapter
2016/12/09 - 2041 words, third draft lox chapter
2016/12/08 - ~2300 words, third draft lox chapter
2016/12/07 - second draft, entire lox chapter
2016/12/06 - finish first draft lox, design note, style design note
2016/12/05 - 1092 words first draft of lox
2016/12/04 - 1325 words first draft of lox
2016/12/03 - 916 words first draft of lox
2016/12/02 - 996 words first draft of lox
2016/12/01 - 881 words first draft of lox
2016/11/30 - delete pancake
2016/11/29 - 325 words first draft lox (sick gretch :( )
2016/11/28 - 385 words first draft lox, syntax highlighter
2016/11/27 - finish outlining lox chapter, toy with adding lambdas to lox
2016/11/26 - outline lox chapter
2016/11/25 - research and notes for lox chapter
2016/11/24 - research and notes for lox chapter
2016/11/23 - ~3000 words, finish third draft map of territory
2016/11/22 - start outlining lox chapter
2016/11/21 - ~2500 words, third draft map of territory
2016/11/20 - finish reorganizing map of territory, add numbers to sections
2016/11/19 - ~1000 words third draft of map of territory, reorganize a bunch
2016/11/18 - start third draft of map of territory
2016/11/17 - third draft of index, welcome, and introduction
2016/11/16 - ink and scan venn diagram
2016/11/15 - sketch languages venn diagram
2016/11/14 - finish and photoshop plant
2016/11/13 - more work on plant drawing
2016/11/12 - draw plants
2016/11/11 - process tokens, draw and scan ast, sketch plants
2016/11/10 - draw tokens
2016/11/09 - finish redoing characters illustration
2016/11/08 - work on redoing characters illustration
2016/11/07 - characters illustration, toy with illustration background color
2016/11/06 - little languages illustration
2016/11/05 - scan yak, illustrate bootstrapping
2016/11/04 - illustrate yak and elephant in tree
2016/11/03 - revise 1949 words, finish second draft map of territory
2016/11/02 - revise 601 words map of territory, add kildall aside
2016/11/01 - revise ~200 words map of territory :(
2016/10/31 - revise 917 words map of territory
2016/10/30 - revise 961 words map of territory
2016/10/29 - revise ~1000 words, and rewrite some of the beginning of map
2016/10/28 - 438 words revise map of territory
2016/10/27 - revise 1041 words introduction (banked on 10/24)
2016/10/26 - revise 948 words intro
2016/10/25 - revise 465 words intro
2016/10/24 - revise 1030 words, welcome and introduction
2016/10/23 - work on exercises, remove glossary
2016/10/22 - finish first draft intro, split into two chapters
2016/10/21 - 1062 words intro
2016/10/20 - 430 words transpiler
2016/10/19 - 235 words runtime, fix code highlighting, other edits
2016/10/18 - 599 words code generation
2016/10/17 - rewrite optimization section ~540 words
2016/10/16 - ~1000 words intro, tweak styles
2016/10/15 - ~1000 words of intro
2016/10/14 - reorganize and weave together existing intro prose
2016/10/13 - figure out new outline for introduction
2016/10/12 - find intro quote, start reorganizing
2016/10/11 - 814 words first draft introduction
2016/10/10 - 972 words first draft introduction
2016/10/09 - 165 words, draft welcome, 776 words first draft introduction
2016/10/08 - finish lettering, draw mountain
2016/10/07 - illustrating (mostly lettering)
2016/10/06 - start inking mountain
2016/10/05 - start sketching full size mountain illustration
2016/10/04 - test out illustration size on pages
2016/10/03 - practice illustrations
2016/10/02 - revise intro outline, outline welcome part
2016/10/01 - 1340 words, finish outlining introduction, rename parts
2016/09/30 - 1457 words outline introduction
2016/09/29 - split notes into chapters, 701 words outline introduction
2016/09/28 - finish topics, merge function call and user-defined functions chapters
2016/09/27 - tweak mobile arrow styles, more chapter topics
2016/09/26 - favicon, fill in more chapter topics
2016/09/25 - photoshop new index background
2016/09/24 - fix some todos, take photos for index background
2016/09/23 - set up mailchimp stuff
2016/09/22 - put sign-up form on pages
2016/09/21 - fix collapsing nav
2016/09/20 - responsive table of contents
2016/09/19 - redo font sizes and spacing for mobile
2016/09/18 - start setting up mailing list, more work on index
2016/09/17 - work on index page
2016/09/16 - get rid of "reaching the summit"
2016/09/15 - finish getting rid of chapter 4
2016/09/14 - combine chapters 4 and 5
2016/09/13 - finish putting new logotype into design
2016/09/12 - start working on putting new logotype into design
2016/09/11 - hand letter second logotype
2016/09/10 - hand letter logotype
2016/09/09 - table/mobile styles for toc, work on build.py
2016/09/08 - more work contents design
2016/09/07 - work on templates, toc, build script
2016/09/06 - stop using tombstones to delete from hash table
2016/09/05 - more poking around hash table benchmarks
2016/09/04 - more poking around hash table benchmarks
2016/09/03 - benchmark hash table implementation
2016/09/02 - work on table of contents template
2016/09/01 - work on styles, headers, table of contents, etc.
2016/08/31 - work on restyling navigation
2016/08/30 - unify "statements" and "global variables" chapters, rename "inheritance" chapter
2016/08/29 - move native functions into function call chapter
2016/08/28 - try storing ip in register, added benchmark runner
2016/08/27 - add optimization chapter -- nan tagging, hash masking
2016/08/26 - hack on adding stack traces to jlox
2016/08/25 - vox -> lox everywhere
2016/08/24 - more name noodling, clean up repo
2016/08/23 - noodle on language names
2016/08/22 - start writing pancake language, update book chapters
2016/08/21 - put bytecode tracing into chapters, "native functions" chapter
2016/08/20 - get "inheritance" split out
2016/08/19 - get "methods and initializers" split out
2016/08/18 - track down uninitialized memory bug in split chapters
2016/08/17 - get "classes and instances" split out
2016/08/16 - get "garbage collection" split out, running, and tested
2016/08/15 - get "closures" split out, running, and tested
2016/08/14 - get "functions" split out, running, and tested
2016/08/13 - get "jumping forward and back" split out, running, and tested
2016/08/12 - get "local variables" running and tested
2016/08/11 - option to run all interpreters in test runner
2016/08/10 - "global variables" chapter, with tests
2016/08/09 - finish "statements"
2016/08/08 - start working on "statements" chapter for cvox
2016/08/07 - split up and land changes, generate diffs for c chapters
2016/08/06 - add diff generation to makefile, reorganize natives in jvox, toy with hasField() native
2016/08/05 - toy with adding "delete" statement
2016/08/04 - update hash table code to handle delete and tombstones correctly
2016/08/03 - work on hash table deletion, write hash table test code
2016/08/02 - work on "hash tables" and string interning
2016/08/01 - get "strings" working and start on "hash tables"
2016/07/31 - get "types of values" working
2016/07/30 - get compiling expressions working
2016/07/29 - get scanning on demand chapter working, start work on compiling expressions
2016/07/28 - get virtual machine chapter working following chunks, update chapters
2016/07/27 - get chunks chapter working
2016/07/26 - noodle on whether to introduce chunks
2016/07/25 - get virtual machine chapter working, plan chapters
2016/07/24 - work on virtual machine chapter
2016/07/23 - start working on splitting up c chapters
2016/07/22 - reorganize stuff now that we have a print statement
2016/07/21 - optimize empty for clauses in cvox
2016/07/20 - add a dedicated print statement, implement for in cvox
2016/07/19 - renumber and reorder chapters
2016/07/18 - move block scope to statements chapter and closures to functions
2016/07/17 - implement c-style for loop in jvox, tests
2016/07/16 - rework resolver, start working on for
2016/07/15 - get inheritance chapter working
2016/07/14 - get chapter 10 (classes) working
2016/07/13 - tests for chapters 6, 7, and 8
2016/07/12 - tests for chapters 2, 4, and 5
2016/07/11 - get test.py set up to run and track chapter versions
2016/07/10 - work on function and block chapters
2016/07/09 - rename chapters and get control flow chapter working
2016/07/08 - more sorting out sections around variables and block scope
2016/07/07 - start separating out block scope from other scopes
2016/07/06 - hack on resolution and variable lookup
2016/07/05 - work on chapter 6, reorganize ast generation stuff
2016/07/04 - finish removing context, get chapter 5 running
2016/07/03 - get split chapter 4 working, start on 5, get rid of context in visitors
2016/07/02 - clean up ast printer and generator, more chapter splitting
2016/07/01 - clean up framework code for jvox
2016/06/30 - work on getting intellij set up for split chapters
*** every day after this ***
2016/06/24 - finish adding chapter markers, start writing script to split
2016/06/23 - start interleaving chapter notes into real jvox code
2016/06/22 - use stack of maps for local scopes in jvox
2016/06/21 - more work organizing java code into chapters
2016/06/17 - start organizing java code into chapters
2016/06/16 - fit java code in 72 columns
2016/06/13 - tweak css to fit 72 columns of code
2016/06/10 - lots of clean up
2016/06/09 - make the gc strategy more realistic
2016/06/07 - try to clean up some stuff in cvox
2016/06/04 - fix some bugs, work on outline, split out Chunk and value.h
2016/05/31 - clean up after unboxing work
2016/05/30 - more work on unboxing values
2016/05/29 - start working on unboxed values
2016/05/28 - more mobile tweaks, handle limits in cvox
2016/05/24 - start working on mobile layout
2016/05/19 - show and style file name by code samples
2016/05/18 - null -> nil in jvox
2016/05/17 - null -> nil in cvox
2016/05/14 - handle rebound superclass
2016/05/12 - cache hashes in strings
2016/05/11 - include function name in stack traces, other todos
2016/05/10 - take advantage of string interning
2016/05/09 - constructors -> init()
2016/05/08 - more work on super
2016/05/07 - finish cleaning up errors
2016/05/06 - super calls in cvox
2016/05/05 - better cvox error reporting
2016/05/04 - start improving cvox error reporting
2016/05/03 - revamp jvox scanner/parser
2016/05/02 - more error improvements
2016/05/01 - work on better syntax error reporting
2016/04/30 - hash table
2016/04/29 - minor code clean up
2016/04/27 - string interning
2016/04/26 - hash strings
2016/04/24 - constructors in cvox
2016/04/23 - copy-down inheritance, closurizing methods
2016/04/12 - inheritance
2016/04/08 - property -> field, start working on inheritance
2016/04/07 - make tables not objects, other clean up
2016/04/06 - method calls, this, port more wren tests
2016/04/05 - finish resolving in jvox
2016/04/04 - add resolving step to jvox
2016/04/03 - more scope corner cases, handle wrong types in operators
2016/04/02 - get jvox working mostly like cvox
2016/04/01 - start syncing up jvox to latest semantics
2016/03/31 - fix a bunch of corner cases around scope
2016/03/30 - miscellaneous clean up
2016/03/29 - more work on classes, mainly fields
2016/03/28 - more work on classes
2016/03/27 - start working on classes
2016/03/26 - finish closures
2016/03/25 - start working on closures
2016/03/24 - simplify compiling constants
2016/03/23 - pointers instead of array indices to refer to stack, bug fixes
2016/03/22 - return statement, go back to single stack and upvalues
2016/03/21 - function calls, arguments, parameters
2016/03/20 - more work on functions
2016/03/19 - start working on functions, heap allocate frames
2016/03/18 - runtime error locations, call frames, fix bugs around variables
2016/03/14 - local variables, calls, native functions, run scripts, makefile, etc.
2016/03/13 - scheme-like semantics for top level variables, branching
2016/03/06 - start implementing global variables
2016/03/04 - bools, comparison, strings, grouping, unary
2016/03/03 - switch to mark/sweep
2016/03/02 - create ObjFunction at beginning of compile
2016/03/01 - start writing compiler, compile infix operators
2016/02/29 - sketch out more object types
2016/02/28 - start slapping together gc and vm for cvox
2016/02/06 - lots of parser/scanner clean up, start collecting quotes
2016/02/05 - split out java interpreter into per-package versions
2016/02/04 - work on resolver
2016/02/03 - work on resolver
2016/02/01 - cleaner implementation of locals
2016/01/30 - runtime error reporting
2016/01/29 - get rest of java interpreter working, tests, etc.
2016/01/28 - port parser and half of interpreter to java, repl
2016/01/27 - start porting lexer to java
2016/01/19 - null literal
2016/01/18 - return statement
2016/01/16 - comments
2016/01/15 - run from files, properties, classes, lots of other stuff
2016/01/14 - start writing interpreter, error reporting, flow control, etc.
2016/01/13 - parse functions and classes, repl
2016/01/09 - properties and tests for calls and properties
2016/01/08 - assignment
2016/01/07 - parse expression and block statements
2016/01/06 - logical expressions
2016/01/05 - parser tests
2016/01/04 - use metaprogramming for ast types
2016/01/02 - get js working in browser again, unary expressions
2015/11/19 - more work on styles and toc in build script
2015/11/18 - css for narrow desktop, build script

lots of untracked stuff before this...

================================================
FILE: note/names.txt
================================================
music:
-----
fuzz
jive
jam
mojo
haze
quid
moxy
pick
howl
funk
vox

rocks and minerals:
------------------
agate
beryl
flint
galena
jet
jade
jasper
marl
mica
nickel
onyx
peridot
quartz
sard
shale
tufa

mountain-related:
----------------
crag
tor

climbing:
--------
bolt - A point of protection permanently installed in a hole drilled into the rock, to which a metal hanger is attached, having a hole for a carabiner or ring.
cairn - A distinctive pile of stones placed to designate a summit or mark a trail, often above the treeline.
Cam - A spring-loaded device used as protection.
Chock - A mechanical device, or a wedge, used as anchors in cracks. A naturally occurring stone wedged in a crack.
Col - A small pass or "saddle" between two peaks. Excellent for navigation as when standing on one it's always down in two, opposite, directions and up in the two directions in between those.
Crag - A small area with climbing routes, often just a small cliff face or a few boulders.
Crux - The most difficult portion of a climb.
Deck - The ground. To hit the ground, usually the outcome of a fall.
Dyno - A dynamic move to grab a hold that would otherwise be out of reach. Generally both feet will leave the rock face and return again once the target hold is caught. Non-climbers would call it a jump or a leap.
Jib - A particularly small foothold, usually only large enough for the big toe, sometimes relying heavily on friction to support weight.
Jug - A shortened term for Jug Hold, both noun and verb.
Jug hold - A large, easily held hold. Also known simply as a jug.
Nub - A little hold that only a few fingers can grip, or the tips of the toes.
Peg - A piton.
Serac - A large ice tower.
Sprag - A type of hand position where the fingers and thumb are opposed.

food:
----
pancake
crepe
roux
lox

pnw:
---
moss
elk
pika
marmot
fir
cedar
hemlock


================================================
FILE: note/objects.txt
================================================
A couple of issues related to working with objects:

* How do we distinguish field access from "getters"?
* How do we distinguish method calls from invoking a function stored in a field?
* Can methods be torn off?
* How do we handle properties/methods on built-in types?
* How do we handle operators? Are they methods or special?
* What object represents a class?
* How are objects constructed?

Within a method, there are a few namespaces in play:
- The lexical namespace of local variables and then the surrounding global ones.
- The namespace of fields on the instance.
- The namespace of methods on the class (and then any inherited methods).

The simplest way to handle these is to have distinct syntaxes for each. We
could do:

- Bare names for variables.
- "@" or some other sigil for fields.
- Explicit property access (including on "this") for methods.

That still leaves an ambiguity between a nullary method that returns a function
versus a method call with parameters. The simplest solution is to do what Java
does and not have getters, though that's gross for stuff like list.length(). If
we don't want to have tear-offs, that might be the best solution.

---

For the class object, let's take a page from JS and make it the constructor.
You then invoke it just like a function to create an instance. (I.e. no "new"
keyword.)

This is the bare minimum needed by the "class object" -- to be a generator for
instances. Since this is just a teaching language, we can mention but not
deal with metaclasses, static methods, etc.

class Vector {
  Vector(x, y) {
    @x = x; @y = y;
  }

  length() {

  }
}

This won't be *just* a function. It needs some additional stuff: in particular
the method set that instances use and a superclass reference. But it's
a superset of what a function can do.

---

It is nice to use a "."-based style for properties, both on this and other
objects. It's familiar to users coming from C, JS, etc:

    foo.bar = "value";

It does cause a couple of ambiguities:

    foo.bar;

Is that:

A. Accessing a field "bar" on foo?
B. Closurizing a reference to the method "bar" on foo?

Likewise:

    foo.bar();

Is that:

C. Invoking the method "bar" on foo with zero arguments?
D. Accessing the field "bar" on foo, which returns a function, then calling
   that with zero arguments?

We obviously need to support A and C. B is really difficult given how we
currently compile "this". We'd need to compile it to an upvalue for every
method just in case it gets closed over. But then for normal invocations, we
would then have to allocate a closure every time. We don't want to do that.

A couple of options:

1. Come up with a separate OP_THIS that handles both the closurized and regular
   call cases. But how?
2. Always store this in the first slot, even when it's closed over. When
   calling a function (not a method), copy the caller's zero slot into the
   callee's. In other words, instead of storing the function in slot zero for
   function calls, store the surrounding "this".

   Hmm, wait. That's wrong. A function should use lexical scope to find "this",
   not dynamic.
3. Not allow closurization.

Currently leaning towards 3.

D is a little annoying, but I think users would expect it to work. The only
downside is that it means we have to do two lookups on every method call, first
to look for a field (which shadows the method) and then to look for a method.

Another option is to have different syntax for fields (like @foo) and no
getters. That keeps all cases distinct, at the expense of making the very handy
foo.bar not even be valid syntax.

For now, let's try sticking with the current syntax and just not allow B. I'll
see what kind of perf hit D causes and go from there.


================================================
FILE: note/outline.md
================================================
**TODO: when can we introduce a print statement/function?**

needs to happen before statements and flow control otherwise those aren't
visible to user.

- Warming Up
    - Introduction
        - who book is for
        - who am i
            - doodling languages in notebook
            - always fascinated
            - seemed like magic
            - iStudio
            - paternity
            - Dart
        - why learn languages?
            - in full programming career, will end up doing something related to
              language
            - good way to learn lots of techniques: recursion, trees, graphs,
              state machines, memory management, optimization
            - hard, training with weights on
            - fun
            - dispel magic
        - structure of book
        - languages used in impl
        - what's in book
        - what's not in book
        - end goal is high quality, efficient interpreter suitable for real use
        - to get there, narrow path through space, not broad survey
        - will point to alternatives to explore on own
        - learn enough to carry conversation with professional lang person
    - The Vox Language
        - intro to full language we'll be implementing
        - ebnf
    - The Pancake Language
        - basic phases and terminology of interpreter
        - simple stack-based language
- Practice (Java)
    - Framework
        - repl
        - interpreters run from source
        - test framework
    - Scanning
        - tokens
        - whitespace
        - regex
        - comments
        - numbers
            - leading zeroes
            - floating point
            - leading and trailing "."
            - range
            - negative
        - token value
        - strings
        - token type
        - escaping
        - errors
        - maximal munch
        - fortran parsing identifiers without whitespace
        - significant indentation
        - state machine for identifiers
        - ex: self-assignment and increment
        - ex: scientific and hex
        - ex: significant indentation and newlines
        - ex: escapes
        - eagerly scan to list of tokens
    - Parsing Expressions
        - ast
        - metaprogramming the ast types
        - recursive descent
        - lookahead
        - ex: "needs more input" for multi-line repl
    - Tree Walk Interpreting
        - evaluating operands
        - recursion
        - arithmetic
        - visitor pattern
        - aside: interpreter pattern is putting interpret methods on nodes
          - makes it possible to add new node types
        - values versus ast nodes for literals
        - dynamic typing and conversions
        - errors
    - Variables
        - statements versus expressions
        - declaration
        - assignment
        - variable references
        - scope
        - undefined names
        - block scope
    - Control Flow
        - if
        - and and or
        - while
        - for
    - Functions
        - parsing calls
        - '(' as infix operator
        - built in fns
        - user-defined fns
        - parameters and arguments
        - call stack
        - closures
        - ffi?
        - tail call optimization
        - arity mismatch
    - Resolution
        - compile errors
        - recursion and mutual recursion
        - decorating an ast
        - symbol tables
        - name binding
        - early versus late binding
    - Classes
        - classes
        - prototypes?
        - this
        - properties
        - methods
        - dynamic dispatch
        - constructors
    - Inheritance
        - inheritance
        - super calls
    - Lists and Loops
        - list type
        - subscript operator
        - subscript setter
        - for syntax
        - iterator protocol
        - desugaring
        - ex: make string implement protocol
        TODO: Cut this?

TODO: Still needs a lot of work:

- Performance (C)
    - Framework
    - A Virtual Machine
        - stack
        - for now, Value is just a double and OP_CONSTANT uses the argument as
          an immediate int value so we don't need a constant table
        - bytecode
        - hand-author and run some bytecode
    - Scanning
        - pull based lazy scanning
        - zero-alloc tokens
        - talk about state machine for keywords?
    - Compiling Expressions
        - top-down operator precedence parsing
        - single-pass compiling
        ^ can now compile and run arithmetic exprs
    - Representing Objects
        - numbers, strings, bools, null
        - dynamic typing
        ^ now can handle "str" + "another"
    - Garbage Collection
        ^ since previous chapter needs to heap alloc stuff, need to manage it
        - roots
        ??? we don't have any objects that store references to other objects
            yet, so there is no traversal happening
    - String Interning and Symbols
        - string interning
        - fast equality
        - hashing?
        - separate symbol types
        - intern all or some strings
        - gcing interned strings
    - Variables
        - statements versus expressions
    - Control Flow
        - branching instructions

    - Functions
        - upvalues

    TODO: other stuff...
    - constant pools
    - functions
    - symbol tables
    - nan tagging
    - copy down inheritance


principles

- each top-level section builds one interpreter starting from scratch
- since the book will be "published" online serially, the chapters should be
  ordered such that they are useful even while the book is incomplete. that
  probably means doing all of parsing for the whole grammar isn't a good idea:
  it's boring until later chapters do something with it.

- kinds of content in a chapter
  - main narrative with prose and code
  - historical context and people
  - further things to learn
  - omitted alternatives
  - review questions: ask things chapter did explain
  - challenges: add new features or compare other languages
  - quotation at beginning of each chapter
  - engineering considerations: error handling, maintainability, etc.
  - design and psychology: usability, aesthetics, popularity, learnability, etc.

stuff to maybe include:

- error-handling
    - stack traces and line information
    - runtime errors
- variables
    - scopes as dictionaries
    - name binding of locals
    - variables and assignment
    - scope
- object model
    - objects as dictionaries
    - objects
    - classes
    - prototypes
    - nan tagging
    - object representation
    - symbol tables and hash tables
    - strings
    - arrays
    - hash tables (for internal use and as object in language)
    - dynamic dispatch
- syntax
    - aesthetics and usability of syntax design
    - backjumping and infinite lookahead or context-sensitive grammars


================================================
FILE: note/research.txt
================================================
http://en.wikipedia.org/wiki/PL/0

"Structure and Interpretation of Efficient Interpreters" (in dropbox)

http://blog.analogmachine.org/2011/09/20/lets-build-a-compiler/

"Compiler Construction" by Wirth

================================================
FILE: note/scope.txt
================================================
Mostly following Scheme (R5RS):

- Accessing an undefined name is a runtime error. It is not a compile time
  error.

  (define (eval-foo) foo) ; OK, though foo is undefined.
  (eval-foo)              ; Runtime error.
  (define foo "ok")       ; Now foo is defined.
  (eval-foo)              ; Now it works.

  - Allows mutual recursion at the top level.
  - Does so in a way that's friendly to the REPL and incremental evaluation.

- Assigning to an undefined name is a runtime error. It is not a compile time
  error.

  (define (setbar) (set! bar "wat"))  ; OK, though bar is undefined.
  (setbar)                            ; Runtime error.
  (define bar "ok")                   ; Now bar is defined.
  (setbar)                            ; Now it works.

  - Avoids the mushiness of treating a typo in an assignment as "let's just
    create a new global variable", which is probably not what the user wants.

- Top level variables can be defined multiple times.

  (define foo "1")
  foo               ; 1
  (define foo "2")  ; OK.
  foo               ; 2

  - REPL friendly.

- A variable is not in scope in its own initializer. It is declared after its
  initializer is run.

  (define foo foo) ; Error: foo is not defined.

  (define bar "outer")
  (let ((bar bar)) bar) ; "outer"

  (define baz 1)
  (define baz (+ baz 1))  ; Refer to previous definition of baz.
  baz                     ; 2

  - This is different from classes and functions, but that's probably OK.

  - If we want it to be a compile error for a local to be in scope in its own
    initializer, we'd need some sort of resolving step in jvox.

  - If we want it to be a runtime error, that error would always occur, so it
    feels weird to defer that. And it would make cvox slower, or require us to
    emit some special case "always throw runtime error here" code.


================================================
FILE: note/struct sizes.txt
================================================
typedef struct Obj2 {
  ObjType type;
} Obj2;

typedef struct ObjString2 {
  Obj2 obj;
  int length;
  char* chars;

} ObjString2;

// Prints the size of each object/value type and the byte offset of each of
// their fields, one "label value" pair per line.
//
// sizeof and offsetof both yield a size_t, so every value is printed with %zu.
// (%ld expects a long, which only works where long and size_t happen to have
// the same width.)
//
// argc and argv are unused.
int main(int argc, const char* argv[]) {
  // Layout of the object header and of a string object.
  printf("sizeof(Obj) %zu\n", sizeof(Obj2));
  printf("sizeof(ObjType) %zu\n", sizeof(ObjType));
  printf("offset(Obj, type) %zu\n", offsetof(Obj2, type));
  printf("sizeof(ObjString) %zu\n", sizeof(ObjString2));
  printf("offset(ObjString, obj) %zu\n", offsetof(ObjString2, obj));
  printf("offset(ObjString, length) %zu\n", offsetof(ObjString2, length));
  printf("offset(ObjString, chars) %zu\n", offsetof(ObjString2, chars));

  // Layout of the tagged Value and the size of an object pointer.
  printf("sizeof(Value) %zu\n", sizeof(Value));
  printf("sizeof(ValueType) %zu\n", sizeof(ValueType));
  printf("offset(Value, ValueType) %zu\n", offsetof(Value, type));
  printf("offset(Value, as) %zu\n", offsetof(Value, as));
  printf("sizeof(Obj*) %zu\n", sizeof(Obj*));
}

sizeof(Obj) 4
sizeof(ObjType) 4
offset(Obj, type) 0
sizeof(ObjString) 16
offset(ObjString, obj) 0
offset(ObjString, length) 4
offset(ObjString, chars) 8
sizeof(Value) 16
sizeof(ValueType) 4
offset(Value, ValueType) 0
offset(Value, as) 8
sizeof(Obj*) 8

================================================
FILE: note/style guide.md
================================================
## Person

Figuring out when to use "we" versus "it" when talking about the code is hard.
It's important to be clear because the prose talks about both what the reader
needs to do ("define this method", "replace this line", etc.) and what the code
needs to do while it's running ("match this token", etc.).

But it gets really awkward to always use "it" for describing what the code does.
So the rough rules are:

1.  When walking through a hypothetical execution of the code, use "we". Most
    prose explaining the code is like this.

2.  When describing how the code must be changed, what the reader must
    mechanically do, use "we" (and not "you").

3.  When describing how a piece of code works in general, or if it otherwise
    reads better, use "it".

## Formatting

*   Class names are not in code font: "The PrettyPrinter class". Type names in C
    are also formatted normally: Value, Obj, etc. Even built-in types like
    double and uint16_t.

*   File names and extensions are quoted:

    > The file "Expr.java" has extension ".java".

*   C module names are quoted:

    > The "debug" module.

TODO: How do we style keywords used in headers and subheaders?

### Bold and italics

*   The first time a technical term is defined, make it bold. Don't quote it,
    even when referring to the term directly.

*   Consider using italics for a new technical term that isn't explicitly
    defined in order to highlight that it is jargon.

*   In a bullet list, if the bold part is a sentence or part of a sentence,
    emphasize it like normal prose. If the bullet item starts with a standalone
    term, separate it from the subsequent prose with an en dash.

*   Big-O notation: "*O(n)*".

### Code font

*   References to statements like "`if` statement" and "`switch`". Use "`else`
    clause" to refer to that part of an `if` statement's *syntax*, but "then
    branch" and "else branch" to refer to those *concepts*.

*   Use "`return` statement", but "early return". In almost all other cases,
    "return" uses normal type ("return value", "return from", etc.), except when
    "the `return`" refers to a return statement.

*   "Class declaration", but "`class` statement".

*   When referring to the Boolean values true and false, put them in code font,
    as in "returns `true`". Use normal text when referring to truth or falsehood
    in general.

*   Opcodes: "`OP_RETURN`".

*   `nil`, `null` (Java), and `NULL` (C). Simply "null" when used as a verb as
    in "null out the field".

## Punctuation

*   Prose before a Java or C code snippet ends in `:` if the last sentence is
    not a complete sentence or directly refers to the subsequent code. End in
    `.` if it is a reasonable-sounding sentence on its own. This is mainly so
    that we don't use a gratuitous amount of `:` at the end of nearly every
    paragraph.

*   On the other hand, prose before illustrations, Lox examples, and grammar
    snippets can use `:` even when a complete sentence, if the sentence refers
    to the subsequent code or picture.

### Hyphenation

*   If part of a word is emphasized, like "*re*-define", hyphenate at the point
    where the italics change.

*   Hyphenate "left-hand side" and "right-hand side".

*   Always hyphenate:

    *   left-associative
    *   right-associative
    *   non-associative
    *   left-recursive
    *   l-value
    *   r-value
    *   finite-state machine

*   Never hyphenate:

    *    left recursion
    *    call stack
    *    call frame (but "CallFrame" when referring to the struct)

*   Hyphenate when preceding a noun, but not otherwise ("A first-class function
    is first class."):

    *    first class
    *    lowest precedence
    *    start up ("start up the interpreter" versus "startup time")

## Usage

*   Numbers in prose are usually spelled as words when there is a single word
    for them: one, eleven, etc. However, numbers that refer to binary digits are
    always 0 or 1.

### Capitalization

*   Follow common usage to determine which acronyms and abbreviations are all
    caps or not. "COBOL", "Fortran", etc.

*   Design pattern names are capitalized when referring to the pattern itself,
    but not code that implements the pattern (unless the code is the name of the
    actual class). As in: "ExpressionVisitor is a visitor class that implements
    the Visitor pattern."

### Word list

*     opcode
*     Boolean
*     lookup
*     I/O
*     "null" when referring to the null byte at the end of a string


================================================
FILE: note/todo.txt
================================================
Print:

- Order proof from IngramSpark.

eBook:

- Fix all TODOs in asset/ebook/*.
- Create wider cover image for non-Kindle.
- Style TOC page.
- Assign ISBN numbers:
  https://www.myidentifiers.com/title_registration?isbn=978-0-9905829-3-9&icon_type=New
- Get example snippet in chapter one looking right.
- Make eBook exporter handle Kindle.
- Generate PDF version.
  - Include cover.
  - Get table of contents links working.
- Style tables (see "Chunks of Bytecode").
- Check out on a few different readers.
- Go through whole book and see how it looks.
- Make sure inline images look OK.

Web:

- Link to stores.
- Export sample chapter PDF.
- Remove "not done" script, templates and styles.
- Replace "work in progress" header with something about print edition.
- Add snippets for remaining chapters to compile_snippets to pin down which
  snippets reach a working point.


================================================
FILE: site/.htaccess
================================================
# Serve the site's custom "Page Not Found" page for any URL that does not exist.
ErrorDocument 404 /404.html

# Short paths that redirect to external pages. One redirect per line, kept in
# alphabetical order by path.
Redirect /beta http://journal.stuffwithstuff.com/2012/12/19/the-impoliteness-of-overriding-methods/
Redirect /budget https://steveklabnik.com/writing/the-language-strangeness-budget
Redirect /dragon https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools
Redirect /finch http://finch.stuffwithstuff.com/
Redirect /koan http://wiki.c2.com/?ClosuresAndObjectsAreEquivalent
Redirect /locality http://gameprogrammingpatterns.com/data-locality.html
Redirect /lua5 https://www.lua.org/doc/jucs05.pdf
Redirect /ports https://github.com/munificent/craftinginterpreters/wiki/Lox-implementations
Redirect /pratt http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
Redirect /prototypes http://gameprogrammingpatterns.com/prototype.html
Redirect /repo https://github.com/munificent/craftinginterpreters
Redirect /singleton http://gameprogrammingpatterns.com/singleton.html
Redirect /state http://gameprogrammingpatterns.com/state.html
Redirect /tests https://github.com/munificent/craftinginterpreters/tree/master/test
Redirect /wizard https://mitpress.mit.edu/sites/default/files/sicp/index.html
Redirect /wren https://wren.io/


================================================
FILE: site/404.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>404 Page Not Found &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<!-- This page is served in place of any missing URL, including ones inside
     subdirectories, so every local URL here is root-relative. -->
<link rel="stylesheet" type="text/css" href="/style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="/image/favicon.png" />

</head>
<body>

<nav class="wide">
  <a href="/"><img src="/image/logotype-small.png" title="Crafting Interpreters"></a>
  <div class="contents">
    <h2><a href="#top"><small>&nbsp;</small> Table of Contents</a></h2>
    <ul>
      <li><a href="#welcome"><small>I</small>Welcome</a></li>
      <li><a href="#a-tree-walk-interpreter"><small>II</small>A Tree-Walk Interpreter</a></li>
      <li><a href="#a-bytecode-virtual-machine"><small>III</small>A Bytecode Virtual Machine</a></li>
      <li><a href="#backmatter"><small>&#10087;</small>Backmatter</a></li>
    </ul>
        <div class="prev-next">
        <a href="/index.html" title="Crafting Interpreters" class="left">&larr;&nbsp;Previous</a>
        <a href="/index.html" title="Crafting Interpreters">&uarr;&nbsp;Up</a>
        <a href="/welcome.html" title="Welcome" class="right">Next&nbsp;&rarr;</a>
    </div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="/image/logotype-small.png" title="Crafting Interpreters"></a>
<a href="/index.html" title="Crafting Interpreters" class="prev">←</a>
<a href="/welcome.html" title="Welcome" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="/image/logotype-small.png" title="Crafting Interpreters"></a>
  <div class="expandable">
    <h2><a href="#top"><small>&nbsp;</small> Table of Contents</a></h2>
    <ul>
      <li><a href="#welcome"><small>I</small>Welcome</a></li>
      <li><a href="#a-tree-walk-interpreter"><small>II</small>A Tree-Walk Interpreter</a></li>
      <li><a href="#a-bytecode-virtual-machine"><small>III</small>A Bytecode Virtual Machine</a></li>
      <li><a href="#backmatter"><small>&#10087;</small>Backmatter</a></li>
    </ul>
        <div class="prev-next">
        <a href="/index.html" title="Crafting Interpreters" class="left">&larr;&nbsp;Previous</a>
        <a href="/index.html" title="Crafting Interpreters">&uarr;&nbsp;Up</a>
        <a href="/welcome.html" title="Welcome" class="right">Next&nbsp;&rarr;</a>
    </div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="contents">

<h1>404 &ndash; Page Not Found</h1>

<p>You seem to have reached a dead end. Did you get lost? Did I mislead you?
Either way, you probably want to <a href="/">go back to the start.</a></p>

<footer>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/a-bytecode-virtual-machine.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>A Bytecode Virtual Machine &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h2><small>III</small>A Bytecode Virtual Machine</h2>

<ul>
    <li><a href="chunks-of-bytecode.html"><small>14</small>Chunks of Bytecode</a></li>
    <li><a href="a-virtual-machine.html"><small>15</small>A Virtual Machine</a></li>
    <li><a href="scanning-on-demand.html"><small>16</small>Scanning on Demand</a></li>
    <li><a href="compiling-expressions.html"><small>17</small>Compiling Expressions</a></li>
    <li><a href="types-of-values.html"><small>18</small>Types of Values</a></li>
    <li><a href="strings.html"><small>19</small>Strings</a></li>
    <li><a href="hash-tables.html"><small>20</small>Hash Tables</a></li>
    <li><a href="global-variables.html"><small>21</small>Global Variables</a></li>
    <li><a href="local-variables.html"><small>22</small>Local Variables</a></li>
    <li><a href="jumping-back-and-forth.html"><small>23</small>Jumping Back and Forth</a></li>
    <li><a href="calls-and-functions.html"><small>24</small>Calls and Functions</a></li>
    <li><a href="closures.html"><small>25</small>Closures</a></li>
    <li><a href="garbage-collection.html"><small>26</small>Garbage Collection</a></li>
    <li><a href="classes-and-instances.html"><small>27</small>Classes and Instances</a></li>
    <li><a href="methods-and-initializers.html"><small>28</small>Methods and Initializers</a></li>
    <li><a href="superclasses.html"><small>29</small>Superclasses</a></li>
    <li><a href="optimization.html"><small>30</small>Optimization</a></li>
</ul>


<div class="prev-next">
    <a href="inheritance.html" title="Inheritance" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="chunks-of-bytecode.html" title="Chunks of Bytecode" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="inheritance.html" title="Inheritance" class="prev">←</a>
<a href="chunks-of-bytecode.html" title="Chunks of Bytecode" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h2><small>III</small>A Bytecode Virtual Machine</h2>

<ul>
    <li><a href="chunks-of-bytecode.html"><small>14</small>Chunks of Bytecode</a></li>
    <li><a href="a-virtual-machine.html"><small>15</small>A Virtual Machine</a></li>
    <li><a href="scanning-on-demand.html"><small>16</small>Scanning on Demand</a></li>
    <li><a href="compiling-expressions.html"><small>17</small>Compiling Expressions</a></li>
    <li><a href="types-of-values.html"><small>18</small>Types of Values</a></li>
    <li><a href="strings.html"><small>19</small>Strings</a></li>
    <li><a href="hash-tables.html"><small>20</small>Hash Tables</a></li>
    <li><a href="global-variables.html"><small>21</small>Global Variables</a></li>
    <li><a href="local-variables.html"><small>22</small>Local Variables</a></li>
    <li><a href="jumping-back-and-forth.html"><small>23</small>Jumping Back and Forth</a></li>
    <li><a href="calls-and-functions.html"><small>24</small>Calls and Functions</a></li>
    <li><a href="closures.html"><small>25</small>Closures</a></li>
    <li><a href="garbage-collection.html"><small>26</small>Garbage Collection</a></li>
    <li><a href="classes-and-instances.html"><small>27</small>Classes and Instances</a></li>
    <li><a href="methods-and-initializers.html"><small>28</small>Methods and Initializers</a></li>
    <li><a href="superclasses.html"><small>29</small>Superclasses</a></li>
    <li><a href="optimization.html"><small>30</small>Optimization</a></li>
</ul>


<div class="prev-next">
    <a href="inheritance.html" title="Inheritance" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="chunks-of-bytecode.html" title="Chunks of Bytecode" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">III</div>
  <h1 class="part">A Bytecode Virtual Machine</h1>

<p>Our Java interpreter, jlox, taught us many of the fundamentals of programming
languages, but we still have much to learn. First, if you run any interesting
Lox programs in jlox, you&rsquo;ll discover it&rsquo;s achingly slow. The style of
interpretation it uses<span class="em">&mdash;</span>walking the AST directly<span class="em">&mdash;</span>is good enough for <em>some</em>
real-world uses, but leaves a lot to be desired for a general-purpose scripting
language.</p>
<p>Also, we implicitly rely on runtime features of the JVM itself. We take for
granted that things like <code>instanceof</code> in Java work <em>somehow</em>. And we never for a
second worry about memory management because the JVM&rsquo;s garbage collector takes
care of it for us.</p>
<p>When we were focused on high-level concepts, it was fine to gloss over those.
But now that we know our way around an interpreter, it&rsquo;s time to dig down to
those lower layers and build our own virtual machine from scratch using nothing
more than the C standard library<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>

<footer>
<a href="chunks-of-bytecode.html" class="next">
  Next Chapter: &ldquo;Chunks of Bytecode&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/a-map-of-the-territory.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>A Map of the Territory &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">A Map of the Territory<small>2</small></a></h3>

<ul>
    <li><a href="#the-parts-of-a-language"><small>2.1</small> The Parts of a Language</a></li>
    <li><a href="#shortcuts-and-alternate-routes"><small>2.2</small> Shortcuts and Alternate Routes</a></li>
    <li><a href="#compilers-and-interpreters"><small>2.3</small> Compilers and Interpreters</a></li>
    <li><a href="#our-journey"><small>2.4</small> Our Journey</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="introduction.html" title="Introduction" class="left">&larr;&nbsp;Previous</a>
    <a href="welcome.html" title="Welcome">&uarr;&nbsp;Up</a>
    <a href="the-lox-language.html" title="The Lox Language" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="introduction.html" title="Introduction" class="prev">←</a>
<a href="the-lox-language.html" title="The Lox Language" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">A Map of the Territory<small>2</small></a></h3>

<ul>
    <li><a href="#the-parts-of-a-language"><small>2.1</small> The Parts of a Language</a></li>
    <li><a href="#shortcuts-and-alternate-routes"><small>2.2</small> Shortcuts and Alternate Routes</a></li>
    <li><a href="#compilers-and-interpreters"><small>2.3</small> Compilers and Interpreters</a></li>
    <li><a href="#our-journey"><small>2.4</small> Our Journey</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="introduction.html" title="Introduction" class="left">&larr;&nbsp;Previous</a>
    <a href="welcome.html" title="Welcome">&uarr;&nbsp;Up</a>
    <a href="the-lox-language.html" title="The Lox Language" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">2</div>
  <h1>A Map of the Territory</h1>

<blockquote>
<p>You must have a map, no matter how rough. Otherwise you wander all over the
place. In <em>The Lord of the Rings</em> I never made anyone go farther than he could
on a given day.</p>
<p><cite>J. R. R. Tolkien</cite></p>
</blockquote>
<p>We don&rsquo;t want to wander all over the place, so before we set off, let&rsquo;s scan
the territory charted by previous language implementers. It will help us
understand where we are going and the alternate routes others have taken.</p>
<p>First, let me establish a shorthand. Much of this book is about a language&rsquo;s
<em>implementation</em>, which is distinct from the <em>language itself</em> in some sort of
Platonic ideal form. Things like &ldquo;stack&rdquo;, &ldquo;bytecode&rdquo;, and &ldquo;recursive descent&rdquo;,
are nuts and bolts one particular implementation might use. From the user&rsquo;s
perspective, as long as the resulting contraption faithfully follows the
language&rsquo;s specification, it&rsquo;s all implementation detail.</p>
<p>We&rsquo;re going to spend a lot of time on those details, so if I have to write
&ldquo;language <em>implementation</em>&rdquo; every single time I mention them, I&rsquo;ll wear my
fingers off. Instead, I&rsquo;ll use &ldquo;language&rdquo; to refer to either a language or an
implementation of it, or both, unless the distinction matters.</p>
<h2><a href="#the-parts-of-a-language" id="the-parts-of-a-language"><small>2&#8202;.&#8202;1</small>The Parts of a Language</a></h2>
<p>Engineers have been building programming languages since the Dark Ages of
computing. As soon as we could talk to computers, we discovered doing so was too
hard, and we enlisted their help. I find it fascinating that even though today&rsquo;s
machines are literally a million times faster and have orders of magnitude more
storage, the way we build programming languages is virtually unchanged.</p>
<p>Though the area explored by language designers is vast, the trails they&rsquo;ve
carved through it are <span name="dead">few</span>. Not every language takes the
exact same path<span class="em">&mdash;</span>some take a shortcut or two<span class="em">&mdash;</span>but otherwise they are
reassuringly similar, from Rear Admiral Grace Hopper&rsquo;s first COBOL compiler all
the way to some hot, new, transpile-to-JavaScript language whose &ldquo;documentation&rdquo;
consists entirely of a single, poorly edited README in a Git repository
somewhere.</p>
<aside name="dead">
<p>There are certainly dead ends, sad little cul-de-sacs of CS papers with zero
citations and now-forgotten optimizations that only made sense when memory was
measured in individual bytes.</p>
</aside>
<p>I visualize the network of paths an implementation may choose as climbing a
mountain. You start off at the bottom with the program as raw source text,
literally just a string of characters. Each phase analyzes the program and
transforms it to some higher-level representation where the semantics<span class="em">&mdash;</span>what
the author wants the computer to do<span class="em">&mdash;</span>become more apparent.</p>
<p>Eventually we reach the peak. We have a bird&rsquo;s-eye view of the user&rsquo;s program
and can see what their code <em>means</em>. We begin our descent down the other side of
the mountain. We transform this highest-level representation down to
successively lower-level forms to get closer and closer to something we know how
to make the CPU actually execute.</p><img src="image/a-map-of-the-territory/mountain.png" alt="The branching paths a language may take over the mountain." class="wide" />
<p>Let&rsquo;s trace through each of those trails and points of interest. Our journey
begins on the left with the bare text of the user&rsquo;s source code:</p><img src="image/a-map-of-the-territory/string.png" alt="var average = (min + max) / 2;" />
<h3><a href="#scanning" id="scanning"><small>2&#8202;.&#8202;1&#8202;.&#8202;1</small>Scanning</a></h3>
<p>The first step is <strong>scanning</strong>, also known as <strong>lexing</strong>, or (if you&rsquo;re trying
to impress someone) <strong>lexical analysis</strong>. They all mean pretty much the same
thing. I like &ldquo;lexing&rdquo; because it sounds like something an evil supervillain
would do, but I&rsquo;ll use &ldquo;scanning&rdquo; because it seems to be marginally more
commonplace.</p>
<p>A <strong>scanner</strong> (or <strong>lexer</strong>) takes in the linear stream of characters and chunks
them together into a series of something more akin to <span
name="word">&ldquo;words&rdquo;</span>. In programming languages, each of these words is
called a <strong>token</strong>. Some tokens are single characters, like <code>(</code> and <code>,</code>. Others
may be several characters long, like numbers (<code>123</code>), string literals (<code>"hi!"</code>),
and identifiers (<code>min</code>).</p>
<aside name="word">
<p>&ldquo;Lexical&rdquo; comes from the Greek root &ldquo;lex&rdquo;, meaning &ldquo;word&rdquo;.</p>
</aside>
<p>Some characters in a source file don&rsquo;t actually mean anything. Whitespace is
often insignificant, and comments, by definition, are ignored by the language.
The scanner usually discards these, leaving a clean sequence of meaningful
tokens.</p><img src="image/a-map-of-the-territory/tokens.png" alt="[var] [average] [=] [(] [min] [+] [max] [)] [/] [2] [;]" />
<h3><a href="#parsing" id="parsing"><small>2&#8202;.&#8202;1&#8202;.&#8202;2</small>Parsing</a></h3>
<p>The next step is <strong>parsing</strong>. This is where our syntax gets a <strong>grammar</strong><span class="em">&mdash;</span>the
ability to compose larger expressions and statements out of smaller parts. Did
you ever diagram sentences in English class? If so, you&rsquo;ve done what a parser
does, except that English has thousands and thousands of &ldquo;keywords&rdquo; and an
overflowing cornucopia of ambiguity. Programming languages are much simpler.</p>
<p>A <strong>parser</strong> takes the flat sequence of tokens and builds a tree structure that
mirrors the nested nature of the grammar. These trees have a couple of different
names<span class="em">&mdash;</span><strong>parse tree</strong> or <strong>abstract syntax tree</strong><span class="em">&mdash;</span>depending on how
close to the bare syntactic structure of the source language they are. In
practice, language hackers usually call them <strong>syntax trees</strong>, <strong>ASTs</strong>, or
often just <strong>trees</strong>.</p><img src="image/a-map-of-the-territory/ast.png" alt="An abstract syntax tree." />
<p>Parsing has a long, rich history in computer science that is closely tied to the
artificial intelligence community. Many of the techniques used today to parse
programming languages were originally conceived to parse <em>human</em> languages by AI
researchers who were trying to get computers to talk to us.</p>
<p>It turns out human languages were too messy for the rigid grammars those parsers
could handle, but they were a perfect fit for the simpler artificial grammars of
programming languages. Alas, we flawed humans still manage to use those simple
grammars incorrectly, so the parser&rsquo;s job also includes letting us know when we
do by reporting <strong>syntax errors</strong>.</p>
<h3><a href="#static-analysis" id="static-analysis"><small>2&#8202;.&#8202;1&#8202;.&#8202;3</small>Static analysis</a></h3>
<p>The first two stages are pretty similar across all implementations. Now, the
individual characteristics of each language start coming into play. At this
point, we know the syntactic structure of the code<span class="em">&mdash;</span>things like which
expressions are nested in which<span class="em">&mdash;</span>but we don&rsquo;t know much more than that.</p>
<p>In an expression like <code>a + b</code>, we know we are adding <code>a</code> and <code>b</code>, but we don&rsquo;t
know what those names refer to. Are they local variables? Global? Where are they
defined?</p>
<p>The first bit of analysis that most languages do is called <strong>binding</strong> or
<strong>resolution</strong>. For each <strong>identifier</strong>, we find out where that name is defined
and wire the two together. This is where <strong>scope</strong> comes into play<span class="em">&mdash;</span>the region
of source code where a certain name can be used to refer to a certain
declaration.</p>
<p>If the language is <span name="type">statically typed</span>, this is when we
type check. Once we know where <code>a</code> and <code>b</code> are declared, we can also figure out
their types. Then if those types don&rsquo;t support being added to each other, we
report a <strong>type error</strong>.</p>
<aside name="type">
<p>The language we&rsquo;ll build in this book is dynamically typed, so it will do its
type checking later, at runtime.</p>
</aside>
<p>Take a deep breath. We have attained the summit of the mountain and a sweeping
view of the user&rsquo;s program. All this semantic insight that is visible to us from
analysis needs to be stored somewhere. There are a few places we can squirrel it
away:</p>
<ul>
<li>
<p>Often, it gets stored right back as <strong>attributes</strong> on the syntax tree
itself<span class="em">&mdash;</span>extra fields in the nodes that aren&rsquo;t initialized during parsing
but get filled in later.</p>
</li>
<li>
<p>Other times, we may store data in a lookup table off to the side. Typically,
the keys to this table are identifiers<span class="em">&mdash;</span>names of variables and declarations.
In that case, we call it a <strong>symbol table</strong> and the values it associates with
each key tell us what that identifier refers to.</p>
</li>
<li>
<p>The most powerful bookkeeping tool is to transform the tree into an entirely
new data structure that more directly expresses the semantics of the code.
That&rsquo;s the next section.</p>
</li>
</ul>
<p>Everything up to this point is considered the <strong>front end</strong> of the
implementation. You might guess everything after this is the <strong>back end</strong>, but
no. Back in the days of yore when &ldquo;front end&rdquo; and &ldquo;back end&rdquo; were coined,
compilers were much simpler. Later researchers invented new phases to stuff
between the two halves. Rather than discard the old terms, William Wulf and
company lumped those new phases into the charming but spatially paradoxical name
<strong>middle end</strong>.</p>
<h3><a href="#intermediate-representations" id="intermediate-representations"><small>2&#8202;.&#8202;1&#8202;.&#8202;4</small>Intermediate representations</a></h3>
<p>You can think of the compiler as a pipeline where each stage&rsquo;s job is to
organize the data representing the user&rsquo;s code in a way that makes the next
stage simpler to implement. The front end of the pipeline is specific to the
source language the program is written in. The back end is concerned with the
final architecture where the program will run.</p>
<p>In the middle, the code may be stored in some <span name="ir"><strong>intermediate
representation</strong></span> (<strong>IR</strong>) that isn&rsquo;t tightly tied to either the source or
destination forms (hence &ldquo;intermediate&rdquo;). Instead, the IR acts as an interface
between these two languages.</p>
<aside name="ir">
<p>There are a few well-established styles of IRs out there. Hit your search engine
of choice and look for &ldquo;control flow graph&rdquo;, &ldquo;static single-assignment&rdquo;,
&ldquo;continuation-passing style&rdquo;, and &ldquo;three-address code&rdquo;.</p>
</aside>
<p>This lets you support multiple source languages and target platforms with less
effort. Say you want to implement Pascal, C, and Fortran compilers, and you want
to target x86, ARM, and, I dunno, SPARC. Normally, that means you&rsquo;re signing up
to write <em>nine</em> full compilers: Pascal&rarr;x86, C&rarr;ARM, and every other
combination.</p>
<p>A <span name="gcc">shared</span> intermediate representation reduces that
dramatically. You write <em>one</em> front end for each source language that produces
the IR. Then <em>one</em> back end for each target architecture. Now you can mix and
match those to get every combination.</p>
<aside name="gcc">
<p>If you&rsquo;ve ever wondered how <a href="https://en.wikipedia.org/wiki/GNU_Compiler_Collection">GCC</a> supports so many crazy languages and
architectures, like Modula-3 on Motorola 68k, now you know. Language front ends
target one of a handful of IRs, mainly <a href="https://gcc.gnu.org/onlinedocs/gccint/GIMPLE.html">GIMPLE</a> and <a href="https://gcc.gnu.org/onlinedocs/gccint/RTL.html">RTL</a>. Target back ends
like the one for 68k then take those IRs and produce native code.</p>
</aside>
<p>There&rsquo;s another big reason we might want to transform the code into a form that
makes the semantics more apparent<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h3><a href="#optimization" id="optimization"><small>2&#8202;.&#8202;1&#8202;.&#8202;5</small>Optimization</a></h3>
<p>Once we understand what the user&rsquo;s program means, we are free to swap it out
with a different program that has the <em>same semantics</em> but implements them more
efficiently<span class="em">&mdash;</span>we can <strong>optimize</strong> it.</p>
<p>A simple example is <strong>constant folding</strong>: if some expression always evaluates to
the exact same value, we can do the evaluation at compile time and replace the
code for the expression with its result. If the user typed in this:</p>
<div class="codehilite"><pre><span class="i">pennyArea</span> = <span class="n">3.14159</span> * (<span class="n">0.75</span> / <span class="n">2</span>) * (<span class="n">0.75</span> / <span class="n">2</span>);
</pre></div>
<p>we could do all of that arithmetic in the compiler and change the code to:</p>
<div class="codehilite"><pre><span class="i">pennyArea</span> = <span class="n">0.4417860938</span>;
</pre></div>
<p>Optimization is a huge part of the programming language business. Many language
hackers spend their entire careers here, squeezing every drop of performance
they can out of their compilers to get their benchmarks a fraction of a percent
faster. It can become a sort of obsession.</p>
<p>We&rsquo;re mostly going to <span name="rathole">hop over that rathole</span> in this
book. Many successful languages have surprisingly few compile-time
optimizations. For example, Lua and CPython generate relatively unoptimized
code, and focus most of their performance effort on the runtime.</p>
<aside name="rathole">
<p>If you can&rsquo;t resist poking your foot into that hole, some keywords to get you
started are &ldquo;constant propagation&rdquo;, &ldquo;common subexpression elimination&rdquo;, &ldquo;loop
invariant code motion&rdquo;, &ldquo;global value numbering&rdquo;, &ldquo;strength reduction&rdquo;, &ldquo;scalar
replacement of aggregates&rdquo;, &ldquo;dead code elimination&rdquo;, and &ldquo;loop unrolling&rdquo;.</p>
</aside>
<h3><a href="#code-generation" id="code-generation"><small>2&#8202;.&#8202;1&#8202;.&#8202;6</small>Code generation</a></h3>
<p>We have applied all of the optimizations we can think of to the user&rsquo;s program.
The last step is converting it to a form the machine can actually run. In other
words, <strong>generating code</strong> (or <strong>code gen</strong>), where &ldquo;code&rdquo; here usually refers to
the kind of primitive assembly-like instructions a CPU runs and not the kind of
&ldquo;source code&rdquo; a human might want to read.</p>
<p>Finally, we are in the <strong>back end</strong>, descending the other side of the mountain.
From here on out, our representation of the code becomes more and more
primitive, like evolution run in reverse, as we get closer to something our
simple-minded machine can understand.</p>
<p>We have a decision to make. Do we generate instructions for a real CPU or a
virtual one? If we generate real machine code, we get an executable that the OS
can load directly onto the chip. Native code is lightning fast, but generating
it is a lot of work. Today&rsquo;s architectures have piles of instructions, complex
pipelines, and enough <span name="aad">historical baggage</span> to fill a 747&rsquo;s
luggage bay.</p>
<p>Speaking the chip&rsquo;s language also means your compiler is tied to a specific
architecture. If your compiler targets <a href="https://en.wikipedia.org/wiki/X86">x86</a> machine code, its output is not going to
run on an <a href="https://en.wikipedia.org/wiki/ARM_architecture">ARM</a> device. All the way back in the &rsquo;60s, during the
Cambrian explosion of computer architectures, that lack of portability was a
real obstacle.</p>
<aside name="aad">
<p>For example, the <a href="http://www.felixcloutier.com/x86/AAD.html">AAD</a> (&ldquo;ASCII Adjust AX Before Division&rdquo;) instruction lets
you perform division, which sounds useful. Except that instruction takes, as
operands, two binary-coded decimal digits packed into a single 16-bit register.
When was the last time <em>you</em> needed BCD on a 16-bit machine?</p>
</aside>
<p>To get around that, hackers like Martin Richards and Niklaus Wirth, of BCPL and
Pascal fame, respectively, made their compilers produce <em>virtual</em> machine code.
Instead of instructions for some real chip, they produced code for a
hypothetical, idealized machine. Wirth called this <strong>p-code</strong> for <em>portable</em>,
but today, we generally call it <strong>bytecode</strong> because each instruction is often a
single byte long.</p>
<p>These synthetic instructions are designed to map a little more closely to the
language&rsquo;s semantics, and not be so tied to the peculiarities of any one
computer architecture and its accumulated historical cruft. You can think of it
like a dense, binary encoding of the language&rsquo;s low-level operations.</p>
<h3><a href="#virtual-machine" id="virtual-machine"><small>2&#8202;.&#8202;1&#8202;.&#8202;7</small>Virtual machine</a></h3>
<p>If your compiler produces bytecode, your work isn&rsquo;t over once that&rsquo;s done. Since
there is no chip that speaks that bytecode, it&rsquo;s your job to translate. Again,
you have two options. You can write a little mini-compiler for each target
architecture that converts the bytecode to native code for that machine. You
still have to do work for <span name="shared">each</span> chip you support, but
this last stage is pretty simple and you get to reuse the rest of the compiler
pipeline across all of the machines you support. You&rsquo;re basically using your
bytecode as an intermediate representation.</p>
<aside name="shared" class="bottom">
<p>The basic principle here is that the farther down the pipeline you push the
architecture-specific work, the more of the earlier phases you can share across
architectures.</p>
<p>There is a tension, though. Many optimizations, like register allocation and
instruction selection, work best when they know the strengths and capabilities
of a specific chip. Figuring out which parts of your compiler can be shared and
which should be target-specific is an art.</p>
</aside>
<p>Or you can write a <span name="vm"><strong>virtual machine</strong></span> (<strong>VM</strong>), a
program that emulates a hypothetical chip supporting your virtual architecture
at runtime. Running bytecode in a VM is slower than translating it to native
code ahead of time because every instruction must be simulated at runtime each
time it executes. In return, you get simplicity and portability. Implement your
VM in, say, C, and you can run your language on any platform that has a C
compiler. This is how the second interpreter we build in this book works.</p>
<aside name="vm">
<p>The term &ldquo;virtual machine&rdquo; also refers to a different kind of abstraction. A
<strong>system virtual machine</strong> emulates an entire hardware platform and operating
system in software. This is how you can play Windows games on your Linux
machine, and how cloud providers give customers the user experience of
controlling their own &ldquo;server&rdquo; without needing to physically allocate separate
computers for each user.</p>
<p>The kind of VMs we&rsquo;ll talk about in this book are <strong>language virtual machines</strong>
or <strong>process virtual machines</strong> if you want to be unambiguous.</p>
</aside>
<h3><a href="#runtime" id="runtime"><small>2&#8202;.&#8202;1&#8202;.&#8202;8</small>Runtime</a></h3>
<p>We have finally hammered the user&rsquo;s program into a form that we can execute. The
last step is running it. If we compiled it to machine code, we simply tell the
operating system to load the executable and off it goes. If we compiled it to
bytecode, we need to start up the VM and load the program into that.</p>
<p>In both cases, for all but the basest of low-level languages, we usually need
some services that our language provides while the program is running. For
example, if the language automatically manages memory, we need a garbage
collector going in order to reclaim unused bits. If our language supports
&ldquo;instance of&rdquo; tests so you can see what kind of object you have, then we need
some representation to keep track of the type of each object during execution.</p>
<p>All of this stuff is going on at runtime, so it&rsquo;s called, appropriately, the
<strong>runtime</strong>. In a fully compiled language, the code implementing the runtime
gets inserted directly into the resulting executable. In, say, <a href="https://golang.org/">Go</a>, each
compiled application has its own copy of Go&rsquo;s runtime directly embedded in it.
If the language is run inside an interpreter or VM, then the runtime lives
there. This is how most implementations of languages like Java, Python, and
JavaScript work.</p>
<h2><a href="#shortcuts-and-alternate-routes" id="shortcuts-and-alternate-routes"><small>2&#8202;.&#8202;2</small>Shortcuts and Alternate Routes</a></h2>
<p>That&rsquo;s the long path covering every possible phase you might implement. Many
languages do walk the entire route, but there are a few shortcuts and alternate
paths.</p>
<h3><a href="#single-pass-compilers" id="single-pass-compilers"><small>2&#8202;.&#8202;2&#8202;.&#8202;1</small>Single-pass compilers</a></h3>
<p>Some simple compilers interleave parsing, analysis, and code generation so that
they produce output code directly in the parser, without ever allocating any
syntax trees or other IRs. These <span name="sdt"><strong>single-pass
compilers</strong></span> restrict the design of the language. You have no intermediate
data structures to store global information about the program, and you don&rsquo;t
revisit any previously parsed part of the code. That means as soon as you see
some expression, you need to know enough to correctly compile it.</p>
<aside name="sdt">
<p><a href="https://en.wikipedia.org/wiki/Syntax-directed_translation"><strong>Syntax-directed translation</strong></a> is a structured technique for building
these all-at-once compilers. You associate an <em>action</em> with each piece of the
grammar, usually one that generates output code. Then, whenever the parser
matches that chunk of syntax, it executes the action, building up the target
code one rule at a time.</p>
</aside>
<p>Pascal and C were designed around this limitation. At the time, memory was so
precious that a compiler might not even be able to hold an entire <em>source file</em>
in memory, much less the whole program. This is why Pascal&rsquo;s grammar requires
type declarations to appear first in a block. It&rsquo;s why in C you can&rsquo;t call a
function above the code that defines it unless you have an explicit forward
declaration that tells the compiler what it needs to know to generate code for a
call to the later function.</p>
<h3><a href="#tree-walk-interpreters" id="tree-walk-interpreters"><small>2&#8202;.&#8202;2&#8202;.&#8202;2</small>Tree-walk interpreters</a></h3>
<p>Some programming languages begin executing code right after parsing it to an AST
(with maybe a bit of static analysis applied). To run the program, the
interpreter traverses the syntax tree one branch and leaf at a time, evaluating
each node as it goes.</p>
<p>This implementation style is common for student projects and little languages,
but is not widely used for <span name="ruby">general-purpose</span> languages
since it tends to be slow. Some people use &ldquo;interpreter&rdquo; to mean only these
kinds of implementations, but others define that word more generally, so I&rsquo;ll
use the inarguably explicit <strong>tree-walk interpreter</strong> to refer to these. Our
first interpreter rolls this way.</p>
<aside name="ruby">
<p>A notable exception is early versions of Ruby, which were tree walkers. At 1.9,
the canonical implementation of Ruby switched from the original MRI (Matz&rsquo;s Ruby
Interpreter) to Koichi Sasada&rsquo;s YARV (Yet Another Ruby VM). YARV is a
bytecode virtual machine.</p>
</aside>
<h3><a href="#transpilers" id="transpilers"><small>2&#8202;.&#8202;2&#8202;.&#8202;3</small>Transpilers</a></h3>
<p><span name="gary">Writing</span> a complete back end for a language can be a lot
of work. If you have some existing generic IR to target, you could bolt your
front end onto that. Otherwise, it seems like you&rsquo;re stuck. But what if you
treated some other <em>source language</em> as if it were an intermediate
representation?</p>
<p>You write a front end for your language. Then, in the back end, instead of doing
all the work to <em>lower</em> the semantics to some primitive target language, you
produce a string of valid source code for some other language that&rsquo;s about as
high level as yours. Then, you use the existing compilation tools for <em>that</em>
language as your escape route off the mountain and down to something you can
execute.</p>
<p>They used to call this a <strong>source-to-source compiler</strong> or a <strong>transcompiler</strong>.
After the rise of languages that compile to JavaScript in order to run in the
browser, they&rsquo;ve affected the hipster sobriquet <strong>transpiler</strong>.</p>
<aside name="gary">
<p>The first transcompiler, XLT86, translated 8080 assembly into 8086 assembly.
That might seem straightforward, but keep in mind the 8080 was an 8-bit chip and
the 8086 a 16-bit chip that could use each register as a pair of 8-bit ones.
XLT86 did data flow analysis to track register usage in the source program and
then efficiently map it to the register set of the 8086.</p>
<p>It was written by Gary Kildall, a tragic hero of computer science if there
ever was one. One of the first people to recognize the promise of
microcomputers, he created PL/M and CP/M, the first high-level language and OS
for them.</p>
<p>He was a sea captain, business owner, licensed pilot, and motorcyclist. A TV
host with the Kris Kristofferson-esque look sported by dashing bearded dudes in
the &rsquo;80s. He took on Bill Gates and, like many, lost, before meeting his end in
a biker bar under mysterious circumstances. He died too young, but sure as hell
lived before he did.</p>
</aside>
<p>While the first transcompiler translated one assembly language to another,
today, most transpilers work on higher-level languages. After the viral spread
of UNIX to machines various and sundry, there began a long tradition of
compilers that produced C as their output language. C compilers were available
everywhere UNIX was and produced efficient code, so targeting C was a good way
to get your language running on a lot of architectures.</p>
<p>Web browsers are the &ldquo;machines&rdquo; of today, and their &ldquo;machine code&rdquo; is
JavaScript, so these days it seems <a href="https://github.com/jashkenas/coffeescript/wiki/list-of-languages-that-compile-to-js">almost every language out there</a> has a
compiler that targets JS since that&rsquo;s the <span name="js">main</span> way to get
your code running in a browser.</p>
<aside name="js">
<p>JS used to be the <em>only</em> way to execute code in a browser. Thanks to
<a href="https://github.com/webassembly/">WebAssembly</a>, compilers now have a second, lower-level language they can
target that runs on the web.</p>
</aside>
<p>The front end<span class="em">&mdash;</span>scanner and parser<span class="em">&mdash;</span>of a transpiler looks like other
compilers. Then, if the source language is only a simple syntactic skin over the
target language, it may skip analysis entirely and go straight to outputting the
analogous syntax in the destination language.</p>
<p>If the two languages are more semantically different, you&rsquo;ll see more of the
typical phases of a full compiler including analysis and possibly even
optimization. Then, when it comes to code generation, instead of outputting some
binary language like machine code, you produce a string of grammatically correct
source (well, destination) code in the target language.</p>
<p>Either way, you then run that resulting code through the output language&rsquo;s
existing compilation pipeline, and you&rsquo;re good to go.</p>
<h3><a href="#just-in-time-compilation" id="just-in-time-compilation"><small>2&#8202;.&#8202;2&#8202;.&#8202;4</small>Just-in-time compilation</a></h3>
<p>This last one is less a shortcut and more a dangerous alpine scramble best
reserved for experts. The fastest way to execute code is by compiling it to
machine code, but you might not know what architecture your end user&rsquo;s machine
supports. What to do?</p>
<p>You can do the same thing that the HotSpot Java Virtual Machine (JVM),
Microsoft&rsquo;s Common Language Runtime (CLR), and most JavaScript interpreters do.
On the end user&rsquo;s machine, when the program is loaded<span class="em">&mdash;</span>either from source in
the case of JS, or platform-independent bytecode for the JVM and CLR<span class="em">&mdash;</span>you
compile it to native code for the architecture their computer supports.
Naturally enough, this is called <strong>just-in-time compilation</strong>. Most hackers just
say &ldquo;JIT&rdquo;, pronounced like it rhymes with &ldquo;fit&rdquo;.</p>
<p>The most sophisticated JITs insert profiling hooks into the generated code to
see which regions are most performance critical and what kind of data is flowing
through them. Then, over time, they will automatically recompile those
<span name="hot">hot spots</span> with more advanced optimizations.</p>
<aside name="hot">
<p>This is, of course, exactly where the HotSpot JVM gets its name.</p>
</aside>
<h2><a href="#compilers-and-interpreters" id="compilers-and-interpreters"><small>2&#8202;.&#8202;3</small>Compilers and Interpreters</a></h2>
<p>Now that I&rsquo;ve stuffed your head with a dictionary&rsquo;s worth of programming
language jargon, we can finally address a question that&rsquo;s plagued coders since
time immemorial: What&rsquo;s the difference between a compiler and an interpreter?</p>
<p>It turns out this is like asking the difference between a fruit and a vegetable.
That seems like a binary either-or choice, but actually &ldquo;fruit&rdquo; is a <em>botanical</em>
term and &ldquo;vegetable&rdquo; is <em>culinary</em>. One does not strictly imply the negation of
the other. There are fruits that aren&rsquo;t vegetables (apples) and vegetables that
aren&rsquo;t fruits (carrots), but also edible plants that are both fruits <em>and</em>
vegetables, like tomatoes.</p>
<p><span name="veg"></span></p><img src="image/a-map-of-the-territory/plants.png" alt="A Venn diagram of edible plants" />
<aside name="veg">
<p>Peanuts (which are not even nuts) and cereals like wheat are actually fruit, but
I got this drawing wrong. What can I say, I&rsquo;m a software engineer, not a
botanist. I should probably erase the little peanut guy, but he&rsquo;s so cute that I
can&rsquo;t bear to.</p>
<p>Now <em>pine nuts</em>, on the other hand, are plant-based foods that are neither
fruits nor vegetables. At least as far as I can tell.</p>
</aside>
<p>So, back to languages:</p>
<ul>
<li>
<p><strong>Compiling</strong> is an <em>implementation technique</em> that involves translating a
source language to some other<span class="em">&mdash;</span>usually lower-level<span class="em">&mdash;</span>form. When you
generate bytecode or machine code, you are compiling. When you transpile to
another high-level language, you are compiling too.</p>
</li>
<li>
<p>When we say a language implementation &ldquo;is a <strong>compiler</strong>&rdquo;, we mean it
translates source code to some other form but doesn&rsquo;t execute it. The user has
to take the resulting output and run it themselves.</p>
</li>
<li>
<p>Conversely, when we say an implementation &ldquo;is an <strong>interpreter</strong>&rdquo;, we mean it
takes in source code and executes it immediately. It runs programs &ldquo;from
source&rdquo;.</p>
</li>
</ul>
<p>Like apples and oranges, some implementations are clearly compilers and <em>not</em>
interpreters. GCC and Clang take your C code and compile it to machine code. An
end user runs that executable directly and may never even know which tool was
used to compile it. So those are <em>compilers</em> for C.</p>
<p>In older versions of Matz&rsquo;s canonical implementation of Ruby, the user ran Ruby
from source. The implementation parsed it and executed it directly by traversing
the syntax tree. No other translation occurred, either internally or in any
user-visible form. So this was definitely an <em>interpreter</em> for Ruby.</p>
<p>But what of CPython? When you run your Python program using it, the code is
parsed and converted to an internal bytecode format, which is then executed
inside the VM. From the user&rsquo;s perspective, this is clearly an interpreter<span class="em">&mdash;</span>they run their program from source. But if you look under CPython&rsquo;s scaly skin,
you&rsquo;ll see that there is definitely some compiling going on.</p>
<p>The answer is that it is <span name="go">both</span>. CPython <em>is</em> an
interpreter, and it <em>has</em> a compiler. In practice, most scripting languages work
this way, as you can see:</p>
<aside name="go">
<p>The <a href="https://golang.org/">Go tool</a> is even more of a horticultural curiosity. If you run <code>go build</code>, it compiles your Go source code to machine code and stops. If you type
<code>go run</code>, it does that, then immediately executes the generated executable.</p>
<p>So <code>go</code> <em>is</em> a compiler (you can use it as a tool to compile code without
running it), <em>is</em> an interpreter (you can invoke it to immediately run a program
from source), and also <em>has</em> a compiler (when you use it as an interpreter, it
is still compiling internally).</p>
</aside><img src="image/a-map-of-the-territory/venn.png" alt="A Venn diagram of compilers and interpreters" />
<p>That overlapping region in the center is where our second interpreter lives too,
since it internally compiles to bytecode. So while this book is nominally about
interpreters, we&rsquo;ll cover some compilation too.</p>
<h2><a href="#our-journey" id="our-journey"><small>2&#8202;.&#8202;4</small>Our Journey</a></h2>
<p>That&rsquo;s a lot to take in all at once. Don&rsquo;t worry. This isn&rsquo;t the chapter where
you&rsquo;re expected to <em>understand</em> all of these pieces and parts. I just want you
to know that they are out there and roughly how they fit together.</p>
<p>This map should serve you well as you explore the territory beyond the guided
path we take in this book. I want to leave you yearning to strike out on your
own and wander all over that mountain.</p>
<p>But, for now, it&rsquo;s time for our own journey to begin. Tighten your bootlaces,
cinch up your pack, and come along. From <span name="here">here</span> on out,
all you need to focus on is the path in front of you.</p>
<aside name="here">
<p>Henceforth, I promise to tone down the whole mountain metaphor thing.</p>
</aside>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Pick an open source implementation of a language you like. Download the
source code and poke around in it. Try to find the code that implements the
scanner and parser. Are they handwritten, or generated using tools like
Lex and Yacc? (<code>.l</code> or <code>.y</code> files usually imply the latter.)</p>
</li>
<li>
<p>Just-in-time compilation tends to be the fastest way to implement dynamically
typed languages, but not all of them use it. What reasons are there to <em>not</em>
JIT?</p>
</li>
<li>
<p>Most Lisp implementations that compile to C also contain an interpreter that
lets them execute Lisp code on the fly as well. Why?</p>
</li>
</ol>
</div>

<footer>
<a href="the-lox-language.html" class="next">
  Next Chapter: &ldquo;The Lox Language&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/a-tree-walk-interpreter.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>A Tree-Walk Interpreter &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h2><small>II</small>A Tree-Walk Interpreter</h2>

<ul>
    <li><a href="scanning.html"><small>4</small>Scanning</a></li>
    <li><a href="representing-code.html"><small>5</small>Representing Code</a></li>
    <li><a href="parsing-expressions.html"><small>6</small>Parsing Expressions</a></li>
    <li><a href="evaluating-expressions.html"><small>7</small>Evaluating Expressions</a></li>
    <li><a href="statements-and-state.html"><small>8</small>Statements and State</a></li>
    <li><a href="control-flow.html"><small>9</small>Control Flow</a></li>
    <li><a href="functions.html"><small>10</small>Functions</a></li>
    <li><a href="resolving-and-binding.html"><small>11</small>Resolving and Binding</a></li>
    <li><a href="classes.html"><small>12</small>Classes</a></li>
    <li><a href="inheritance.html"><small>13</small>Inheritance</a></li>
</ul>


<div class="prev-next">
    <a href="the-lox-language.html" title="The Lox Language" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="scanning.html" title="Scanning" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="the-lox-language.html" title="The Lox Language" class="prev">←</a>
<a href="scanning.html" title="Scanning" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h2><small>II</small>A Tree-Walk Interpreter</h2>

<ul>
    <li><a href="scanning.html"><small>4</small>Scanning</a></li>
    <li><a href="representing-code.html"><small>5</small>Representing Code</a></li>
    <li><a href="parsing-expressions.html"><small>6</small>Parsing Expressions</a></li>
    <li><a href="evaluating-expressions.html"><small>7</small>Evaluating Expressions</a></li>
    <li><a href="statements-and-state.html"><small>8</small>Statements and State</a></li>
    <li><a href="control-flow.html"><small>9</small>Control Flow</a></li>
    <li><a href="functions.html"><small>10</small>Functions</a></li>
    <li><a href="resolving-and-binding.html"><small>11</small>Resolving and Binding</a></li>
    <li><a href="classes.html"><small>12</small>Classes</a></li>
    <li><a href="inheritance.html"><small>13</small>Inheritance</a></li>
</ul>


<div class="prev-next">
    <a href="the-lox-language.html" title="The Lox Language" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="scanning.html" title="Scanning" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">II</div>
  <h1 class="part">A Tree-Walk Interpreter</h1>

<p>With this part, we begin jlox, the first of our two interpreters. Programming
languages are a huge topic with piles of concepts and terminology to cram into
your brain all at once. Programming language theory requires a level of mental
rigor that you probably haven&rsquo;t had to summon since your last calculus final.
(Fortunately, there isn&rsquo;t too much theory in this book.)</p>
<p>Implementing an interpreter uses a few architectural tricks and design
patterns uncommon in other kinds of applications, so we&rsquo;ll be getting used to
the engineering side of things too. Given all of that, we&rsquo;ll keep the code we
have to write as simple and plain as possible.</p>
<p>In less than two thousand lines of clean Java code, we&rsquo;ll build a complete
interpreter for Lox that implements every single feature of the language,
exactly as we&rsquo;ve specified. The first few chapters work front-to-back through
the phases of the interpreter<span class="em">&mdash;</span><a href="scanning.html">scanning</a>, <a href="parsing-expressions.html">parsing</a>, and
<a href="evaluating-expressions.html">evaluating code</a>. After that, we add language features one at a time,
growing a simple calculator into a full-fledged scripting language.</p>

<footer>
<a href="scanning.html" class="next">
  Next Chapter: &ldquo;Scanning&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/a-virtual-machine.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>A Virtual Machine &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">A Virtual Machine<small>15</small></a></h3>

<ul>
    <li><a href="#an-instruction-execution-machine"><small>15.1</small> An Instruction Execution Machine</a></li>
    <li><a href="#a-value-stack-manipulator"><small>15.2</small> A Value Stack Manipulator</a></li>
    <li><a href="#an-arithmetic-calculator"><small>15.3</small> An Arithmetic Calculator</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Register-Based Bytecode</a></li>
</ul>


<div class="prev-next">
    <a href="chunks-of-bytecode.html" title="Chunks of Bytecode" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="scanning-on-demand.html" title="Scanning on Demand" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="chunks-of-bytecode.html" title="Chunks of Bytecode" class="prev">←</a>
<a href="scanning-on-demand.html" title="Scanning on Demand" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">A Virtual Machine<small>15</small></a></h3>

<ul>
    <li><a href="#an-instruction-execution-machine"><small>15.1</small> An Instruction Execution Machine</a></li>
    <li><a href="#a-value-stack-manipulator"><small>15.2</small> A Value Stack Manipulator</a></li>
    <li><a href="#an-arithmetic-calculator"><small>15.3</small> An Arithmetic Calculator</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Register-Based Bytecode</a></li>
</ul>


<div class="prev-next">
    <a href="chunks-of-bytecode.html" title="Chunks of Bytecode" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="scanning-on-demand.html" title="Scanning on Demand" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">15</div>
  <h1>A Virtual Machine</h1>

<blockquote>
<p>Magicians protect their secrets not because the secrets are large and
important, but because they are so small and trivial. The wonderful effects
created on stage are often the result of a secret so absurd that the magician
would be embarrassed to admit that that was how it was done.</p>
<p><cite>Christopher Priest, <em>The Prestige</em></cite></p>
</blockquote>
<p>We&rsquo;ve spent a lot of time talking about how to represent a program as a sequence
of bytecode instructions, but it feels like learning biology using only stuffed,
dead animals. We know what instructions are in theory, but we&rsquo;ve never seen them
in action, so it&rsquo;s hard to really understand what they <em>do</em>. It would be hard to
write a compiler that outputs bytecode when we don&rsquo;t have a good understanding
of how that bytecode behaves.</p>
<p>So, before we go and build the front end of our new interpreter, we will begin
with the back end<span class="em">&mdash;</span>the virtual machine that executes instructions. It breathes
life into the bytecode. Watching the instructions prance around gives us a
clearer picture of how a compiler might translate the user&rsquo;s source code into a
series of them.</p>
<h2><a href="#an-instruction-execution-machine" id="an-instruction-execution-machine"><small>15&#8202;.&#8202;1</small>An Instruction Execution Machine</a></h2>
<p>The virtual machine is one part of our interpreter&rsquo;s internal architecture. You
hand it a chunk of code<span class="em">&mdash;</span>literally a Chunk<span class="em">&mdash;</span>and it runs it. The code and
data structures for the VM reside in a new module.</p>
<div class="codehilite"><div class="source-file"><em>vm.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_vm_h</span>
<span class="a">#define clox_vm_h</span>

<span class="a">#include &quot;chunk.h&quot;</span>

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Chunk</span>* <span class="i">chunk</span>;
} <span class="a">VM</span>;

<span class="t">void</span> <span class="i">initVM</span>();
<span class="t">void</span> <span class="i">freeVM</span>();

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, create new file</div>

<p>As usual, we start simple. The VM will gradually acquire a whole pile of state
it needs to keep track of, so we define a struct now to stuff that all in.
Currently, all we store is the chunk that it executes.</p>
<p>Like we do with most of the data structures we create, we also define functions
to create and tear down a VM. Here&rsquo;s the implementation:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
create new file</div>
<pre><span class="a">#include &quot;common.h&quot;</span>
<span class="a">#include &quot;vm.h&quot;</span>

<span class="a">VM</span> <span class="i">vm</span>;<span name="one"> </span>

<span class="t">void</span> <span class="i">initVM</span>() {
}

<span class="t">void</span> <span class="i">freeVM</span>() {
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, create new file</div>

<p>OK, calling those functions &ldquo;implementations&rdquo; is a stretch. We don&rsquo;t have any
interesting state to initialize or free yet, so the functions are empty. Trust
me, we&rsquo;ll get there.</p>
<p>The slightly more interesting line here is that declaration of <code>vm</code>. This module
is eventually going to have a slew of functions and it would be a chore to pass
around a pointer to the VM to all of them. Instead, we declare a single global
VM object. We need only one anyway, and this keeps the code in the book a little
lighter on the page.</p>
<aside name="one">
<p>The choice to have a static VM instance is a concession for the book, but not
necessarily a sound engineering choice for a real language implementation. If
you&rsquo;re building a VM that&rsquo;s designed to be embedded in other host applications,
it gives the host more flexibility if you <em>do</em> explicitly take a VM pointer
and pass it around.</p>
<p>That way, the host app can control when and where memory for the VM is
allocated, run multiple VMs in parallel, etc.</p>
<p>What I&rsquo;m using here is a global variable, and <a href="http://gameprogrammingpatterns.com/singleton.html">everything bad you&rsquo;ve heard about
global variables</a> is still true when programming in the large. But when
keeping things small for a book<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
</aside>
<p>Before we start pumping fun code into our VM, let&rsquo;s go ahead and wire it up to
the interpreter&rsquo;s main entrypoint.</p>
<div class="codehilite"><pre class="insert-before">int main(int argc, const char* argv[]) {
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">  <span class="i">initVM</span>();

</pre><pre class="insert-after">  Chunk chunk;
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>We spin up the VM when the interpreter first starts. Then when we&rsquo;re about to
exit, we wind it down.</p>
<div class="codehilite"><pre class="insert-before">  disassembleChunk(&amp;chunk, &quot;test chunk&quot;);
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">  <span class="i">freeVM</span>();
</pre><pre class="insert-after">  freeChunk(&amp;chunk);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>One last ceremonial obligation:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;debug.h&quot;
</pre><div class="source-file"><em>main.c</em></div>
<pre class="insert"><span class="a">#include &quot;vm.h&quot;</span>
</pre><pre class="insert-after">

int main(int argc, const char* argv[]) {
</pre></div>
<div class="source-file-narrow"><em>main.c</em></div>

<p>Now when you run clox, it starts up the VM before it creates that hand-authored
chunk from the <a href="chunks-of-bytecode.html#disassembling-chunks">last chapter</a>. The VM is ready and waiting, so let&rsquo;s teach it
to do something.</p>
<h3><a href="#executing-instructions" id="executing-instructions"><small>15&#8202;.&#8202;1&#8202;.&#8202;1</small>Executing instructions</a></h3>
<p>The VM springs into action when we command it to interpret a chunk of bytecode.</p>
<div class="codehilite"><pre class="insert-before">  disassembleChunk(&amp;chunk, &quot;test chunk&quot;);
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">  <span class="i">interpret</span>(&amp;<span class="i">chunk</span>);
</pre><pre class="insert-after">  freeVM();
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>This function is the main entrypoint into the VM. It&rsquo;s declared like so:</p>
<div class="codehilite"><pre class="insert-before">void freeVM();
</pre><div class="source-file"><em>vm.h</em><br>
add after <em>freeVM</em>()</div>
<pre class="insert"><span class="t">InterpretResult</span> <span class="i">interpret</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, add after <em>freeVM</em>()</div>

<p>The VM runs the chunk and then responds with a value from this enum:</p>
<div class="codehilite"><pre class="insert-before">} VM;

</pre><div class="source-file"><em>vm.h</em><br>
add after struct <em>VM</em></div>
<pre class="insert"><span class="k">typedef</span> <span class="k">enum</span> {
  <span class="a">INTERPRET_OK</span>,
  <span class="a">INTERPRET_COMPILE_ERROR</span>,
  <span class="a">INTERPRET_RUNTIME_ERROR</span>
} <span class="t">InterpretResult</span>;

</pre><pre class="insert-after">void initVM();
void freeVM();
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, add after struct <em>VM</em></div>

<p>We aren&rsquo;t using the result yet, but when we have a compiler that reports static
errors and a VM that detects runtime errors, the interpreter will use this to
know how to set the exit code of the process.</p>
<p>We&rsquo;re inching towards some actual implementation.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>freeVM</em>()</div>
<pre><span class="t">InterpretResult</span> <span class="i">interpret</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>) {
  <span class="i">vm</span>.<span class="i">chunk</span> = <span class="i">chunk</span>;
  <span class="i">vm</span>.<span class="i">ip</span> = <span class="i">vm</span>.<span class="i">chunk</span>-&gt;<span class="i">code</span>;
  <span class="k">return</span> <span class="i">run</span>();
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>freeVM</em>()</div>

<p>First, we store the chunk being executed in the VM. Then we call <code>run()</code>, an
internal helper function that actually runs the bytecode instructions. Between
those two parts is an intriguing line. What is this <code>ip</code> business?</p>
<p>As the VM works its way through the bytecode, it keeps track of where it is<span class="em">&mdash;</span>the location of the instruction currently being executed. We don&rsquo;t use a <span
name="local">local</span> variable inside <code>run()</code> for this because eventually
other functions will need to access it. Instead, we store it as a field in VM.</p>
<aside name="local">
<p>If we were trying to squeeze every ounce of speed out of our bytecode
interpreter, we would store <code>ip</code> in a local variable. It gets modified so often
during execution that we want the C compiler to keep it in a register.</p>
</aside>
<div class="codehilite"><pre class="insert-before">typedef struct {
  Chunk* chunk;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">uint8_t</span>* <span class="i">ip</span>;
</pre><pre class="insert-after">} VM;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>Its type is a byte pointer. We use an actual real C pointer pointing right into
the middle of the bytecode array instead of something like an integer index
because it&rsquo;s faster to dereference a pointer than look up an element in an array
by index.</p>
<p>The name &ldquo;IP&rdquo; is traditional, and<span class="em">&mdash;</span>unlike many traditional names in CS<span class="em">&mdash;</span>actually makes sense: it&rsquo;s an <strong><a href="https://en.wikipedia.org/wiki/Program_counter">instruction pointer</a></strong>. Almost every
instruction set in the <span name="ip">world</span>, real and virtual, has a
register or variable like this.</p>
<aside name="ip">
<p>x86, x64, and the CLR call it &ldquo;IP&rdquo;. 68k, PowerPC, ARM, p-code, and the JVM call
it &ldquo;PC&rdquo;, for <strong>program counter</strong>.</p>
</aside>
<p>We initialize <code>ip</code> by pointing it at the first byte of code in the chunk. We
haven&rsquo;t executed that instruction yet, so <code>ip</code> points to the instruction <em>about
to be executed</em>. This will be true during the entire time the VM is running: the
IP always points to the next instruction, not the one currently being handled.</p>
<p>The real fun happens in <code>run()</code>.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>freeVM</em>()</div>
<pre><span class="k">static</span> <span class="t">InterpretResult</span> <span class="i">run</span>() {
<span class="a">#define READ_BYTE() (*vm.ip++)</span>

  <span class="k">for</span> (;;) {
    <span class="t">uint8_t</span> <span class="i">instruction</span>;
    <span class="k">switch</span> (<span class="i">instruction</span> = <span class="a">READ_BYTE</span>()) {
      <span class="k">case</span> <span class="a">OP_RETURN</span>: {
        <span class="k">return</span> <span class="a">INTERPRET_OK</span>;
      }
    }
  }

<span class="a">#undef READ_BYTE</span>
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>freeVM</em>()</div>

<p>This is the single most <span name="important">important</span> function in all
of clox, by far. When the interpreter executes a user&rsquo;s program, it will spend
something like 90% of its time inside <code>run()</code>. It is the beating heart of the
VM.</p>
<aside name="important">
<p>Or, at least, it <em>will</em> be in a few chapters when it has enough content to be
useful. Right now, it&rsquo;s not exactly a wonder of software wizardry.</p>
</aside>
<p>Despite that dramatic intro, it&rsquo;s conceptually pretty simple. We have an outer
loop that goes and goes. Each turn through that loop, we read and execute a
single bytecode instruction.</p>
<p>To process an instruction, we first figure out what kind of instruction we&rsquo;re
dealing with. The <code>READ_BYTE</code> macro reads the byte currently pointed at by <code>ip</code>
and then <span name="next">advances</span> the instruction pointer. The first
byte of any instruction is the opcode. Given a numeric opcode, we need to get to
the right C code that implements that instruction&rsquo;s semantics. This process is
called <strong>decoding</strong> or <strong>dispatching</strong> the instruction.</p>
<aside name="next">
<p>Note that <code>ip</code> advances as soon as we read the opcode, before we&rsquo;ve actually
started executing the instruction. So, again, <code>ip</code> points to the <em>next</em>
byte of code to be used.</p>
</aside>
<p>We do that process for every single instruction, every single time one is
executed, so this is the most performance-critical part of the entire virtual
machine. Programming language lore is filled with <span
name="dispatch">clever</span> techniques to do bytecode dispatch efficiently,
going all the way back to the early days of computers.</p>
<aside name="dispatch">
<p>If you want to learn some of these techniques, look up &ldquo;direct threaded code&rdquo;,
&ldquo;jump table&rdquo;, and &ldquo;computed goto&rdquo;.</p>
</aside>
<p>Alas, the fastest solutions require either non-standard extensions to C, or
handwritten assembly code. For clox, we&rsquo;ll keep it simple. Just like our
disassembler, we have a single giant <code>switch</code> statement with a case for each
opcode. The body of each case implements that opcode&rsquo;s behavior.</p>
<p>So far, we handle only a single instruction, <code>OP_RETURN</code>, and the only thing it
does is exit the loop entirely. Eventually, that instruction will be used to
return from the current Lox function, but we don&rsquo;t have functions yet, so we&rsquo;ll
repurpose it temporarily to end the execution.</p>
<p>Let&rsquo;s go ahead and support our one other instruction.</p>
<div class="codehilite"><pre class="insert-before">    switch (instruction = READ_BYTE()) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_CONSTANT</span>: {
        <span class="t">Value</span> <span class="i">constant</span> = <span class="a">READ_CONSTANT</span>();
        <span class="i">printValue</span>(<span class="i">constant</span>);
        <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We don&rsquo;t have enough machinery in place yet to do anything useful with a
constant. For now, we&rsquo;ll just print it out so we interpreter hackers can see
what&rsquo;s going on inside our VM. That call to <code>printf()</code> necessitates an include.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add to top of file</div>
<pre class="insert"><span class="a">#include &lt;stdio.h&gt;</span>

</pre><pre class="insert-after">#include &quot;common.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add to top of file</div>

<p>We also have a new macro to define.</p>
<div class="codehilite"><pre class="insert-before">#define READ_BYTE() (*vm.ip++)
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#define READ_CONSTANT() (vm.chunk-&gt;constants.values[READ_BYTE()])</span>
</pre><pre class="insert-after">

  for (;;) {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p><code>READ_CONSTANT()</code> reads the next byte from the bytecode, treats the resulting
number as an index, and looks up the corresponding Value in the chunk&rsquo;s constant
table. In later chapters, we&rsquo;ll add a few more instructions with operands that
refer to constants, so we&rsquo;re setting up this helper macro now.</p>
<p>Like the previous <code>READ_BYTE</code> macro, <code>READ_CONSTANT</code> is only used inside
<code>run()</code>. To make that scoping more explicit, the macro definitions themselves
are confined to that function. We <span name="macro">define</span> them at the
beginning and<span class="em">&mdash;</span>because we care<span class="em">&mdash;</span>undefine them at the end.</p>
<div class="codehilite"><pre class="insert-before">#undef READ_BYTE
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#undef READ_CONSTANT</span>
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<aside name="macro">
<p>Undefining these macros explicitly might seem needlessly fastidious, but C tends
to punish sloppy users, and the C preprocessor doubly so.</p>
</aside>
<h3><a href="#execution-tracing" id="execution-tracing"><small>15&#8202;.&#8202;1&#8202;.&#8202;2</small>Execution tracing</a></h3>
<p>If you run clox now, it executes the chunk we hand-authored in the last chapter
and spits out <code>1.2</code> to your terminal. We can see that it&rsquo;s working, but that&rsquo;s
only because our implementation of <code>OP_CONSTANT</code> has temporary code to log the
value. Once that instruction is doing what it&rsquo;s supposed to do and plumbing that
constant along to other operations that want to consume it, the VM will become a
black box. That makes our lives as VM implementers harder.</p>
<p>To help ourselves out, now is a good time to add some diagnostic logging to the
VM like we did with chunks themselves. In fact, we&rsquo;ll even reuse the same code.
We don&rsquo;t want this logging enabled all the time<span class="em">&mdash;</span>it&rsquo;s just for us VM hackers,
not Lox users<span class="em">&mdash;</span>so first we create a flag to hide it behind.</p>
<div class="codehilite"><pre class="insert-before">#include &lt;stdint.h&gt;
</pre><div class="source-file"><em>common.h</em></div>
<pre class="insert">

<span class="a">#define DEBUG_TRACE_EXECUTION</span>
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>common.h</em></div>

<p>When this flag is defined, the VM disassembles and prints each instruction right
before executing it. Where our previous disassembler walked an entire chunk
once, statically, this disassembles instructions dynamically, on the fly.</p>
<div class="codehilite"><pre class="insert-before">  for (;;) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#ifdef DEBUG_TRACE_EXECUTION</span>
    <span class="i">disassembleInstruction</span>(<span class="i">vm</span>.<span class="i">chunk</span>,
                           (<span class="t">int</span>)(<span class="i">vm</span>.<span class="i">ip</span> - <span class="i">vm</span>.<span class="i">chunk</span>-&gt;<span class="i">code</span>));
<span class="a">#endif</span>

</pre><pre class="insert-after">    uint8_t instruction;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Since <code>disassembleInstruction()</code> takes an integer byte <em>offset</em> and we store the
current instruction reference as a direct pointer, we first do a little pointer
math to convert <code>ip</code> back to a relative offset from the beginning of the
bytecode. Then we disassemble the instruction that begins at that byte.</p>
<p>As ever, we need to bring in the declaration of the function before we can call
it.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;
</pre><div class="source-file"><em>vm.c</em></div>
<pre class="insert"><span class="a">#include &quot;debug.h&quot;</span>
</pre><pre class="insert-after">#include &quot;vm.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em></div>

<p>I know this code isn&rsquo;t super impressive so far<span class="em">&mdash;</span>it&rsquo;s literally a switch
statement wrapped in a <code>for</code> loop<span class="em">&mdash;</span>but, believe it or not, this is one of the two
major components of our VM. With this, we can imperatively execute instructions.
Its simplicity is a virtue<span class="em">&mdash;</span>the less work it does, the faster it can do it.
Contrast this with all of the complexity and overhead we had in jlox with the
Visitor pattern for walking the AST.</p>
<h2><a href="#a-value-stack-manipulator" id="a-value-stack-manipulator"><small>15&#8202;.&#8202;2</small>A Value Stack Manipulator</a></h2>
<p>In addition to imperative side effects, Lox has expressions that produce,
modify, and consume values. Thus, our compiled bytecode needs a way to shuttle
values around between the different instructions that need them. For example:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="n">3</span> - <span class="n">2</span>;
</pre></div>
<p>We obviously need instructions for the constants 3 and 2, the <code>print</code> statement,
and the subtraction. But how does the subtraction instruction know that 3 is
the <span name="word">minuend</span> and 2 is the subtrahend? How does the print
instruction know to print the result of that?</p>
<aside name="word">
<p>Yes, I did have to look up &ldquo;subtrahend&rdquo; and &ldquo;minuend&rdquo; in a dictionary. But
aren&rsquo;t they delightful words? &ldquo;Minuend&rdquo; sounds like a kind of Elizabethan dance
and &ldquo;subtrahend&rdquo; might be some sort of underground Paleolithic monument.</p>
</aside>
<p>To put a finer point on it, look at this thing right here:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">echo</span>(<span class="i">n</span>) {
  <span class="k">print</span> <span class="i">n</span>;
  <span class="k">return</span> <span class="i">n</span>;
}

<span class="k">print</span> <span class="i">echo</span>(<span class="i">echo</span>(<span class="n">1</span>) + <span class="i">echo</span>(<span class="n">2</span>)) + <span class="i">echo</span>(<span class="i">echo</span>(<span class="n">4</span>) + <span class="i">echo</span>(<span class="n">5</span>));
</pre></div>
<p>I wrapped each subexpression in a call to <code>echo()</code> that prints and returns its
argument. That side effect means we can see the exact order of operations.</p>
<p>Don&rsquo;t worry about the VM for a minute. Think about just the semantics of Lox
itself. The operands to an arithmetic operator obviously need to be evaluated
before we can perform the operation itself. (It&rsquo;s pretty hard to add <code>a + b</code> if
you don&rsquo;t know what <code>a</code> and <code>b</code> are.) Also, when we implemented expressions in
jlox, we <span name="undefined">decided</span> that the left operand must be
evaluated before the right.</p>
<aside name="undefined">
<p>We could have left evaluation order unspecified and let each implementation
decide. That leaves the door open for optimizing compilers to reorder arithmetic
expressions for efficiency, even in cases where the operands have visible side
effects. C and Scheme leave evaluation order unspecified. Java specifies
left-to-right evaluation like we do for Lox.</p>
<p>I think nailing down stuff like this is generally better for users. When
expressions are not evaluated in the order users intuit<span class="em">&mdash;</span>possibly in different
orders across different implementations!<span class="em">&mdash;</span>it can be a burning hellscape of
pain to figure out what&rsquo;s going on.</p>
</aside>
<p>Here is the syntax tree for the <code>print</code> statement:</p>
<p><img src="image/a-virtual-machine/ast.png" alt="The AST for the example
statement, with numbers marking the order that the nodes are evaluated." /></p>
<p>Given left-to-right evaluation, and the way the expressions are nested, any
correct Lox implementation <em>must</em> print these numbers in this order:</p>
<div class="codehilite"><pre>1  // from echo(1)
2  // from echo(2)
3  // from echo(1 + 2)
4  // from echo(4)
5  // from echo(5)
9  // from echo(4 + 5)
12 // from print 3 + 9
</pre></div>
<p>Our old jlox interpreter accomplishes this by recursively traversing the AST. It
does a postorder traversal. First it recurses down the left operand branch,
then the right operand, then finally it evaluates the node itself.</p>
<p>After evaluating the left operand, jlox needs to store that result somewhere
temporarily while it&rsquo;s busy traversing down through the right operand tree. We
use a local variable in Java for that. Our recursive tree-walk interpreter
creates a unique Java call frame for each node being evaluated, so we could have
as many of these local variables as we needed.</p>
<p>In clox, our <code>run()</code> function is not recursive<span class="em">&mdash;</span>the nested expression tree is
flattened out into a linear series of instructions. We don&rsquo;t have the luxury of
using C local variables, so how and where should we store these temporary
values? You can probably <span name="guess">guess</span> already, but I want to
really drill into this because it&rsquo;s an aspect of programming that we take for
granted, but we rarely learn <em>why</em> computers are architected this way.</p>
<aside name="guess">
<p>Hint: it&rsquo;s in the name of this section, and it&rsquo;s how Java and C manage recursive
calls to functions.</p>
</aside>
<p>Let&rsquo;s do a weird exercise. We&rsquo;ll walk through the execution of the above program
a step at a time:</p>
<p><img src="image/a-virtual-machine/bars.png" alt="The series of instructions with
bars showing which numbers need to be preserved across which instructions." /></p>
<p>On the left are the steps of code. On the right are the values we&rsquo;re tracking.
Each bar represents a number. It starts when the value is first produced<span class="em">&mdash;</span>either a constant or the result of an addition. The length of the bar tracks
when a previously produced value needs to be kept around, and it ends when that
value finally gets consumed by an operation.</p>
<p>As you step through, you see values appear and then later get eaten. The
longest-lived ones are the values produced from the left-hand side of an
addition. Those stick around while we work through the right-hand operand
expression.</p>
<p>In the above diagram, I gave each unique number its own visual column. Let&rsquo;s be
a little more parsimonious. Once a number is consumed, we allow its column to be
reused for another later value. In other words, we take all of those gaps
up there and fill them in, pushing in numbers from the right:</p>
<p><img src="image/a-virtual-machine/bars-stacked.png" alt="Like the previous
diagram, but with number bars pushed to the left, forming a stack." /></p>
<p>There&rsquo;s some interesting stuff going on here. When we shift everything over,
each number still manages to stay in a single column for its entire life. Also,
there are no gaps left. In other words, whenever a number appears earlier than
another, then it will live at least as long as that second one. The first number
to appear is the last to be consumed. Hmm<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>last-in, first-out<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>why, that&rsquo;s a
<span name="pancakes">stack</span>!</p>
<aside name="pancakes">
<p>This is also a stack:</p><img src="image/a-virtual-machine/pancakes.png" alt="A stack... of pancakes." />
</aside>
<p>In the second diagram, each time we introduce a number, we push it onto the
stack from the right. When numbers are consumed, they are always popped off from
rightmost to left.</p>
<p>Since the temporary values we need to track naturally have stack-like behavior,
our VM will use a stack to manage them. When an instruction &ldquo;produces&rdquo; a value,
it pushes it onto the stack. When it needs to consume one or more values, it
gets them by popping them off the stack.</p>
<h3><a href="#the-vms-stack" id="the-vms-stack"><small>15&#8202;.&#8202;2&#8202;.&#8202;1</small>The VM&rsquo;s Stack</a></h3>
<p>Maybe this doesn&rsquo;t seem like a revelation, but I <em>love</em> stack-based VMs. When
you first see a magic trick, it feels like something actually magical. But then
you learn how it works<span class="em">&mdash;</span>usually some mechanical gimmick or misdirection<span class="em">&mdash;</span>and
the sense of wonder evaporates. There are a <span name="wonder">couple</span> of
ideas in computer science where even after I pulled them apart and learned all
the ins and outs, some of the initial sparkle remained. Stack-based VMs are one
of those.</p>
<aside name="wonder">
<p>Heaps<span class="em">&mdash;</span><a href="https://en.wikipedia.org/wiki/Heap_(data_structure)">the data structure</a>, not <a href="https://en.wikipedia.org/wiki/Memory_management#HEAP">the memory management thing</a><span class="em">&mdash;</span>are another. And Vaughan Pratt&rsquo;s top-down operator precedence parsing scheme,
which we&rsquo;ll learn about <a href="compiling-expressions.html">in due time</a>.</p>
</aside>
<p>As you&rsquo;ll see in this chapter, executing instructions in a stack-based VM is
dead <span name="cheat">simple</span>. In later chapters, you&rsquo;ll also discover
that compiling a source language to a stack-based instruction set is a piece of
cake. And yet, this architecture is fast enough to be used by production
language implementations. It almost feels like cheating at the programming
language game.</p>
<aside name="cheat">
<p>To take a bit of the sheen off: stack-based interpreters aren&rsquo;t a silver bullet.
They&rsquo;re often <em>adequate</em>, but modern implementations of the JVM, the CLR, and
JavaScript all use sophisticated <a href="https://en.wikipedia.org/wiki/Just-in-time_compilation">just-in-time compilation</a> pipelines to
generate <em>much</em> faster native code on the fly.</p>
</aside>
<p>Alrighty, it&rsquo;s codin&rsquo; time! Here&rsquo;s the stack:</p>
<div class="codehilite"><pre class="insert-before">typedef struct {
  Chunk* chunk;
  uint8_t* ip;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">Value</span> <span class="i">stack</span>[<span class="a">STACK_MAX</span>];
  <span class="t">Value</span>* <span class="i">stackTop</span>;
</pre><pre class="insert-after">} VM;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>We implement the stack semantics ourselves on top of a raw C array. The bottom
of the stack<span class="em">&mdash;</span>the first value pushed and the last to be popped<span class="em">&mdash;</span>is at
element zero in the array, and later pushed values follow it. If we push the
letters of &ldquo;crepe&rdquo;<span class="em">&mdash;</span>my favorite stackable breakfast item<span class="em">&mdash;</span>onto the stack, in
order, the resulting C array looks like this:</p>
<p><img src="image/a-virtual-machine/array.png" alt="An array containing the
letters in 'crepe' in order starting at element 0." /></p>
<p>Since the stack grows and shrinks as values are pushed and popped, we need to
track where the top of the stack is in the array. As with <code>ip</code>, we use a direct
pointer instead of an integer index since it&rsquo;s faster to dereference the pointer
than to calculate the offset from the index each time we need it.</p>
<p>The pointer points at the array element just <em>past</em> the element containing the
top value on the stack. That seems a little odd, but almost every implementation
does this. It means we can indicate that the stack is empty by pointing at
element zero in the array.</p>
<p><img src="image/a-virtual-machine/stack-empty.png" alt="An empty array with
stackTop pointing at the first element." /></p>
<p>If we pointed to the top element, then for an empty stack we&rsquo;d need to point at
element -1. That&rsquo;s <span name="defined">undefined</span> in C. As we push values
onto the stack<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<aside name="defined">
<p>What about when the stack is <em>full</em>, you ask, Clever Reader? The C standard is
one step ahead of you. It <em>is</em> allowed and well-specified to have an array
pointer that points just past the end of an array.</p>
</aside>
<p><img src="image/a-virtual-machine/stack-c.png" alt="An array with 'c' at element
zero." /></p>
<p><span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span><code>stackTop</code> always points just past the last item.</p>
<p><img src="image/a-virtual-machine/stack-crepe.png" alt="An array with 'c', 'r',
'e', 'p', and 'e' in the first five elements." /></p>
<p>I remember it like this: <code>stackTop</code> points to where the next value to be pushed
will go. The maximum number of values we can store on the stack (for now, at
least) is:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;chunk.h&quot;
</pre><div class="source-file"><em>vm.h</em></div>
<pre class="insert">

<span class="a">#define STACK_MAX 256</span>
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>vm.h</em></div>

<p>Giving our VM a fixed stack size means it&rsquo;s possible for some sequence of
instructions to push too many values and run out of stack space<span class="em">&mdash;</span>the classic
&ldquo;stack overflow&rdquo;. We could grow the stack dynamically as needed, but for now
we&rsquo;ll keep it simple. Since VM uses Value, we need to include its declaration.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;chunk.h&quot;
</pre><div class="source-file"><em>vm.h</em></div>
<pre class="insert"><span class="a">#include &quot;value.h&quot;</span>
</pre><pre class="insert-after">

#define STACK_MAX 256
</pre></div>
<div class="source-file-narrow"><em>vm.h</em></div>

<p>Now that VM has some interesting state, we get to initialize it.</p>
<div class="codehilite"><pre class="insert-before">void initVM() {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">  <span class="i">resetStack</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>That uses this helper function:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after variable <em>vm</em></div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">resetStack</span>() {
  <span class="i">vm</span>.<span class="i">stackTop</span> = <span class="i">vm</span>.<span class="i">stack</span>;
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after variable <em>vm</em></div>

<p>Since the stack array is declared directly inline in the VM struct, we don&rsquo;t
need to allocate it. We don&rsquo;t even need to clear the unused cells in the
array<span class="em">&mdash;</span>we simply won&rsquo;t access them until after values have been stored in
them. The only initialization we need is to set <code>stackTop</code> to point to the
beginning of the array to indicate that the stack is empty.</p>
<p>The stack protocol supports two operations:</p>
<div class="codehilite"><pre class="insert-before">InterpretResult interpret(Chunk* chunk);
</pre><div class="source-file"><em>vm.h</em><br>
add after <em>interpret</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">push</span>(<span class="t">Value</span> <span class="i">value</span>);
<span class="t">Value</span> <span class="i">pop</span>();
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, add after <em>interpret</em>()</div>

<p>You can push a new value onto the top of the stack, and you can pop the most
recently pushed value back off. Here&rsquo;s the first function:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>freeVM</em>()</div>
<pre><span class="t">void</span> <span class="i">push</span>(<span class="t">Value</span> <span class="i">value</span>) {
  *<span class="i">vm</span>.<span class="i">stackTop</span> = <span class="i">value</span>;
  <span class="i">vm</span>.<span class="i">stackTop</span>++;
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>freeVM</em>()</div>

<p>If you&rsquo;re rusty on your C pointer syntax and operations, this is a good warm-up.
The first line stores <code>value</code> in the array element at the top of the stack.
Remember, <code>stackTop</code> points just <em>past</em> the last used element, at the next
available one. This stores the value in that slot. Then we increment the pointer
itself to point to the next unused slot in the array now that the previous slot
is occupied.</p>
<p>Popping is the mirror image.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>push</em>()</div>
<pre><span class="t">Value</span> <span class="i">pop</span>() {
  <span class="i">vm</span>.<span class="i">stackTop</span>--;
  <span class="k">return</span> *<span class="i">vm</span>.<span class="i">stackTop</span>;
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>push</em>()</div>

<p>First, we move the stack pointer <em>back</em> to get to the most recently used slot in
the array. Then we look up the value in that slot and return it. We don&rsquo;t need
to explicitly &ldquo;remove&rdquo; it from the array<span class="em">&mdash;</span>moving <code>stackTop</code> down is enough to
mark that slot as no longer in use.</p>
<h3><a href="#stack-tracing" id="stack-tracing"><small>15&#8202;.&#8202;2&#8202;.&#8202;2</small>Stack tracing</a></h3>
<p>We have a working stack, but it&rsquo;s hard to <em>see</em> that it&rsquo;s working. When we start
implementing more complex instructions and compiling and running larger pieces
of code, we&rsquo;ll end up with a lot of values crammed into that array. It would
make our lives as VM hackers easier if we had some visibility into the stack.</p>
<p>To that end, whenever we&rsquo;re tracing execution, we&rsquo;ll also show the current
contents of the stack before we interpret each instruction.</p>
<div class="codehilite"><pre class="insert-before">#ifdef DEBUG_TRACE_EXECUTION
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">    <span class="i">printf</span>(<span class="s">&quot;          &quot;</span>);
    <span class="k">for</span> (<span class="t">Value</span>* <span class="i">slot</span> = <span class="i">vm</span>.<span class="i">stack</span>; <span class="i">slot</span> &lt; <span class="i">vm</span>.<span class="i">stackTop</span>; <span class="i">slot</span>++) {
      <span class="i">printf</span>(<span class="s">&quot;[ &quot;</span>);
      <span class="i">printValue</span>(*<span class="i">slot</span>);
      <span class="i">printf</span>(<span class="s">&quot; ]&quot;</span>);
    }
    <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
</pre><pre class="insert-after">    disassembleInstruction(vm.chunk,
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We loop, printing each value in the array, starting at the first (bottom of the
stack) and ending when we reach the top. This lets us observe the effect of each
instruction on the stack. The output is pretty verbose, but it&rsquo;s useful when
we&rsquo;re surgically extracting a nasty bug from the bowels of the interpreter.</p>
<p>Stack in hand, let&rsquo;s revisit our two instructions. First up:</p>
<div class="codehilite"><pre class="insert-before">      case OP_CONSTANT: {
        Value constant = READ_CONSTANT();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 2 lines</div>
<pre class="insert">        <span class="i">push</span>(<span class="i">constant</span>);
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 2 lines</div>

<p>In the last chapter, I was hand-wavy about how the <code>OP_CONSTANT</code> instruction
&ldquo;loads&rdquo; a constant. Now that we have a stack, you know what it means to actually
produce a value: it gets pushed onto the stack.</p>
<div class="codehilite"><pre class="insert-before">      case OP_RETURN: {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">        <span class="i">printValue</span>(<span class="i">pop</span>());
        <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
</pre><pre class="insert-after">        return INTERPRET_OK;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Then we make <code>OP_RETURN</code> pop the stack and print the top value before exiting.
When we add support for real functions to clox, we&rsquo;ll change this code. But, for
now, it gives us a way to get the VM executing simple instruction sequences and
displaying the result.</p>
<h2><a href="#an-arithmetic-calculator" id="an-arithmetic-calculator"><small>15&#8202;.&#8202;3</small>An Arithmetic Calculator</a></h2>
<p>The heart and soul of our VM are in place now. The bytecode loop dispatches and
executes instructions. The stack grows and shrinks as values flow through it.
The two halves work, but it&rsquo;s hard to get a feel for how cleverly they interact
with only the two rudimentary instructions we have so far. So let&rsquo;s teach our
interpreter to do arithmetic.</p>
<p>We&rsquo;ll start with the simplest arithmetic operation, unary negation.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="n">1.2</span>;
<span class="k">print</span> -<span class="i">a</span>; <span class="c">// -1.2.</span>
</pre></div>
<p>The prefix <code>-</code> operator takes one operand, the value to negate. It produces a
single result. We aren&rsquo;t fussing with a parser yet, but we can add the
bytecode instruction that the above syntax will compile to.</p>
<div class="codehilite"><pre class="insert-before">  OP_CONSTANT,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_NEGATE</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>We execute it like so:</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_NEGATE</span>:   <span class="i">push</span>(-<span class="i">pop</span>()); <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>The instruction needs a value to operate on, which it gets by popping from the
stack. It negates that, then pushes the result back on for later instructions to
use. Doesn&rsquo;t get much easier than that. We can disassemble it too.</p>
<div class="codehilite"><pre class="insert-before">    case OP_CONSTANT:
      return constantInstruction(&quot;OP_CONSTANT&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_NEGATE</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_NEGATE&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>And we can try it out in our test chunk.</p>
<div class="codehilite"><pre class="insert-before">  writeChunk(&amp;chunk, constant, 123);
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_NEGATE</span>, <span class="n">123</span>);
</pre><pre class="insert-after">

  writeChunk(&amp;chunk, OP_RETURN, 123);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>After loading the constant, but before returning, we execute the negate
instruction. That replaces the constant on the stack with its negation. Then the
return instruction prints that out:</p>
<div class="codehilite"><pre>-1.2
</pre></div>
<p>Magical!</p>
<h3><a href="#binary-operators" id="binary-operators"><small>15&#8202;.&#8202;3&#8202;.&#8202;1</small>Binary operators</a></h3>
<p>OK, unary operators aren&rsquo;t <em>that</em> impressive. We still only ever have a single
value on the stack. To really see some depth, we need binary operators. Lox has
four binary <span name="ops">arithmetic</span> operators: addition, subtraction,
multiplication, and division. We&rsquo;ll go ahead and implement them all at the same
time.</p>
<aside name="ops">
<p>Lox has some other binary operators<span class="em">&mdash;</span>comparison and equality<span class="em">&mdash;</span>but those
don&rsquo;t produce numbers as a result, so we aren&rsquo;t ready for them yet.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  OP_CONSTANT,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_ADD</span>,
  <span class="a">OP_SUBTRACT</span>,
  <span class="a">OP_MULTIPLY</span>,
  <span class="a">OP_DIVIDE</span>,
</pre><pre class="insert-after">  OP_NEGATE,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>Back in the bytecode loop, they are executed like this:</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_ADD</span>:      <span class="a">BINARY_OP</span>(+); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_SUBTRACT</span>: <span class="a">BINARY_OP</span>(-); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_MULTIPLY</span>: <span class="a">BINARY_OP</span>(*); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_DIVIDE</span>:   <span class="a">BINARY_OP</span>(/); <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_NEGATE:   push(-pop()); break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>The only difference between these four instructions is which underlying C
operator they ultimately use to combine the two operands. Surrounding that core
arithmetic expression is some boilerplate code to pull values off the stack and
push the result. When we later add dynamic typing, that boilerplate will grow.
To avoid repeating that code four times, I wrapped it up in a macro.</p>
<div class="codehilite"><pre class="insert-before">#define READ_CONSTANT() (vm.chunk-&gt;constants.values[READ_BYTE()])
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#define BINARY_OP(op) \</span>
<span class="a">    do { \</span>
<span class="a">      double b = pop(); \</span>
<span class="a">      double a = pop(); \</span>
<span class="a">      push(a op b); \</span>
<span class="a">    } while (false)</span>
</pre><pre class="insert-after">

  for (;;) {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>I admit this is a fairly <span name="operator">adventurous</span> use of the C
preprocessor. I hesitated to do this, but you&rsquo;ll be glad in later chapters when
we need to add the type checking for each operand and stuff. It would be a chore
to walk you through the same code four times.</p>
<aside name="operator">
<p>Did you even know you can pass an <em>operator</em> as an argument to a macro? Now you
do. The preprocessor doesn&rsquo;t care that operators aren&rsquo;t first class in C. As far
as it&rsquo;s concerned, it&rsquo;s all just text tokens.</p>
<p>I know, you can just <em>feel</em> the temptation to abuse this, can&rsquo;t you?</p>
</aside>
<p>If you aren&rsquo;t familiar with the trick already, that outer <code>do while</code> loop
probably looks really weird. This macro needs to expand to a series of
statements. To be careful macro authors, we want to ensure those statements all
end up in the same scope when the macro is expanded. Imagine if you defined:</p>
<div class="codehilite"><pre><span class="a">#define WAKE_UP() makeCoffee(); drinkCoffee();</span>
</pre></div>
<p>And then used it like:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">morning</span>) <span class="a">WAKE_UP</span>();
</pre></div>
<p>The intent is to execute both statements of the macro body only if <code>morning</code> is
true. But it expands to:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">morning</span>) <span class="i">makeCoffee</span>(); <span class="i">drinkCoffee</span>();;
</pre></div>
<p>Oops. The <code>if</code> attaches only to the <em>first</em> statement. You might think you could
fix this using a block.</p>
<div class="codehilite"><pre><span class="a">#define WAKE_UP() { makeCoffee(); drinkCoffee(); }</span>
</pre></div>
<p>That&rsquo;s better, but you still risk:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">morning</span>)
  <span class="a">WAKE_UP</span>();
<span class="k">else</span>
  <span class="i">sleepIn</span>();
</pre></div>
<p>Now you get a compile error on the <code>else</code> because of that trailing <code>;</code> after the
macro&rsquo;s block. Using a <code>do while</code> loop in the macro looks funny, but it gives
you a way to contain multiple statements inside a block that <em>also</em> permits a
semicolon at the end.</p>
<p>Where were we? Right, so what the body of that macro does is straightforward. A
binary operator takes two operands, so it pops twice. It performs the operation
on those two values and then pushes the result.</p>
<p>Pay close attention to the <em>order</em> of the two pops. Note that we assign the
first popped operand to <code>b</code>, not <code>a</code>. It looks backwards. When the operands
themselves are calculated, the left is evaluated first, then the right. That
means the left operand gets pushed before the right operand. So the right
operand will be on top of the stack. Thus, the first value we pop is <code>b</code>.</p>
<p>For example, if we compile <code>3 - 1</code>, the data flow between the instructions looks
like so:</p>
<p><img src="image/a-virtual-machine/reverse.png" alt="A sequence of instructions
with the stack for each showing how pushing and then popping values reverses
their order." /></p>
<p>As we did with the other macros inside <code>run()</code>, we clean up after ourselves at
the end of the function.</p>
<div class="codehilite"><pre class="insert-before">#undef READ_CONSTANT
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#undef BINARY_OP</span>
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Last is disassembler support.</p>
<div class="codehilite"><pre class="insert-before">    case OP_CONSTANT:
      return constantInstruction(&quot;OP_CONSTANT&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_ADD</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_ADD&quot;</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_SUBTRACT</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_SUBTRACT&quot;</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_MULTIPLY</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_MULTIPLY&quot;</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_DIVIDE</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_DIVIDE&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_NEGATE:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>The arithmetic instruction formats are simple, like <code>OP_RETURN</code>. Even though the
arithmetic <em>operators</em> take operands<span class="em">&mdash;</span>which are found on the stack<span class="em">&mdash;</span>the
arithmetic <em>bytecode instructions</em> do not.</p>
<p>Let&rsquo;s put some of our new instructions through their paces by evaluating a
larger expression:</p>
<p><img src="image/a-virtual-machine/chunk.png" alt="The expression being
evaluated: -((1.2 + 3.4) / 5.6)" /></p>
<p>Building on our existing example chunk, here are the additional instructions we
need to hand-compile that AST to bytecode.</p>
<div class="codehilite"><pre class="insert-before">  int constant = addConstant(&amp;chunk, 1.2);
  writeChunk(&amp;chunk, OP_CONSTANT, 123);
  writeChunk(&amp;chunk, constant, 123);
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">

  <span class="i">constant</span> = <span class="i">addConstant</span>(&amp;<span class="i">chunk</span>, <span class="n">3.4</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_CONSTANT</span>, <span class="n">123</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="i">constant</span>, <span class="n">123</span>);

  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_ADD</span>, <span class="n">123</span>);

  <span class="i">constant</span> = <span class="i">addConstant</span>(&amp;<span class="i">chunk</span>, <span class="n">5.6</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_CONSTANT</span>, <span class="n">123</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="i">constant</span>, <span class="n">123</span>);

  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_DIVIDE</span>, <span class="n">123</span>);
</pre><pre class="insert-after">  writeChunk(&amp;chunk, OP_NEGATE, 123);

  writeChunk(&amp;chunk, OP_RETURN, 123);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>The addition goes first. The instruction for the left constant, 1.2, is already
there, so we add another for 3.4. Then we add those two using <code>OP_ADD</code>, leaving
the sum on the stack. That covers the left side of the division. Next we push the
5.6, and divide the result of the addition by it. Finally, we negate the result
of that.</p>
<p>Note how the output of the <code>OP_ADD</code> implicitly flows into being an operand of
<code>OP_DIVIDE</code> without the two instructions being directly coupled to each other.
That&rsquo;s the magic of the stack. It lets us freely compose instructions without
them needing any complexity or awareness of the data flow. The stack acts like a
shared workspace that they all read from and write to.</p>
<p>In this tiny example chunk, the stack still only gets two values tall, but when
we start compiling Lox source to bytecode, we&rsquo;ll have chunks that use much more
of the stack. In the meantime, try playing around with this hand-authored chunk
to calculate different nested arithmetic expressions and see how values flow
through the instructions and stack.</p>
<p>You may as well get it out of your system now. This is the last chunk we&rsquo;ll
build by hand. When we next revisit bytecode, we will be writing a compiler to
generate it for us.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>What bytecode instruction sequences would you generate for the following
expressions:</p>
<div class="codehilite"><pre><span class="n">1</span> * <span class="n">2</span> + <span class="n">3</span>
<span class="n">1</span> + <span class="n">2</span> * <span class="n">3</span>
<span class="n">3</span> - <span class="n">2</span> - <span class="n">1</span>
<span class="n">1</span> + <span class="n">2</span> * <span class="n">3</span> - <span class="n">4</span> / -<span class="n">5</span>
</pre></div>
<p>(Remember that Lox does not have a syntax for negative number literals, so
the <code>-5</code> is negating the number 5.)</p>
</li>
<li>
<p>If we really wanted a minimal instruction set, we could eliminate either
<code>OP_NEGATE</code> or <code>OP_SUBTRACT</code>. Show the bytecode instruction sequence you
would generate for:</p>
<div class="codehilite"><pre><span class="n">4</span> - <span class="n">3</span> * -<span class="n">2</span>
</pre></div>
<p>First, without using <code>OP_NEGATE</code>. Then, without using <code>OP_SUBTRACT</code>.</p>
<p>Given the above, do you think it makes sense to have both instructions? Why
or why not? Are there any other redundant instructions you would consider
including?</p>
</li>
<li>
<p>Our VM&rsquo;s stack has a fixed size, and we don&rsquo;t check if pushing a value
overflows it. This means the wrong series of instructions could cause our
interpreter to crash or go into undefined behavior. Avoid that by
dynamically growing the stack as needed.</p>
<p>What are the costs and benefits of doing so?</p>
</li>
<li>
<p>To interpret <code>OP_NEGATE</code>, we pop the operand, negate the value, and then
push the result. That&rsquo;s a simple implementation, but it increments and
decrements <code>stackTop</code> unnecessarily, since the stack ends up the same height
in the end. It might be faster to simply negate the value in place on the
stack and leave <code>stackTop</code> alone. Try that and see if you can measure a
performance difference.</p>
<p>Are there other instructions where you can do a similar optimization?</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Register-Based Bytecode</a></h2>
<p>For the remainder of this book, we&rsquo;ll meticulously implement an interpreter
around a stack-based bytecode instruction set. There&rsquo;s another family of
bytecode architectures out there<span class="em">&mdash;</span><em>register-based</em>. Despite the name, these
bytecode instructions aren&rsquo;t quite as difficult to work with as the registers in
an actual chip like <span name="x64">x64</span>. With real hardware registers,
you usually have only a handful for the entire program, so you spend a lot of
effort <a href="https://en.wikipedia.org/wiki/Register_allocation">trying to use them efficiently and shuttling stuff in and out of
them</a>.</p>
<aside name="x64">
<p>Register-based bytecode is a little closer to the <a href="https://en.wikipedia.org/wiki/Register_window"><em>register windows</em></a>
supported by SPARC chips.</p>
</aside>
<p>In a register-based VM, you still have a stack. Temporary values still get
pushed onto it and popped when no longer needed. The main difference is that
instructions can read their inputs from anywhere in the stack and can store
their outputs into specific stack slots.</p>
<p>Take this little Lox script:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
<span class="k">var</span> <span class="i">b</span> = <span class="n">2</span>;
<span class="k">var</span> <span class="i">c</span> = <span class="i">a</span> + <span class="i">b</span>;
</pre></div>
<p>In our stack-based VM, the last statement will get compiled to something like:</p>
<div class="codehilite"><pre><span class="i">load</span> &lt;<span class="i">a</span>&gt;  <span class="c">// Read local variable a and push onto stack.</span>
<span class="i">load</span> &lt;<span class="i">b</span>&gt;  <span class="c">// Read local variable b and push onto stack.</span>
<span class="i">add</span>       <span class="c">// Pop two values, add, push result.</span>
<span class="i">store</span> &lt;<span class="i">c</span>&gt; <span class="c">// Pop value and store in local variable c.</span>
</pre></div>
<p>(Don&rsquo;t worry if you don&rsquo;t fully understand the load and store instructions yet.
We&rsquo;ll go over them in much greater detail <a href="global-variables.html">when we implement
variables</a>.) We have four separate instructions. That means four
times through the bytecode interpret loop, four instructions to decode and
dispatch. It&rsquo;s at least seven bytes of code<span class="em">&mdash;</span>four for the opcodes and another
three for the operands identifying which locals to load and store. Three pushes
and three pops. A lot of work!</p>
<p>In a register-based instruction set, instructions can read from and store
directly into local variables. The bytecode for the last statement above looks
like:</p>
<div class="codehilite"><pre><span class="i">add</span> &lt;<span class="i">a</span>&gt; &lt;<span class="i">b</span>&gt; &lt;<span class="i">c</span>&gt; <span class="c">// Read values from a and b, add, store in c.</span>
</pre></div>
<p>The add instruction is bigger<span class="em">&mdash;</span>it has three instruction operands that define
where in the stack it reads its inputs from and writes the result to. But since
local variables live on the stack, it can read directly from <code>a</code> and <code>b</code> and
then store the result right into <code>c</code>.</p>
<p>There&rsquo;s only a single instruction to decode and dispatch, and the whole thing
fits in four bytes. Decoding is more complex because of the additional operands,
but it&rsquo;s still a net win. There&rsquo;s no pushing and popping or other stack
manipulation.</p>
<p>The main implementation of Lua used to be stack-based. For <span name="lua">Lua
5.0</span>, the implementers switched to a register instruction set and noted a
speed improvement. The amount of improvement, naturally, depends heavily on the
details of the language semantics, specific instruction set, and compiler
sophistication, but that should get your attention.</p>
<aside name="lua">
<p>The Lua dev team<span class="em">&mdash;</span>Roberto Ierusalimschy, Waldemar Celes, and Luiz Henrique de
Figueiredo<span class="em">&mdash;</span>wrote a <em>fantastic</em> paper on this, one of my all-time favorite
computer science papers, &ldquo;<a href="https://www.lua.org/doc/jucs05.pdf">The Implementation of Lua 5.0</a>&rdquo; (PDF).</p>
</aside>
<p>That raises the obvious question of why I&rsquo;m going to spend the rest of the book
doing a stack-based bytecode. Register VMs are neat, but they are quite a bit
harder to write a compiler for. For what is likely to be your very first
compiler, I wanted to stick with an instruction set that&rsquo;s easy to generate and
easy to execute. Stack-based bytecode is marvelously simple.</p>
<p>It&rsquo;s also <em>much</em> better known in the literature and the community. Even though
you may eventually move to something more advanced, it&rsquo;s a good common ground to
share with the rest of your language hacker peers.</p>
</div>

<footer>
<a href="scanning-on-demand.html" class="next">
  Next Chapter: &ldquo;Scanning on Demand&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/acknowledgements.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Acknowledgements &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h2><small></small>Acknowledgements</h2>
<hr>

<div class="prev-next">
    <a href="dedication.html" title="Dedication" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="contents.html" title="Table of Contents" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="dedication.html" title="Dedication" class="prev">←</a>
<a href="contents.html" title="Table of Contents" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h2><small></small>Acknowledgements</h2>
<hr>

<div class="prev-next">
    <a href="dedication.html" title="Dedication" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="contents.html" title="Table of Contents" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <h1 class="part">Acknowledgements</h1>

<p>When the first copy of &ldquo;<a href="https://gameprogrammingpatterns.com/">Game Programming Patterns</a>&rdquo; sold, I guess I had
the right to call myself an author. But it took time to feel comfortable with
that label. Thank you to everyone who bought copies of my first book, and to the
publishers and translators who brought it to other languages. You gave me the
confidence to believe I could tackle a project of this scope. Well, that, and
massively underestimating what I was getting myself into, but that&rsquo;s on me.</p>
<p>A fear particular to technical writing is <em>getting stuff wrong</em>. Tests and
static analysis only get you so far. Once the code and prose are in ink on paper,
there&rsquo;s no fixing it. I am deeply grateful to the many people who filed issues
and pull requests on the <a href="https://github.com/munificent/craftinginterpreters">open source repo</a> for the book. Special thanks
go to cm1776, who filed 145 tactfully worded issues pointing out hundreds of
code errors, typos, and unclear sentences. The book is more accurate and
readable because of you all.</p>
<p>I&rsquo;m grateful to my copy editor Kari Somerton who braved a heap of computer
science jargon and an unfamiliar workflow in order to fix my many grammar errors
and stylistic inconsistencies.</p>
<p>When the pandemic turned everyone&rsquo;s life upside down, a number of people reached
out to tell me that my book provided a helpful distraction. This book that I
spent six years writing forms a chapter in my own life&rsquo;s story and I&rsquo;m grateful
to the readers who contacted me and made that chapter more meaningful.</p>
<p>Finally, the deepest thanks go to my wife Megan and my daughters Lily and
Gretchen. You patiently endured the time I had to sink into the book, and my
stress while writing it. There&rsquo;s no one I&rsquo;d rather be stuck at home with.</p>

<footer>
<a href="contents.html" class="next">
  Next Part: &ldquo;Table of Contents&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/appendix-i.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Appendix I &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Appendix I<small>A1</small></a></h3>

<ul>
    <li><a href="#syntax-grammar"><small>A1.1</small> Syntax Grammar</a></li>
    <li><a href="#lexical-grammar"><small>A1.2</small> Lexical Grammar</a></li>
</ul>


<div class="prev-next">
    <a href="backmatter.html" title="Backmatter" class="left">&larr;&nbsp;Previous</a>
    <a href="backmatter.html" title="Backmatter">&uarr;&nbsp;Up</a>
    <a href="appendix-ii.html" title="Appendix II" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="backmatter.html" title="Backmatter" class="prev">←</a>
<a href="appendix-ii.html" title="Appendix II" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Appendix I<small>A1</small></a></h3>

<ul>
    <li><a href="#syntax-grammar"><small>A1.1</small> Syntax Grammar</a></li>
    <li><a href="#lexical-grammar"><small>A1.2</small> Lexical Grammar</a></li>
</ul>


<div class="prev-next">
    <a href="backmatter.html" title="Backmatter" class="left">&larr;&nbsp;Previous</a>
    <a href="backmatter.html" title="Backmatter">&uarr;&nbsp;Up</a>
    <a href="appendix-ii.html" title="Appendix II" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">A1</div>
  <h1>Appendix I</h1>

<p>Here is a complete grammar for Lox. The chapters that introduce each part of the
language include the grammar rules there, but this collects them all into one
place.</p>
<h2><a href="#syntax-grammar" id="syntax-grammar"><small>A1&#8202;.&#8202;1</small>Syntax Grammar</a></h2>
<p>The syntactic grammar is used to parse the linear sequence of tokens into the
nested syntax tree structure. It starts with the first rule that matches an
entire Lox program (or a single REPL entry).</p>
<div class="codehilite"><pre><span class="i">program</span>        → <span class="i">declaration</span>* <span class="t">EOF</span> ;
</pre></div>
<h3><a href="#declarations" id="declarations"><small>A1&#8202;.&#8202;1&#8202;.&#8202;1</small>Declarations</a></h3>
<p>A program is a series of declarations, which are the statements that bind new
identifiers or any of the other statement types.</p>
<div class="codehilite"><pre><span class="i">declaration</span>    → <span class="i">classDecl</span>
               | <span class="i">funDecl</span>
               | <span class="i">varDecl</span>
               | <span class="i">statement</span> ;

<span class="i">classDecl</span>      → <span class="s">&quot;class&quot;</span> <span class="t">IDENTIFIER</span> ( <span class="s">&quot;&lt;&quot;</span> <span class="t">IDENTIFIER</span> )?
                 <span class="s">&quot;{&quot;</span> <span class="i">function</span>* <span class="s">&quot;}&quot;</span> ;
<span class="i">funDecl</span>        → <span class="s">&quot;fun&quot;</span> <span class="i">function</span> ;
<span class="i">varDecl</span>        → <span class="s">&quot;var&quot;</span> <span class="t">IDENTIFIER</span> ( <span class="s">&quot;=&quot;</span> <span class="i">expression</span> )? <span class="s">&quot;;&quot;</span> ;
</pre></div>
<h3><a href="#statements" id="statements"><small>A1&#8202;.&#8202;1&#8202;.&#8202;2</small>Statements</a></h3>
<p>The remaining statement rules produce side effects, but do not introduce
bindings.</p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">forStmt</span>
               | <span class="i">ifStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">returnStmt</span>
               | <span class="i">whileStmt</span>
               | <span class="i">block</span> ;

<span class="i">exprStmt</span>       → <span class="i">expression</span> <span class="s">&quot;;&quot;</span> ;
<span class="i">forStmt</span>        → <span class="s">&quot;for&quot;</span> <span class="s">&quot;(&quot;</span> ( <span class="i">varDecl</span> | <span class="i">exprStmt</span> | <span class="s">&quot;;&quot;</span> )
                           <span class="i">expression</span>? <span class="s">&quot;;&quot;</span>
                           <span class="i">expression</span>? <span class="s">&quot;)&quot;</span> <span class="i">statement</span> ;
<span class="i">ifStmt</span>         → <span class="s">&quot;if&quot;</span> <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> <span class="i">statement</span>
                 ( <span class="s">&quot;else&quot;</span> <span class="i">statement</span> )? ;
<span class="i">printStmt</span>      → <span class="s">&quot;print&quot;</span> <span class="i">expression</span> <span class="s">&quot;;&quot;</span> ;
<span class="i">returnStmt</span>     → <span class="s">&quot;return&quot;</span> <span class="i">expression</span>? <span class="s">&quot;;&quot;</span> ;
<span class="i">whileStmt</span>      → <span class="s">&quot;while&quot;</span> <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> <span class="i">statement</span> ;
<span class="i">block</span>          → <span class="s">&quot;{&quot;</span> <span class="i">declaration</span>* <span class="s">&quot;}&quot;</span> ;
</pre></div>
<p>Note that <code>block</code> is a statement rule, but is also used as a nonterminal in a
couple of other rules for things like function bodies.</p>
<h3><a href="#expressions" id="expressions"><small>A1&#8202;.&#8202;1&#8202;.&#8202;3</small>Expressions</a></h3>
<p>Expressions produce values. Lox has a number of unary and binary operators with
different levels of precedence. Some grammars for languages do not directly
encode the precedence relationships and specify that elsewhere. Here, we use a
separate rule for each precedence level to make it explicit.</p>
<div class="codehilite"><pre><span class="i">expression</span>     → <span class="i">assignment</span> ;

<span class="i">assignment</span>     → ( <span class="i">call</span> <span class="s">&quot;.&quot;</span> )? <span class="t">IDENTIFIER</span> <span class="s">&quot;=&quot;</span> <span class="i">assignment</span>
               | <span class="i">logic_or</span> ;

<span class="i">logic_or</span>       → <span class="i">logic_and</span> ( <span class="s">&quot;or&quot;</span> <span class="i">logic_and</span> )* ;
<span class="i">logic_and</span>      → <span class="i">equality</span> ( <span class="s">&quot;and&quot;</span> <span class="i">equality</span> )* ;
<span class="i">equality</span>       → <span class="i">comparison</span> ( ( <span class="s">&quot;!=&quot;</span> | <span class="s">&quot;==&quot;</span> ) <span class="i">comparison</span> )* ;
<span class="i">comparison</span>     → <span class="i">term</span> ( ( <span class="s">&quot;&gt;&quot;</span> | <span class="s">&quot;&gt;=&quot;</span> | <span class="s">&quot;&lt;&quot;</span> | <span class="s">&quot;&lt;=&quot;</span> ) <span class="i">term</span> )* ;
<span class="i">term</span>           → <span class="i">factor</span> ( ( <span class="s">&quot;-&quot;</span> | <span class="s">&quot;+&quot;</span> ) <span class="i">factor</span> )* ;
<span class="i">factor</span>         → <span class="i">unary</span> ( ( <span class="s">&quot;/&quot;</span> | <span class="s">&quot;*&quot;</span> ) <span class="i">unary</span> )* ;

<span class="i">unary</span>          → ( <span class="s">&quot;!&quot;</span> | <span class="s">&quot;-&quot;</span> ) <span class="i">unary</span> | <span class="i">call</span> ;
<span class="i">call</span>           → <span class="i">primary</span> ( <span class="s">&quot;(&quot;</span> <span class="i">arguments</span>? <span class="s">&quot;)&quot;</span> | <span class="s">&quot;.&quot;</span> <span class="t">IDENTIFIER</span> )* ;
<span class="i">primary</span>        → <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span> | <span class="s">&quot;this&quot;</span>
               | <span class="t">NUMBER</span> | <span class="t">STRING</span> | <span class="t">IDENTIFIER</span> | <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span>
               | <span class="s">&quot;super&quot;</span> <span class="s">&quot;.&quot;</span> <span class="t">IDENTIFIER</span> ;
</pre></div>
<h3><a href="#utility-rules" id="utility-rules"><small>A1&#8202;.&#8202;1&#8202;.&#8202;4</small>Utility rules</a></h3>
<p>In order to keep the above rules a little cleaner, some of the grammar is
split out into a few reused helper rules.</p>
<div class="codehilite"><pre><span class="i">function</span>       → <span class="t">IDENTIFIER</span> <span class="s">&quot;(&quot;</span> <span class="i">parameters</span>? <span class="s">&quot;)&quot;</span> <span class="i">block</span> ;
<span class="i">parameters</span>     → <span class="t">IDENTIFIER</span> ( <span class="s">&quot;,&quot;</span> <span class="t">IDENTIFIER</span> )* ;
<span class="i">arguments</span>      → <span class="i">expression</span> ( <span class="s">&quot;,&quot;</span> <span class="i">expression</span> )* ;
</pre></div>
<h2><a href="#lexical-grammar" id="lexical-grammar"><small>A1&#8202;.&#8202;2</small>Lexical Grammar</a></h2>
<p>The lexical grammar is used by the scanner to group characters into tokens.
Where the syntax is <a href="https://en.wikipedia.org/wiki/Context-free_grammar">context free</a>, the lexical grammar is <a href="https://en.wikipedia.org/wiki/Regular_grammar">regular</a><span class="em">&mdash;</span>note
that there are no recursive rules.</p>
<div class="codehilite"><pre><span class="t">NUMBER</span>         → <span class="t">DIGIT</span>+ ( <span class="s">&quot;.&quot;</span> <span class="t">DIGIT</span>+ )? ;
<span class="t">STRING</span>         → <span class="s">&quot;</span><span class="e">\&quot;</span><span class="s">&quot;</span> &lt;<span class="i">any</span> <span class="i">char</span> <span class="i">except</span> <span class="s">&quot;</span><span class="e">\&quot;</span><span class="s">&quot;</span>&gt;* <span class="s">&quot;</span><span class="e">\&quot;</span><span class="s">&quot;</span> ;
<span class="t">IDENTIFIER</span>     → <span class="t">ALPHA</span> ( <span class="t">ALPHA</span> | <span class="t">DIGIT</span> )* ;
<span class="t">ALPHA</span>          → <span class="s">&quot;a&quot;</span> ... <span class="s">&quot;z&quot;</span> | <span class="s">&quot;A&quot;</span> ... <span class="s">&quot;Z&quot;</span> | <span class="s">&quot;_&quot;</span> ;
<span class="t">DIGIT</span>          → <span class="s">&quot;0&quot;</span> ... <span class="s">&quot;9&quot;</span> ;
</pre></div>

<footer>
<a href="appendix-ii.html" class="next">
  Next Chapter: &ldquo;Appendix II&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/appendix-ii.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Appendix II &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Appendix II<small>A2</small></a></h3>

<ul>
    <li><a href="#expressions"><small>A2.1</small> Expressions</a></li>
    <li><a href="#statements"><small>A2.2</small> Statements</a></li>
</ul>


<div class="prev-next">
    <a href="appendix-i.html" title="Appendix I" class="left">&larr;&nbsp;Previous</a>
    <a href="backmatter.html" title="Backmatter">&uarr;&nbsp;Up</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="appendix-i.html" title="Appendix I" class="prev">←</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Appendix II<small>A2</small></a></h3>

<ul>
    <li><a href="#expressions"><small>A2.1</small> Expressions</a></li>
    <li><a href="#statements"><small>A2.2</small> Statements</a></li>
</ul>


<div class="prev-next">
    <a href="appendix-i.html" title="Appendix I" class="left">&larr;&nbsp;Previous</a>
    <a href="backmatter.html" title="Backmatter">&uarr;&nbsp;Up</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">A2</div>
  <h1>Appendix II</h1>

<p>For your edification, here is the code produced by <a href="representing-code.html#metaprogramming-the-trees">the little script
we built</a> to automate generating the syntax tree classes for jlox.</p>
<h2><a href="#expressions" id="expressions"><small>A2&#8202;.&#8202;1</small>Expressions</a></h2>
<p>Expressions are the first syntax tree nodes we see, introduced in &ldquo;<a href="representing-code.html">Representing
Code</a>&rdquo;. The main Expr class defines the visitor
interface used to dispatch against the specific expression types, and contains
the other expression subclasses as nested classes.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.List</span>;

<span class="k">abstract</span> <span class="k">class</span> <span class="t">Expr</span> {
  <span class="k">interface</span> <span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; {
    <span class="t">R</span> <span class="i">visitAssignExpr</span>(<span class="t">Assign</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitBinaryExpr</span>(<span class="t">Binary</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitCallExpr</span>(<span class="t">Call</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitGetExpr</span>(<span class="t">Get</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitGroupingExpr</span>(<span class="t">Grouping</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitLiteralExpr</span>(<span class="t">Literal</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitLogicalExpr</span>(<span class="t">Logical</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitSetExpr</span>(<span class="t">Set</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitSuperExpr</span>(<span class="t">Super</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitThisExpr</span>(<span class="t">This</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitUnaryExpr</span>(<span class="t">Unary</span> <span class="i">expr</span>);
    <span class="t">R</span> <span class="i">visitVariableExpr</span>(<span class="t">Variable</span> <span class="i">expr</span>);
  }

  <span class="c">// Nested Expr classes here...</span>

  <span class="k">abstract</span> &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>);
}
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, create new file</div>

<h3><a href="#assign-expression" id="assign-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;1</small>Assign expression</a></h3>
<p>Variable assignment is introduced in &ldquo;<a href="statements-and-state.html#assignment">Statements and
State</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Assign</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Assign</span>(<span class="t">Token</span> <span class="i">name</span>, <span class="t">Expr</span> <span class="i">value</span>) {
      <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
      <span class="k">this</span>.<span class="i">value</span> = <span class="i">value</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitAssignExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">name</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">value</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#binary-expression" id="binary-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;2</small>Binary expression</a></h3>
<p>Binary operators are introduced in &ldquo;<a href="representing-code.html">Representing
Code</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Binary</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Binary</span>(<span class="t">Expr</span> <span class="i">left</span>, <span class="t">Token</span> <span class="i">operator</span>, <span class="t">Expr</span> <span class="i">right</span>) {
      <span class="k">this</span>.<span class="i">left</span> = <span class="i">left</span>;
      <span class="k">this</span>.<span class="i">operator</span> = <span class="i">operator</span>;
      <span class="k">this</span>.<span class="i">right</span> = <span class="i">right</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitBinaryExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">left</span>;
    <span class="k">final</span> <span class="t">Token</span> <span class="i">operator</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">right</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#call-expression" id="call-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;3</small>Call expression</a></h3>
<p>Function call expressions are introduced in
&ldquo;<a href="functions.html#function-calls">Functions</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Call</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Call</span>(<span class="t">Expr</span> <span class="i">callee</span>, <span class="t">Token</span> <span class="i">paren</span>, <span class="t">List</span>&lt;<span class="t">Expr</span>&gt; <span class="i">arguments</span>) {
      <span class="k">this</span>.<span class="i">callee</span> = <span class="i">callee</span>;
      <span class="k">this</span>.<span class="i">paren</span> = <span class="i">paren</span>;
      <span class="k">this</span>.<span class="i">arguments</span> = <span class="i">arguments</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitCallExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">callee</span>;
    <span class="k">final</span> <span class="t">Token</span> <span class="i">paren</span>;
    <span class="k">final</span> <span class="t">List</span>&lt;<span class="t">Expr</span>&gt; <span class="i">arguments</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#get-expression" id="get-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;4</small>Get expression</a></h3>
<p>Property access, or &ldquo;get&rdquo; expressions, are introduced in
&ldquo;<a href="classes.html#properties-on-instances">Classes</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Get</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Get</span>(<span class="t">Expr</span> <span class="i">object</span>, <span class="t">Token</span> <span class="i">name</span>) {
      <span class="k">this</span>.<span class="i">object</span> = <span class="i">object</span>;
      <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitGetExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">object</span>;
    <span class="k">final</span> <span class="t">Token</span> <span class="i">name</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#grouping-expression" id="grouping-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;5</small>Grouping expression</a></h3>
<p>Using parentheses to group expressions is introduced in &ldquo;<a href="representing-code.html">Representing
Code</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Grouping</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Grouping</span>(<span class="t">Expr</span> <span class="i">expression</span>) {
      <span class="k">this</span>.<span class="i">expression</span> = <span class="i">expression</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitGroupingExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">expression</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#literal-expression" id="literal-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;6</small>Literal expression</a></h3>
<p>Literal value expressions are introduced in &ldquo;<a href="representing-code.html">Representing
Code</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Literal</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Literal</span>(<span class="t">Object</span> <span class="i">value</span>) {
      <span class="k">this</span>.<span class="i">value</span> = <span class="i">value</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitLiteralExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Object</span> <span class="i">value</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#logical-expression" id="logical-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;7</small>Logical expression</a></h3>
<p>The logical <code>and</code> and <code>or</code> operators are introduced in &ldquo;<a href="control-flow.html#logical-operators">Control
Flow</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Logical</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Logical</span>(<span class="t">Expr</span> <span class="i">left</span>, <span class="t">Token</span> <span class="i">operator</span>, <span class="t">Expr</span> <span class="i">right</span>) {
      <span class="k">this</span>.<span class="i">left</span> = <span class="i">left</span>;
      <span class="k">this</span>.<span class="i">operator</span> = <span class="i">operator</span>;
      <span class="k">this</span>.<span class="i">right</span> = <span class="i">right</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitLogicalExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">left</span>;
    <span class="k">final</span> <span class="t">Token</span> <span class="i">operator</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">right</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#set-expression" id="set-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;8</small>Set expression</a></h3>
<p>Property assignment, or &ldquo;set&rdquo; expressions, are introduced in
&ldquo;<a href="classes.html#properties-on-instances">Classes</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Set</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Set</span>(<span class="t">Expr</span> <span class="i">object</span>, <span class="t">Token</span> <span class="i">name</span>, <span class="t">Expr</span> <span class="i">value</span>) {
      <span class="k">this</span>.<span class="i">object</span> = <span class="i">object</span>;
      <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
      <span class="k">this</span>.<span class="i">value</span> = <span class="i">value</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitSetExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">object</span>;
    <span class="k">final</span> <span class="t">Token</span> <span class="i">name</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">value</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#super-expression" id="super-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;9</small>Super expression</a></h3>
<p>The <code>super</code> expression is introduced in
&ldquo;<a href="inheritance.html#calling-superclass-methods">Inheritance</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Super</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Super</span>(<span class="t">Token</span> <span class="i">keyword</span>, <span class="t">Token</span> <span class="i">method</span>) {
      <span class="k">this</span>.<span class="i">keyword</span> = <span class="i">keyword</span>;
      <span class="k">this</span>.<span class="i">method</span> = <span class="i">method</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitSuperExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">keyword</span>;
    <span class="k">final</span> <span class="t">Token</span> <span class="i">method</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#this-expression" id="this-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;10</small>This expression</a></h3>
<p>The <code>this</code> expression is introduced in &ldquo;<a href="classes.html#this">Classes</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">This</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">This</span>(<span class="t">Token</span> <span class="i">keyword</span>) {
      <span class="k">this</span>.<span class="i">keyword</span> = <span class="i">keyword</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitThisExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">keyword</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#unary-expression" id="unary-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;11</small>Unary expression</a></h3>
<p>Unary operators are introduced in &ldquo;<a href="representing-code.html">Representing Code</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Unary</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Unary</span>(<span class="t">Token</span> <span class="i">operator</span>, <span class="t">Expr</span> <span class="i">right</span>) {
      <span class="k">this</span>.<span class="i">operator</span> = <span class="i">operator</span>;
      <span class="k">this</span>.<span class="i">right</span> = <span class="i">right</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitUnaryExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">operator</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">right</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h3><a href="#variable-expression" id="variable-expression"><small>A2&#8202;.&#8202;1&#8202;.&#8202;12</small>Variable expression</a></h3>
<p>Variable access expressions are introduced in &ldquo;<a href="statements-and-state.html#variable-syntax">Statements and
State</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Expr.java</em><br>
nest inside class <em>Expr</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Variable</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Variable</span>(<span class="t">Token</span> <span class="i">name</span>) {
      <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitVariableExpr</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">name</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Expr.java</em>, nest inside class <em>Expr</em></div>

<h2><a href="#statements" id="statements"><small>A2&#8202;.&#8202;2</small>Statements</a></h2>
<p>Statements form a second hierarchy of syntax tree nodes independent of
expressions. We add the first couple of them in &ldquo;<a href="statements-and-state.html">Statements and
State</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.List</span>;

<span class="k">abstract</span> <span class="k">class</span> <span class="t">Stmt</span> {
  <span class="k">interface</span> <span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; {
    <span class="t">R</span> <span class="i">visitBlockStmt</span>(<span class="t">Block</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitClassStmt</span>(<span class="t">Class</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitExpressionStmt</span>(<span class="t">Expression</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitFunctionStmt</span>(<span class="t">Function</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitIfStmt</span>(<span class="t">If</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitPrintStmt</span>(<span class="t">Print</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitReturnStmt</span>(<span class="t">Return</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitVarStmt</span>(<span class="t">Var</span> <span class="i">stmt</span>);
    <span class="t">R</span> <span class="i">visitWhileStmt</span>(<span class="t">While</span> <span class="i">stmt</span>);
  }

  <span class="c">// Nested Stmt classes here...</span>

  <span class="k">abstract</span> &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>);
}
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, create new file</div>

<h3><a href="#block-statement" id="block-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;1</small>Block statement</a></h3>
<p>The curly-braced block statement that defines a local scope is introduced in
&ldquo;<a href="statements-and-state.html#block-syntax-and-semantics">Statements and State</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Block</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">Block</span>(<span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span>) {
      <span class="k">this</span>.<span class="i">statements</span> = <span class="i">statements</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitBlockStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#class-statement" id="class-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;2</small>Class statement</a></h3>
<p>Class declarations are introduced in, unsurprisingly,
&ldquo;<a href="classes.html#class-declarations">Classes</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Class</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">Class</span>(<span class="t">Token</span> <span class="i">name</span>,
          <span class="t">Expr</span>.<span class="t">Variable</span> <span class="i">superclass</span>,
          <span class="t">List</span>&lt;<span class="t">Stmt</span>.<span class="t">Function</span>&gt; <span class="i">methods</span>) {
      <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
      <span class="k">this</span>.<span class="i">superclass</span> = <span class="i">superclass</span>;
      <span class="k">this</span>.<span class="i">methods</span> = <span class="i">methods</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitClassStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">name</span>;
    <span class="k">final</span> <span class="t">Expr</span>.<span class="t">Variable</span> <span class="i">superclass</span>;
    <span class="k">final</span> <span class="t">List</span>&lt;<span class="t">Stmt</span>.<span class="t">Function</span>&gt; <span class="i">methods</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#expression-statement" id="expression-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;3</small>Expression statement</a></h3>
<p>The expression statement is introduced in &ldquo;<a href="statements-and-state.html#statements">Statements and
State</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Expression</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">Expression</span>(<span class="t">Expr</span> <span class="i">expression</span>) {
      <span class="k">this</span>.<span class="i">expression</span> = <span class="i">expression</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitExpressionStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">expression</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#function-statement" id="function-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;4</small>Function statement</a></h3>
<p>Function declarations are introduced in, you guessed it,
&ldquo;<a href="functions.html#function-declarations">Functions</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Function</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">Function</span>(<span class="t">Token</span> <span class="i">name</span>, <span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">params</span>, <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">body</span>) {
      <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
      <span class="k">this</span>.<span class="i">params</span> = <span class="i">params</span>;
      <span class="k">this</span>.<span class="i">body</span> = <span class="i">body</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitFunctionStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">name</span>;
    <span class="k">final</span> <span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">params</span>;
    <span class="k">final</span> <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">body</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#if-statement" id="if-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;5</small>If statement</a></h3>
<p>The <code>if</code> statement is introduced in &ldquo;<a href="control-flow.html#conditional-execution">Control
Flow</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">If</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">If</span>(<span class="t">Expr</span> <span class="i">condition</span>, <span class="t">Stmt</span> <span class="i">thenBranch</span>, <span class="t">Stmt</span> <span class="i">elseBranch</span>) {
      <span class="k">this</span>.<span class="i">condition</span> = <span class="i">condition</span>;
      <span class="k">this</span>.<span class="i">thenBranch</span> = <span class="i">thenBranch</span>;
      <span class="k">this</span>.<span class="i">elseBranch</span> = <span class="i">elseBranch</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitIfStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">condition</span>;
    <span class="k">final</span> <span class="t">Stmt</span> <span class="i">thenBranch</span>;
    <span class="k">final</span> <span class="t">Stmt</span> <span class="i">elseBranch</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#print-statement" id="print-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;6</small>Print statement</a></h3>
<p>The <code>print</code> statement is introduced in &ldquo;<a href="statements-and-state.html#statements">Statements and
State</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Print</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">Print</span>(<span class="t">Expr</span> <span class="i">expression</span>) {
      <span class="k">this</span>.<span class="i">expression</span> = <span class="i">expression</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitPrintStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">expression</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#return-statement" id="return-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;7</small>Return statement</a></h3>
<p>You need a function to return from, so <code>return</code> statements are introduced in
&ldquo;<a href="functions.html#return-statements">Functions</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Return</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">Return</span>(<span class="t">Token</span> <span class="i">keyword</span>, <span class="t">Expr</span> <span class="i">value</span>) {
      <span class="k">this</span>.<span class="i">keyword</span> = <span class="i">keyword</span>;
      <span class="k">this</span>.<span class="i">value</span> = <span class="i">value</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitReturnStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">keyword</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">value</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#variable-statement" id="variable-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;8</small>Variable statement</a></h3>
<p>Variable declarations are introduced in &ldquo;<a href="statements-and-state.html#variable-syntax">Statements and
State</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">Var</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">Var</span>(<span class="t">Token</span> <span class="i">name</span>, <span class="t">Expr</span> <span class="i">initializer</span>) {
      <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
      <span class="k">this</span>.<span class="i">initializer</span> = <span class="i">initializer</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitVarStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Token</span> <span class="i">name</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">initializer</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>

<h3><a href="#while-statement" id="while-statement"><small>A2&#8202;.&#8202;2&#8202;.&#8202;9</small>While statement</a></h3>
<p>The <code>while</code> statement is introduced in &ldquo;<a href="control-flow.html#while-loops">Control
Flow</a>&rdquo;.</p>
<div class="codehilite"><div class="source-file"><em>lox/Stmt.java</em><br>
nest inside class <em>Stmt</em></div>
<pre>  <span class="k">static</span> <span class="k">class</span> <span class="t">While</span> <span class="k">extends</span> <span class="t">Stmt</span> {
    <span class="t">While</span>(<span class="t">Expr</span> <span class="i">condition</span>, <span class="t">Stmt</span> <span class="i">body</span>) {
      <span class="k">this</span>.<span class="i">condition</span> = <span class="i">condition</span>;
      <span class="k">this</span>.<span class="i">body</span> = <span class="i">body</span>;
    }

    <span class="a">@Override</span>
    &lt;<span class="t">R</span>&gt; <span class="t">R</span> <span class="i">accept</span>(<span class="t">Visitor</span>&lt;<span class="t">R</span>&gt; <span class="i">visitor</span>) {
      <span class="k">return</span> <span class="i">visitor</span>.<span class="i">visitWhileStmt</span>(<span class="k">this</span>);
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">condition</span>;
    <span class="k">final</span> <span class="t">Stmt</span> <span class="i">body</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Stmt.java</em>, nest inside class <em>Stmt</em></div>


<footer>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/backmatter.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Backmatter &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h2><small></small>Backmatter</h2>

<ul>
    <li><a href="appendix-i.html"><small>A1</small>Appendix I</a></li>
    <li><a href="appendix-ii.html"><small>A2</small>Appendix II</a></li>
</ul>


<div class="prev-next">
    <a href="optimization.html" title="Optimization" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="appendix-i.html" title="Appendix I" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="optimization.html" title="Optimization" class="prev">←</a>
<a href="appendix-i.html" title="Appendix I" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h2><small></small>Backmatter</h2>

<ul>
    <li><a href="appendix-i.html"><small>A1</small>Appendix I</a></li>
    <li><a href="appendix-ii.html"><small>A2</small>Appendix II</a></li>
</ul>


<div class="prev-next">
    <a href="optimization.html" title="Optimization" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="appendix-i.html" title="Appendix I" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <h1 class="part">Backmatter</h1>

<p>You&rsquo;ve reached the end of the book! There are two pieces of supplementary
material you may find helpful:</p>
<ul>
<li>
<p><strong><a href="appendix-i.html">Appendix I</a></strong> contains a complete grammar for Lox, all in one place.</p>
</li>
<li>
<p><strong><a href="appendix-ii.html">Appendix II</a></strong> shows the Java classes produced by <a href="representing-code.html#metaprogramming-the-trees">the AST generator</a>
we use for jlox.</p>
</li>
</ul>

<footer>
<a href="appendix-i.html" class="next">
  Next Chapter: &ldquo;Appendix I&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/calls-and-functions.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Calls and Functions &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Calls and Functions<small>24</small></a></h3>

<ul>
    <li><a href="#function-objects"><small>24.1</small> Function Objects</a></li>
    <li><a href="#compiling-to-function-objects"><small>24.2</small> Compiling to Function Objects</a></li>
    <li><a href="#call-frames"><small>24.3</small> Call Frames</a></li>
    <li><a href="#function-declarations"><small>24.4</small> Function Declarations</a></li>
    <li><a href="#function-calls"><small>24.5</small> Function Calls</a></li>
    <li><a href="#return-statements"><small>24.6</small> Return Statements</a></li>
    <li><a href="#native-functions"><small>24.7</small> Native Functions</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="jumping-back-and-forth.html" title="Jumping Back and Forth" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="closures.html" title="Closures" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="jumping-back-and-forth.html" title="Jumping Back and Forth" class="prev">←</a>
<a href="closures.html" title="Closures" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Calls and Functions<small>24</small></a></h3>

<ul>
    <li><a href="#function-objects"><small>24.1</small> Function Objects</a></li>
    <li><a href="#compiling-to-function-objects"><small>24.2</small> Compiling to Function Objects</a></li>
    <li><a href="#call-frames"><small>24.3</small> Call Frames</a></li>
    <li><a href="#function-declarations"><small>24.4</small> Function Declarations</a></li>
    <li><a href="#function-calls"><small>24.5</small> Function Calls</a></li>
    <li><a href="#return-statements"><small>24.6</small> Return Statements</a></li>
    <li><a href="#native-functions"><small>24.7</small> Native Functions</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="jumping-back-and-forth.html" title="Jumping Back and Forth" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="closures.html" title="Closures" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">24</div>
  <h1>Calls and Functions</h1>

<blockquote>
<p>Any problem in computer science can be solved with another level of
indirection. Except for the problem of too many layers of indirection.</p>
<p><cite>David Wheeler</cite></p>
</blockquote>
<p>This chapter is a beast. I try to break features into bite-sized pieces, but
sometimes you gotta swallow the whole <span name="eat">meal</span>. Our next
task is functions. We could start with only function declarations, but that&rsquo;s
not very useful when you can&rsquo;t call them. We could do calls, but there&rsquo;s nothing
to call. And all of the runtime support needed in the VM to support both of
those isn&rsquo;t very rewarding if it isn&rsquo;t hooked up to anything you can see. So
we&rsquo;re going to do it all. It&rsquo;s a lot, but we&rsquo;ll feel good when we&rsquo;re done.</p>
<aside name="eat">
<p>Eating<span class="em">&mdash;</span>consumption<span class="em">&mdash;</span>is a weird metaphor for a creative act. But most of the
biological processes that produce &ldquo;output&rdquo; are a little less, ahem, decorous.</p>
</aside>
<h2><a href="#function-objects" id="function-objects"><small>24&#8202;.&#8202;1</small>Function Objects</a></h2>
<p>The most interesting structural change in the VM is around the stack. We already
<em>have</em> a stack for local variables and temporaries, so we&rsquo;re partway there. But
we have no notion of a <em>call</em> stack. Before we can make much progress, we&rsquo;ll
have to fix that. But first, let&rsquo;s write some code. I always feel better once I
start moving. We can&rsquo;t do much without having some kind of representation for
functions, so we&rsquo;ll start there. From the VM&rsquo;s perspective, what is a function?</p>
<p>A function has a body that can be executed, so that means some bytecode. We
could compile the entire program and all of its function declarations into one
big monolithic Chunk. Each function would have a pointer to the first
instruction of its code inside the Chunk.</p>
<p>This is roughly how compilation to native code works, where you end up with one
solid blob of machine code. But for our bytecode VM, we can do something a
little higher level. I think a cleaner model is to give each function its own
Chunk. We&rsquo;ll want some other metadata too, so let&rsquo;s go ahead and stuff it all in
a struct now.</p>
<div class="codehilite"><pre class="insert-before">  struct Obj* next;
};
</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>Obj</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">int</span> <span class="i">arity</span>;
  <span class="t">Chunk</span> <span class="i">chunk</span>;
  <span class="t">ObjString</span>* <span class="i">name</span>;
} <span class="t">ObjFunction</span>;
</pre><pre class="insert-after">

struct ObjString {
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>Obj</em></div>

<p>Functions are first class in Lox, so they need to be actual Lox objects. Thus
ObjFunction has the same Obj header that all object types share. The <code>arity</code>
field stores the number of parameters the function expects. Then, in addition to
the chunk, we store the function&rsquo;s <span name="name">name</span>. That will be
handy for reporting readable runtime errors.</p>
<aside name="name">
<p>Humans don&rsquo;t seem to find numeric bytecode offsets particularly illuminating in
crash dumps.</p>
</aside>
<p>This is the first time the &ldquo;object&rdquo; module has needed to reference Chunk, so we
get an include.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#include &quot;chunk.h&quot;</span>
</pre><pre class="insert-after">#include &quot;value.h&quot;
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>Like we did with strings, we define some accessories to make Lox functions
easier to work with in C. Sort of a poor man&rsquo;s object orientation. First, we&rsquo;ll
declare a C function to create a new Lox function.</p>
<div class="codehilite"><pre class="insert-before">  uint32_t hash;
};

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjString</em></div>
<pre class="insert"><span class="t">ObjFunction</span>* <span class="i">newFunction</span>();
</pre><pre class="insert-after">ObjString* takeString(char* chars, int length);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjString</em></div>

<p>The implementation is over here:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>allocateObject</em>()</div>
<pre><span class="t">ObjFunction</span>* <span class="i">newFunction</span>() {
  <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjFunction</span>, <span class="a">OBJ_FUNCTION</span>);
  <span class="i">function</span>-&gt;<span class="i">arity</span> = <span class="n">0</span>;
  <span class="i">function</span>-&gt;<span class="i">name</span> = <span class="a">NULL</span>;
  <span class="i">initChunk</span>(&amp;<span class="i">function</span>-&gt;<span class="i">chunk</span>);
  <span class="k">return</span> <span class="i">function</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>allocateObject</em>()</div>

<p>We use our friend <code>ALLOCATE_OBJ()</code> to allocate memory and initialize the
object&rsquo;s header so that the VM knows what type of object it is. Instead of
passing in arguments to initialize the function like we did with ObjString, we
set the function up in a sort of blank state<span class="em">&mdash;</span>zero arity, no name, and no
code. That will get filled in later after the function is created.</p>
<p>Since we have a new kind of object, we need a new object type in the enum.</p>
<div class="codehilite"><pre class="insert-before">typedef enum {
</pre><div class="source-file"><em>object.h</em><br>
in enum <em>ObjType</em></div>
<pre class="insert">  <span class="a">OBJ_FUNCTION</span>,
</pre><pre class="insert-after">  OBJ_STRING,
} ObjType;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in enum <em>ObjType</em></div>

<p>When we&rsquo;re done with a function object, we must return the bits it borrowed back
to the operating system.</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_FUNCTION</span>: {
      <span class="t">ObjFunction</span>* <span class="i">function</span> = (<span class="t">ObjFunction</span>*)<span class="i">object</span>;
      <span class="i">freeChunk</span>(&amp;<span class="i">function</span>-&gt;<span class="i">chunk</span>);
      <span class="a">FREE</span>(<span class="t">ObjFunction</span>, <span class="i">object</span>);
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_STRING: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>This switch case is <span name="free-name">responsible</span> for freeing the
ObjFunction itself as well as any other memory it owns. Functions own their
chunk, so we call Chunk&rsquo;s destructor-like function.</p>
<aside name="free-name">
<p>We don&rsquo;t need to explicitly free the function&rsquo;s name because it&rsquo;s an ObjString.
That means we can let the garbage collector manage its lifetime for us. Or, at
least, we&rsquo;ll be able to once we <a href="garbage-collection.html">implement a garbage collector</a>.</p>
</aside>
<p>Lox lets you print any object, and functions are first-class objects, so we
need to handle them too.</p>
<div class="codehilite"><pre class="insert-before">  switch (OBJ_TYPE(value)) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>printObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_FUNCTION</span>:
      <span class="i">printFunction</span>(<span class="a">AS_FUNCTION</span>(<span class="i">value</span>));
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_STRING:
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printObject</em>()</div>

<p>This calls out to:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>copyString</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">printFunction</span>(<span class="t">ObjFunction</span>* <span class="i">function</span>) {
  <span class="i">printf</span>(<span class="s">&quot;&lt;fn %s&gt;&quot;</span>, <span class="i">function</span>-&gt;<span class="i">name</span>-&gt;<span class="i">chars</span>);
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>copyString</em>()</div>

<p>Since a function knows its name, it may as well say it.</p>
<p>Finally, we have a couple of macros for converting values to functions. First,
make sure your value actually <em>is</em> a function.</p>
<div class="codehilite"><pre class="insert-before">#define OBJ_TYPE(value)        (AS_OBJ(value)-&gt;type)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)</span>
</pre><pre class="insert-after">#define IS_STRING(value)       isObjType(value, OBJ_STRING)
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>Assuming that evaluates to true, you can then safely cast the Value to an
ObjFunction pointer using this:</p>
<div class="codehilite"><pre class="insert-before">#define IS_STRING(value)       isObjType(value, OBJ_STRING)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))</span>
</pre><pre class="insert-after">#define AS_STRING(value)       ((ObjString*)AS_OBJ(value))
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>With that, our object model knows how to represent functions. I&rsquo;m feeling warmed
up now. You ready for something a little harder?</p>
<h2><a href="#compiling-to-function-objects" id="compiling-to-function-objects"><small>24&#8202;.&#8202;2</small>Compiling to Function Objects</a></h2>
<p>Right now, our compiler assumes it is always compiling to one single chunk. With
each function&rsquo;s code living in separate chunks, that gets more complex. When the
compiler reaches a function declaration, it needs to emit code into the
function&rsquo;s chunk when compiling its body. At the end of the function body, the
compiler needs to return to the previous chunk it was working with.</p>
<p>That&rsquo;s fine for code inside function bodies, but what about code that isn&rsquo;t? The
&ldquo;top level&rdquo; of a Lox program is also imperative code, and we need a chunk to
compile that into. We can simplify the compiler and VM by placing that top-level
code inside an automatically defined function too. That way, the compiler is
always within some kind of function body, and the VM always runs code by
invoking a function. It&rsquo;s as if the entire program is <span
name="wrap">wrapped</span> inside an implicit <code>main()</code> function.</p>
<aside name="wrap">
<p>One semantic corner where that analogy breaks down is global variables. They
have special scoping rules different from local variables, so in that way, the
top level of a script isn&rsquo;t like a function body.</p>
</aside>
<p>Before we get to user-defined functions, then, let&rsquo;s do the reorganization to
support that implicit top-level function. It starts with the Compiler struct.
Instead of pointing directly to a Chunk that the compiler writes to, it
has a reference to the function object being built.</p>
<div class="codehilite"><pre class="insert-before">typedef struct {
</pre><div class="source-file"><em>compiler.c</em><br>
in struct <em>Compiler</em></div>
<pre class="insert">  <span class="t">ObjFunction</span>* <span class="i">function</span>;
  <span class="t">FunctionType</span> <span class="i">type</span>;

</pre><pre class="insert-after">  Local locals[UINT8_COUNT];
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in struct <em>Compiler</em></div>

<p>We also have a little FunctionType enum. This lets the compiler tell when it&rsquo;s
compiling top-level code versus the body of a function. Most of the compiler
doesn&rsquo;t care about this<span class="em">&mdash;</span>that&rsquo;s why it&rsquo;s a useful abstraction<span class="em">&mdash;</span>but in one or
two places the distinction is meaningful. We&rsquo;ll get to one later.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after struct <em>Local</em></div>
<pre><span class="k">typedef</span> <span class="k">enum</span> {
  <span class="a">TYPE_FUNCTION</span>,
  <span class="a">TYPE_SCRIPT</span>
} <span class="t">FunctionType</span>;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after struct <em>Local</em></div>

<p>Every place in the compiler that was writing to the Chunk now needs to go
through that <code>function</code> pointer. Fortunately, many <span
name="current">chapters</span> ago, we encapsulated access to the chunk in the
<code>currentChunk()</code> function. We only need to fix that and the rest of the compiler
is happy.</p>
<aside name="current">
<p>It&rsquo;s almost like I had a crystal ball that could see into the future and knew
we&rsquo;d need to change the code later. But, really, it&rsquo;s because I wrote all the
code for the book before any of the text.</p>
</aside>
<div class="codehilite"><pre class="insert-before">Compiler* current = NULL;
</pre><div class="source-file"><em>compiler.c</em><br>
add after variable <em>current</em><br>
replace 5 lines</div>
<pre class="insert">

<span class="k">static</span> <span class="t">Chunk</span>* <span class="i">currentChunk</span>() {
  <span class="k">return</span> &amp;<span class="i">current</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>;
}
</pre><pre class="insert-after">

static void errorAt(Token* token, const char* message) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after variable <em>current</em>, replace 5 lines</div>

<p>The current chunk is always the chunk owned by the function we&rsquo;re in the middle
of compiling. Next, we need to actually create that function. Previously, the VM
passed a Chunk to the compiler, which filled it with code. Instead, the compiler
will create and return a function that contains the compiled top-level code<span class="em">&mdash;</span>which is all we support right now<span class="em">&mdash;</span>of the user&rsquo;s program.</p>
<h3><a href="#creating-functions-at-compile-time" id="creating-functions-at-compile-time"><small>24&#8202;.&#8202;2&#8202;.&#8202;1</small>Creating functions at compile time</a></h3>
<p>We start threading this through in <code>compile()</code>, which is the main entry point
into the compiler.</p>
<div class="codehilite"><pre class="insert-before">  Compiler compiler;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="i">initCompiler</span>(&amp;<span class="i">compiler</span>, <span class="a">TYPE_SCRIPT</span>);
</pre><pre class="insert-after">

  parser.hadError = false;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>(), replace 2 lines</div>

<p>There are a bunch of changes in how the compiler is initialized. First, we
initialize the new Compiler fields.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>initCompiler</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">initCompiler</span>(<span class="t">Compiler</span>* <span class="i">compiler</span>, <span class="t">FunctionType</span> <span class="i">type</span>) {
  <span class="i">compiler</span>-&gt;<span class="i">function</span> = <span class="a">NULL</span>;
  <span class="i">compiler</span>-&gt;<span class="i">type</span> = <span class="i">type</span>;
</pre><pre class="insert-after">  compiler-&gt;localCount = 0;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>initCompiler</em>(), replace 1 line</div>

<p>Then we allocate a new function object to compile into.</p>
<div class="codehilite"><pre class="insert-before">  compiler-&gt;scopeDepth = 0;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>initCompiler</em>()</div>
<pre class="insert">  <span class="i">compiler</span>-&gt;<span class="i">function</span> = <span class="i">newFunction</span>();
</pre><pre class="insert-after">  current = compiler;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>initCompiler</em>()</div>

<p><span name="null"></span></p>
<aside name="null">
<p>I know, it looks dumb to null the <code>function</code> field only to immediately assign it
a value a few lines later. More garbage collection-related paranoia.</p>
</aside>
<p>Creating an ObjFunction in the compiler might seem a little strange. A function
object is the <em>runtime</em> representation of a function, but here we are creating
it at compile time. The way to think of it is that a function is similar to a
string or number literal. It forms a bridge between the compile time and runtime
worlds. When we get to function <em>declarations</em>, those really <em>are</em> literals<span class="em">&mdash;</span>they are a notation that produces values of a built-in type. So the <span
name="closure">compiler</span> creates function objects during compilation.
Then, at runtime, they are simply invoked.</p>
<aside name="closure">
<p>We can create functions at compile time because they contain only data available
at compile time. The function&rsquo;s code, name, and arity are all fixed. When we add
closures in the <a href="closures.html">next chapter</a>, which capture variables at runtime,
the story gets more complex.</p>
</aside>
<p>Here is another strange piece of code:</p>
<div class="codehilite"><pre class="insert-before">  current = compiler;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>initCompiler</em>()</div>
<pre class="insert">

  <span class="t">Local</span>* <span class="i">local</span> = &amp;<span class="i">current</span>-&gt;<span class="i">locals</span>[<span class="i">current</span>-&gt;<span class="i">localCount</span>++];
  <span class="i">local</span>-&gt;<span class="i">depth</span> = <span class="n">0</span>;
  <span class="i">local</span>-&gt;<span class="i">name</span>.<span class="i">start</span> = <span class="s">&quot;&quot;</span>;
  <span class="i">local</span>-&gt;<span class="i">name</span>.<span class="i">length</span> = <span class="n">0</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>initCompiler</em>()</div>

<p>Remember that the compiler&rsquo;s <code>locals</code> array keeps track of which stack slots are
associated with which local variables or temporaries. From now on, the compiler
implicitly claims stack slot zero for the VM&rsquo;s own internal use. We give it an
empty name so that the user can&rsquo;t write an identifier that refers to it. I&rsquo;ll
explain what this is about when it becomes useful.</p>
<p>That&rsquo;s the initialization side. We also need a couple of changes on the other
end when we finish compiling some code.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>endCompiler</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">ObjFunction</span>* <span class="i">endCompiler</span>() {
</pre><pre class="insert-after">  emitReturn();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>endCompiler</em>(), replace 1 line</div>

<p>Previously, when <code>interpret()</code> called into the compiler, it passed in a Chunk to
be written to. Now that the compiler creates the function object itself, we
return that function. We grab it from the current compiler here:</p>
<div class="codehilite"><pre class="insert-before">  emitReturn();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>endCompiler</em>()</div>
<pre class="insert">  <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="i">current</span>-&gt;<span class="i">function</span>;

</pre><pre class="insert-after">#ifdef DEBUG_PRINT_CODE
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>endCompiler</em>()</div>

<p>And then return it to <code>compile()</code> like so:</p>
<div class="codehilite"><pre class="insert-before">#endif
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>endCompiler</em>()</div>
<pre class="insert">

  <span class="k">return</span> <span class="i">function</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>endCompiler</em>()</div>

<p>Now is a good time to make another tweak in this function. Earlier, we added
some diagnostic code to have the VM dump the disassembled bytecode so we could
debug the compiler. We should fix that to keep working now that the generated
chunk is wrapped in a function.</p>
<div class="codehilite"><pre class="insert-before">#ifdef DEBUG_PRINT_CODE
  if (!parser.hadError) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>endCompiler</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">disassembleChunk</span>(<span class="i">currentChunk</span>(), <span class="i">function</span>-&gt;<span class="i">name</span> != <span class="a">NULL</span>
        ? <span class="i">function</span>-&gt;<span class="i">name</span>-&gt;<span class="i">chars</span> : <span class="s">&quot;&lt;script&gt;&quot;</span>);
</pre><pre class="insert-after">  }
#endif
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>endCompiler</em>(), replace 1 line</div>

<p>Notice the check in here to see if the function&rsquo;s name is <code>NULL</code>? User-defined
functions have names, but the implicit function we create for the top-level code
does not, and we need to handle that gracefully even in our own diagnostic code.
Speaking of which:</p>
<div class="codehilite"><pre class="insert-before">static void printFunction(ObjFunction* function) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>printFunction</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">function</span>-&gt;<span class="i">name</span> == <span class="a">NULL</span>) {
    <span class="i">printf</span>(<span class="s">&quot;&lt;script&gt;&quot;</span>);
    <span class="k">return</span>;
  }
</pre><pre class="insert-after">  printf(&quot;&lt;fn %s&gt;&quot;, function-&gt;name-&gt;chars);
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printFunction</em>()</div>

<p>There&rsquo;s no way for a <em>user</em> to get a reference to the top-level function and try
to print it, but our <code>DEBUG_TRACE_EXECUTION</code> <span
name="debug">diagnostic</span> code that prints the entire stack can and does.</p>
<aside name="debug">
<p>It is no fun if the diagnostic code we use to find bugs itself causes the VM to
segfault!</p>
</aside>
<p>Bumping up a level to <code>compile()</code>, we adjust its signature.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;vm.h&quot;

</pre><div class="source-file"><em>compiler.h</em><br>
function <em>compile</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="t">ObjFunction</span>* <span class="i">compile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>compiler.h</em>, function <em>compile</em>(), replace 1 line</div>

<p>Instead of taking a chunk, now it returns a function. Over in the
implementation:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>compile</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="t">ObjFunction</span>* <span class="i">compile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>) {
</pre><pre class="insert-after">  initScanner(source);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>compile</em>(), replace 1 line</div>

<p>Finally we get to some actual code. We change the very end of the function to
this:</p>
<div class="codehilite"><pre class="insert-before">  while (!match(TOKEN_EOF)) {
    declaration();
  }

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="i">endCompiler</span>();
  <span class="k">return</span> <span class="i">parser</span>.<span class="i">hadError</span> ? <span class="a">NULL</span> : <span class="i">function</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>(), replace 2 lines</div>

<p>We get the function object from the compiler. If there were no compile errors,
we return it. Otherwise, we signal an error by returning <code>NULL</code>. This way, the
VM doesn&rsquo;t try to execute a function that may contain invalid bytecode.</p>
<p>Eventually, we will update <code>interpret()</code> to handle the new declaration of
<code>compile()</code>, but first we have some other changes to make.</p>
<h2><a href="#call-frames" id="call-frames"><small>24&#8202;.&#8202;3</small>Call Frames</a></h2>
<p>It&rsquo;s time for a big conceptual leap. Before we can implement function
declarations and calls, we need to get the VM ready to handle them. There are
two main problems we need to worry about:</p>
<h3><a href="#allocating-local-variables" id="allocating-local-variables"><small>24&#8202;.&#8202;3&#8202;.&#8202;1</small>Allocating local variables</a></h3>
<p>The compiler allocates stack slots for local variables. How should that work
when the set of local variables in a program is distributed across multiple
functions?</p>
<p>One option would be to keep them totally separate. Each function would get its
own dedicated set of slots in the VM stack that it would own <span
name="static">forever</span>, even when the function isn&rsquo;t being called. Each
local variable in the entire program would have a bit of memory in the VM that
it keeps to itself.</p>
<aside name="static">
<p>It&rsquo;s basically what you&rsquo;d get if you declared every local variable in a C
program using <code>static</code>.</p>
</aside>
<p>Believe it or not, early programming language implementations worked this way.
The first Fortran compilers statically allocated memory for each variable. The
obvious problem is that it&rsquo;s really inefficient. Most functions are not in the
middle of being called at any point in time, so sitting on unused memory for
them is wasteful.</p>
<p>The more fundamental problem, though, is recursion. With recursion, you can be
&ldquo;in&rdquo; multiple calls to the same function at the same time. Each needs its <span
name="fortran">own</span> memory for its local variables. In jlox, we solved
this by dynamically allocating memory for an environment each time a function
was called or a block entered. In clox, we don&rsquo;t want that kind of performance
cost on every function call.</p>
<aside name="fortran">
<p>Fortran avoided this problem by disallowing recursion entirely. Recursion was
considered an advanced, esoteric feature at the time.</p>
</aside>
<p>Instead, our solution lies somewhere between Fortran&rsquo;s static allocation and
jlox&rsquo;s dynamic approach. The value stack in the VM works on the observation that
local variables and temporaries behave in a last-in first-out fashion.
Fortunately for us, that&rsquo;s still true even when you add function calls into the
mix. Here&rsquo;s an example:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">first</span>() {
  <span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
  <span class="i">second</span>();
  <span class="k">var</span> <span class="i">b</span> = <span class="n">2</span>;
}

<span class="k">fun</span> <span class="i">second</span>() {
  <span class="k">var</span> <span class="i">c</span> = <span class="n">3</span>;
  <span class="k">var</span> <span class="i">d</span> = <span class="n">4</span>;
}

<span class="i">first</span>();
</pre></div>
<p>Step through the program and look at which variables are in memory at each point
in time:</p><img src="image/calls-and-functions/calls.png" alt="Tracing through the execution of the previous program, showing the stack of variables at each step." />
<p>As execution flows through the two calls, every local variable obeys the
principle that any variable declared after it will be discarded before that
earlier variable needs to be. This is true even across calls. We know we&rsquo;ll be done with
<code>c</code> and <code>d</code> before we are done with <code>a</code>. It seems we should be able to allocate
local variables on the VM&rsquo;s value stack.</p>
<p>Ideally, we still determine <em>where</em> on the stack each variable will go at
compile time. That keeps the bytecode instructions for working with variables
simple and fast. In the above example, we could <span
name="imagine">imagine</span> doing so in a straightforward way, but that
doesn&rsquo;t always work out. Consider:</p>
<aside name="imagine">
<p>I say &ldquo;imagine&rdquo; because the compiler can&rsquo;t actually figure this out. Because
functions are first class in Lox, we can&rsquo;t determine which functions call which
others at compile time.</p>
</aside>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">first</span>() {
  <span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
  <span class="i">second</span>();
  <span class="k">var</span> <span class="i">b</span> = <span class="n">2</span>;
  <span class="i">second</span>();
}

<span class="k">fun</span> <span class="i">second</span>() {
  <span class="k">var</span> <span class="i">c</span> = <span class="n">3</span>;
  <span class="k">var</span> <span class="i">d</span> = <span class="n">4</span>;
}

<span class="i">first</span>();
</pre></div>
<p>In the first call to <code>second()</code>, <code>c</code> and <code>d</code> would go into slots 1 and 2. But in
the second call, we need to have made room for <code>b</code>, so <code>c</code> and <code>d</code> need to be in
slots 2 and 3. Thus the compiler can&rsquo;t pin down an exact slot for each local
variable across function calls. But <em>within</em> a given function, the <em>relative</em>
location of each local variable is fixed. Variable <code>d</code> is always in the slot
right after <code>c</code>. This is the key insight.</p>
<p>When a function is called, we don&rsquo;t know where the top of the stack will be
because it can be called from different contexts. But, wherever that top happens
to be, we do know where all of the function&rsquo;s local variables will be relative
to that starting point. So, like many problems, we solve our allocation problem
with a level of indirection.</p>
<p>At the beginning of each function call, the VM records the location of the first
slot where that function&rsquo;s own locals begin. The instructions for working with
local variables access them by a slot index relative to that, instead of
relative to the bottom of the stack like they do today. At compile time, we
calculate those relative slots. At runtime, we convert that relative slot to an
absolute stack index by adding the function call&rsquo;s starting slot.</p>
<p>It&rsquo;s as if the function gets a &ldquo;window&rdquo; or &ldquo;frame&rdquo; within the larger stack where
it can store its locals. The position of the <strong>call frame</strong> is determined at
runtime, but within and relative to that region, we know where to find things.</p><img src="image/calls-and-functions/window.png" alt="The stack at the two points when second() is called, with a window hovering over each one showing the pair of stack slots used by the function." />
<p>The historical name for this recorded location where the function&rsquo;s locals start
is a <strong>frame pointer</strong> because it points to the beginning of the function&rsquo;s call
frame. Sometimes you hear <strong>base pointer</strong>, because it points to the base stack
slot on top of which all of the function&rsquo;s variables live.</p>
<p>That&rsquo;s the first piece of data we need to track. Every time we call a function,
the VM determines the first stack slot where that function&rsquo;s variables begin.</p>
<h3><a href="#return-addresses" id="return-addresses"><small>24&#8202;.&#8202;3&#8202;.&#8202;2</small>Return addresses</a></h3>
<p>Right now, the VM works its way through the instruction stream by incrementing
the <code>ip</code> field. The only interesting behavior is around control flow
instructions which offset the <code>ip</code> by larger amounts. <em>Calling</em> a function is
pretty straightforward<span class="em">&mdash;</span>simply set <code>ip</code> to point to the first instruction in
that function&rsquo;s chunk. But what about when the function is done?</p>
<p>The VM needs to <span name="return">return</span> to the chunk where the
function was called from and resume execution at the instruction immediately
after the call. Thus, for each function call, we need to track where we jump
back to when the call completes. This is called a <strong>return address</strong> because
it&rsquo;s the address of the instruction that the VM returns to after the call.</p>
<p>Again, thanks to recursion, there may be multiple return addresses for a single
function, so this is a property of each <em>invocation</em> and not the function
itself.</p>
<aside name="return">
<p>The authors of early Fortran compilers had a clever trick for implementing
return addresses. Since they <em>didn&rsquo;t</em> support recursion, any given function
needed only a single return address at any point in time. So when a function was
called at runtime, the program would <em>modify its own code</em> to change a jump
instruction at the end of the function to jump back to its caller. Sometimes the
line between genius and madness is hair thin.</p>
</aside>
<h3><a href="#the-call-stack" id="the-call-stack"><small>24&#8202;.&#8202;3&#8202;.&#8202;3</small>The call stack</a></h3>
<p>So for each live function invocation<span class="em">&mdash;</span>each call that hasn&rsquo;t returned yet<span class="em">&mdash;</span>we
need to track where on the stack that function&rsquo;s locals begin, and where the
caller should resume. We&rsquo;ll put this, along with some other stuff, in a new
struct.</p>
<div class="codehilite"><pre class="insert-before">#define STACK_MAX 256
</pre><div class="source-file"><em>vm.h</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">ObjFunction</span>* <span class="i">function</span>;
  <span class="t">uint8_t</span>* <span class="i">ip</span>;
  <span class="t">Value</span>* <span class="i">slots</span>;
} <span class="t">CallFrame</span>;
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>vm.h</em></div>

<p>A CallFrame represents a single ongoing function call. The <code>slots</code> field points
into the VM&rsquo;s value stack at the first slot that this function can use. I gave
it a plural name because<span class="em">&mdash;</span>thanks to C&rsquo;s weird &ldquo;pointers are sort of arrays&rdquo;
thing<span class="em">&mdash;</span>we&rsquo;ll treat it like an array.</p>
<p>The implementation of return addresses is a little different from what I
described above. Instead of storing the return address in the callee&rsquo;s frame,
the caller stores its own <code>ip</code>. When we return from a function, the VM will jump
to the <code>ip</code> of the caller&rsquo;s CallFrame and resume from there.</p>
<p>I also stuffed a pointer to the function being called in here. We&rsquo;ll use that to
look up constants and for a few other things.</p>
<p>Each time a function is called, we create one of these structs. We could <span
name="heap">dynamically</span> allocate them on the heap, but that&rsquo;s slow.
Function calls are a core operation, so they need to be as fast as possible.
Fortunately, we can make the same observation we made for variables: function
calls have stack semantics. If <code>first()</code> calls <code>second()</code>, the call to
<code>second()</code> will complete before <code>first()</code> does.</p>
<aside name="heap">
<p>Many Lisp implementations dynamically allocate stack frames because it
simplifies implementing <a href="https://en.wikipedia.org/wiki/Continuation">continuations</a>. If your language supports
continuations, then function calls do <em>not</em> always have stack semantics.</p>
</aside>
<p>So over in the VM, we create an array of these CallFrame structs up front and
treat it as a stack, like we do with the value array.</p>
<div class="codehilite"><pre class="insert-before">typedef struct {
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em><br>
replace 2 lines</div>
<pre class="insert">  <span class="t">CallFrame</span> <span class="i">frames</span>[<span class="a">FRAMES_MAX</span>];
  <span class="t">int</span> <span class="i">frameCount</span>;

</pre><pre class="insert-after">  Value stack[STACK_MAX];
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em>, replace 2 lines</div>

<p>This array replaces the <code>chunk</code> and <code>ip</code> fields we used to have directly in the
VM. Now each CallFrame has its own <code>ip</code> and its own pointer to the ObjFunction
that it&rsquo;s executing. From there, we can get to the function&rsquo;s chunk.</p>
<p>The new <code>frameCount</code> field in the VM stores the current height of the CallFrame
stack<span class="em">&mdash;</span>the number of ongoing function calls. To keep clox simple, the array&rsquo;s
capacity is fixed. This means, as in many language implementations, there is a
maximum call depth we can handle. For clox, it&rsquo;s defined here:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;value.h&quot;

</pre><div class="source-file"><em>vm.h</em><br>
replace 1 line</div>
<pre class="insert"><span class="a">#define FRAMES_MAX 64</span>
<span class="a">#define STACK_MAX (FRAMES_MAX * UINT8_COUNT)</span>
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, replace 1 line</div>

<p>We also redefine the value stack&rsquo;s <span name="plenty">size</span> in terms of
that to make sure we have plenty of stack slots even in very deep call trees.
When the VM starts up, the CallFrame stack is empty.</p>
<aside name="plenty">
<p>It is still possible to overflow the stack if enough function calls use enough
temporaries in addition to locals. A robust implementation would guard against
this, but I&rsquo;m trying to keep things simple.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  vm.stackTop = vm.stack;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>resetStack</em>()</div>
<pre class="insert">  <span class="i">vm</span>.<span class="i">frameCount</span> = <span class="n">0</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>resetStack</em>()</div>

<p>The &ldquo;vm.h&rdquo; header needs access to ObjFunction, so we add an include.</p>
<div class="codehilite"><pre class="insert-before">#define clox_vm_h

</pre><div class="source-file"><em>vm.h</em><br>
replace 1 line</div>
<pre class="insert"><span class="a">#include &quot;object.h&quot;</span>
</pre><pre class="insert-after">#include &quot;table.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, replace 1 line</div>

<p>Now we&rsquo;re ready to move over to the VM&rsquo;s implementation file. We&rsquo;ve got some
grunt work ahead of us. We&rsquo;ve moved <code>ip</code> out of the VM struct and into
CallFrame. We need to fix every line of code in the VM that touches <code>ip</code> to
handle that. Also, the instructions that access local variables by stack slot
need to be updated to do so relative to the current CallFrame&rsquo;s <code>slots</code> field.</p>
<p>We&rsquo;ll start at the top and plow through it.</p>
<div class="codehilite"><pre class="insert-before">static InterpretResult run() {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 4 lines</div>
<pre class="insert">  <span class="t">CallFrame</span>* <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span> - <span class="n">1</span>];

<span class="a">#define READ_BYTE() (*frame-&gt;ip++)</span>

<span class="a">#define READ_SHORT() \</span>
<span class="a">    (frame-&gt;ip += 2, \</span>
<span class="a">    (uint16_t)((frame-&gt;ip[-2] &lt;&lt; 8) | frame-&gt;ip[-1]))</span>

<span class="a">#define READ_CONSTANT() \</span>
<span class="a">    (frame-&gt;function-&gt;chunk.constants.values[READ_BYTE()])</span>

</pre><pre class="insert-after">#define READ_STRING() AS_STRING(READ_CONSTANT())
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 4 lines</div>

<p>First, we store the current topmost CallFrame in a <span
name="local">local</span> variable inside the main bytecode execution function.
Then we replace the bytecode access macros with versions that access <code>ip</code>
through that variable.</p>
<aside name="local">
<p>We could access the current frame by going through the CallFrame array every
time, but that&rsquo;s verbose. More importantly, storing the frame in a local
variable encourages the C compiler to keep that pointer in a register. That
speeds up access to the frame&rsquo;s <code>ip</code>. There&rsquo;s no <em>guarantee</em> that the compiler
will do this, but there&rsquo;s a good chance it will.</p>
</aside>
<p>Now onto each instruction that needs a little tender loving care.</p>
<div class="codehilite"><pre class="insert-before">      case OP_GET_LOCAL: {
        uint8_t slot = READ_BYTE();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">        <span class="i">push</span>(<span class="i">frame</span>-&gt;<span class="i">slots</span>[<span class="i">slot</span>]);
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 1 line</div>

<p>Previously, <code>OP_GET_LOCAL</code> read the given local slot directly from the VM&rsquo;s
stack array, which meant it indexed the slot starting from the bottom of the
stack. Now, it accesses the current frame&rsquo;s <code>slots</code> array, which means it
accesses the given numbered slot relative to the beginning of that frame.</p>
<p>Setting a local variable works the same way.</p>
<div class="codehilite"><pre class="insert-before">      case OP_SET_LOCAL: {
        uint8_t slot = READ_BYTE();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">        <span class="i">frame</span>-&gt;<span class="i">slots</span>[<span class="i">slot</span>] = <span class="i">peek</span>(<span class="n">0</span>);
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 1 line</div>

<p>The jump instructions used to modify the VM&rsquo;s <code>ip</code> field. Now, they do the same
for the current frame&rsquo;s <code>ip</code>.</p>
<div class="codehilite"><pre class="insert-before">      case OP_JUMP: {
        uint16_t offset = READ_SHORT();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">        <span class="i">frame</span>-&gt;<span class="i">ip</span> += <span class="i">offset</span>;
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 1 line</div>

<p>Same with the conditional jump:</p>
<div class="codehilite"><pre class="insert-before">      case OP_JUMP_IF_FALSE: {
        uint16_t offset = READ_SHORT();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">        <span class="k">if</span> (<span class="i">isFalsey</span>(<span class="i">peek</span>(<span class="n">0</span>))) <span class="i">frame</span>-&gt;<span class="i">ip</span> += <span class="i">offset</span>;
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 1 line</div>

<p>And our backward-jumping loop instruction:</p>
<div class="codehilite"><pre class="insert-before">      case OP_LOOP: {
        uint16_t offset = READ_SHORT();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">        <span class="i">frame</span>-&gt;<span class="i">ip</span> -= <span class="i">offset</span>;
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 1 line</div>

<p>We have some diagnostic code that prints each instruction as it executes to help
us debug our VM. That needs to work with the new structure too.</p>
<div class="codehilite"><pre class="insert-before">    printf(&quot;\n&quot;);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 2 lines</div>
<pre class="insert">    <span class="i">disassembleInstruction</span>(&amp;<span class="i">frame</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>,
        (<span class="t">int</span>)(<span class="i">frame</span>-&gt;<span class="i">ip</span> - <span class="i">frame</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">code</span>));
</pre><pre class="insert-after">#endif
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 2 lines</div>

<p>Instead of passing in the VM&rsquo;s <code>chunk</code> and <code>ip</code> fields, now we read from the
current CallFrame.</p>
<p>You know, that wasn&rsquo;t too bad, actually. Most instructions just use the macros,
so they didn&rsquo;t need to be touched. Next, we jump up a level to the code that calls
<code>run()</code>.</p>
<div class="codehilite"><pre class="insert-before">InterpretResult interpret(const char* source) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>interpret</em>()<br>
replace 10 lines</div>
<pre class="insert">  <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="i">compile</span>(<span class="i">source</span>);
  <span class="k">if</span> (<span class="i">function</span> == <span class="a">NULL</span>) <span class="k">return</span> <span class="a">INTERPRET_COMPILE_ERROR</span>;

  <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">function</span>));
  <span class="t">CallFrame</span>* <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span>++];
  <span class="i">frame</span>-&gt;<span class="i">function</span> = <span class="i">function</span>;
  <span class="i">frame</span>-&gt;<span class="i">ip</span> = <span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">code</span>;
  <span class="i">frame</span>-&gt;<span class="i">slots</span> = <span class="i">vm</span>.<span class="i">stack</span>;
</pre><pre class="insert-after">

  InterpretResult result = run();
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>interpret</em>(), replace 10 lines</div>

<p>We finally get to wire up our earlier compiler changes to the back-end changes
we just made. First, we pass the source code to the compiler. It returns us a
new ObjFunction containing the compiled top-level code. If we get <code>NULL</code> back,
it means there was some compile-time error which the compiler has already
reported. In that case, we bail out since we can&rsquo;t run anything.</p>
<p>Otherwise, we store the function on the stack and prepare an initial CallFrame
to execute its code. Now you can see why the compiler sets aside stack slot zero<span class="em">&mdash;</span>that stores the function being called. In the new CallFrame, we point to the
function, initialize its <code>ip</code> to point to the beginning of the function&rsquo;s
bytecode, and set up its stack window to start at the very bottom of the VM&rsquo;s
value stack.</p>
<p>This gets the interpreter ready to start executing code. After finishing, the VM
used to free the hardcoded chunk. Now that the ObjFunction owns that code, we
don&rsquo;t need to do that anymore, so the end of <code>interpret()</code> is simply this:</p>
<div class="codehilite"><pre class="insert-before">  frame-&gt;slots = vm.stack;

</pre><div class="source-file"><em>vm.c</em><br>
in <em>interpret</em>()<br>
replace 4 lines</div>
<pre class="insert">  <span class="k">return</span> <span class="i">run</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>interpret</em>(), replace 4 lines</div>

<p>The last piece of code referring to the old VM fields is <code>runtimeError()</code>. We&rsquo;ll
revisit that later in the chapter, but for now let&rsquo;s change it to this:</p>
<div class="codehilite"><pre class="insert-before">  fputs(&quot;\n&quot;, stderr);

</pre><div class="source-file"><em>vm.c</em><br>
in <em>runtimeError</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="t">CallFrame</span>* <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span> - <span class="n">1</span>];
  <span class="t">size_t</span> <span class="i">instruction</span> = <span class="i">frame</span>-&gt;<span class="i">ip</span> - <span class="i">frame</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">code</span> - <span class="n">1</span>;
  <span class="t">int</span> <span class="i">line</span> = <span class="i">frame</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">lines</span>[<span class="i">instruction</span>];
</pre><pre class="insert-after">  fprintf(stderr, &quot;[line %d] in script\n&quot;, line);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>runtimeError</em>(), replace 2 lines</div>

<p>Instead of reading the chunk and <code>ip</code> directly from the VM, it pulls those from
the topmost CallFrame on the stack. That should get the function working again
and behaving as it did before.</p>
<p>Assuming we did all of that correctly, we got clox back to a runnable
state. Fire it up and it does<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>exactly what it did before. We haven&rsquo;t added
any new features yet, so this is kind of a letdown. But all of the
infrastructure is there and ready for us now. Let&rsquo;s take advantage of it.</p>
<h2><a href="#function-declarations" id="function-declarations"><small>24&#8202;.&#8202;4</small>Function Declarations</a></h2>
<p>Before we can do call expressions, we need something to call, so we&rsquo;ll do
function declarations first. The <span name="fun">fun</span> starts with a
keyword.</p>
<aside name="fun">
<p>Yes, I am going to make a dumb joke about the <code>fun</code> keyword every time it
comes up.</p>
</aside>
<div class="codehilite"><pre class="insert-before">static void declaration() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>declaration</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_FUN</span>)) {
    <span class="i">funDeclaration</span>();
  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_VAR</span>)) {
</pre><pre class="insert-after">    varDeclaration();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>declaration</em>(), replace 1 line</div>

<p>That passes control to here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>block</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">funDeclaration</span>() {
  <span class="t">uint8_t</span> <span class="i">global</span> = <span class="i">parseVariable</span>(<span class="s">&quot;Expect function name.&quot;</span>);
  <span class="i">markInitialized</span>();
  <span class="i">function</span>(<span class="a">TYPE_FUNCTION</span>);
  <span class="i">defineVariable</span>(<span class="i">global</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>block</em>()</div>

<p>Functions are first-class values, and a function declaration simply creates and
stores one in a newly declared variable. So we parse the name just like any
other variable declaration. A function declaration at the top level will bind
the function to a global variable. Inside a block or other function, a function
declaration creates a local variable.</p>
<p>In an earlier chapter, I explained how variables <a href="local-variables.html#another-scope-edge-case">get defined in two
stages</a>. This ensures you can&rsquo;t access a variable&rsquo;s value inside the
variable&rsquo;s own initializer. That would be bad because the variable doesn&rsquo;t
<em>have</em> a value yet.</p>
<p>Functions don&rsquo;t suffer from this problem. It&rsquo;s safe for a function to refer to
its own name inside its body. You can&rsquo;t <em>call</em> the function and execute the body
until after it&rsquo;s fully defined, so you&rsquo;ll never see the variable in an
uninitialized state. Practically speaking, it&rsquo;s useful to allow this in order to
support recursive local functions.</p>
<p>To make that work, we mark the function declaration&rsquo;s variable &ldquo;initialized&rdquo; as
soon as we compile the name, before we compile the body. That way the name can
be referenced inside the body without generating an error.</p>
<p>We do need one check, though.</p>
<div class="codehilite"><pre class="insert-before">static void markInitialized() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>markInitialized</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">scopeDepth</span> == <span class="n">0</span>) <span class="k">return</span>;
</pre><pre class="insert-after">  current-&gt;locals[current-&gt;localCount - 1].depth =
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>markInitialized</em>()</div>

<p>Before, we called <code>markInitialized()</code> only when we already knew we were in a
local scope. Now, a top-level function declaration will also call this function.
When that happens, there is no local variable to mark initialized<span class="em">&mdash;</span>the
function is bound to a global variable.</p>
<p>Next, we compile the function itself<span class="em">&mdash;</span>its parameter list and block body. For
that, we use a separate helper function. That helper generates code that
leaves the resulting function object on top of the stack. After that, we call
<code>defineVariable()</code> to store that function back into the variable we declared for
it.</p>
<p>I split out the code to compile the parameters and body because we&rsquo;ll reuse it
later for parsing method declarations inside classes. Let&rsquo;s build it
incrementally, starting with this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>block</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">function</span>(<span class="t">FunctionType</span> <span class="i">type</span>) {
  <span class="t">Compiler</span> <span class="i">compiler</span>;
  <span class="i">initCompiler</span>(&amp;<span class="i">compiler</span>, <span class="i">type</span>);
  <span class="i">beginScope</span>();<span name="no-end-scope"> </span>

  <span class="i">consume</span>(<span class="a">TOKEN_LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after function name.&quot;</span>);
  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after parameters.&quot;</span>);
  <span class="i">consume</span>(<span class="a">TOKEN_LEFT_BRACE</span>, <span class="s">&quot;Expect &#39;{&#39; before function body.&quot;</span>);
  <span class="i">block</span>();

  <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="i">endCompiler</span>();
  <span class="i">emitBytes</span>(<span class="a">OP_CONSTANT</span>, <span class="i">makeConstant</span>(<span class="a">OBJ_VAL</span>(<span class="i">function</span>)));
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>block</em>()</div>

<aside name="no-end-scope">
<p>This <code>beginScope()</code> doesn&rsquo;t have a corresponding <code>endScope()</code> call. Because we
end the Compiler completely when we reach the end of the function body, there&rsquo;s no
need to close the lingering outermost scope.</p>
</aside>
<p>For now, we won&rsquo;t worry about parameters. We parse an empty pair of parentheses
followed by the body. The body starts with a left curly brace, which we parse
here. Then we call our existing <code>block()</code> function, which knows how to compile
the rest of a block including the closing brace.</p>
<h3><a href="#a-stack-of-compilers" id="a-stack-of-compilers"><small>24&#8202;.&#8202;4&#8202;.&#8202;1</small>A stack of compilers</a></h3>
<p>The interesting parts are the compiler stuff at the top and bottom. The Compiler
struct stores data like which slots are owned by which local variables, how many
blocks of nesting we&rsquo;re currently in, etc. All of that is specific to a single
function. But now the front end needs to handle compiling multiple functions
<span name="nested">nested</span> within each other.</p>
<aside name="nested">
<p>Remember that the compiler treats top-level code as the body of an implicit
function, so as soon as we add <em>any</em> function declarations, we&rsquo;re in a world of
nested functions.</p>
</aside>
<p>The trick for managing that is to create a separate Compiler for each function
being compiled. When we start compiling a function declaration, we create a new
Compiler on the C stack and initialize it. <code>initCompiler()</code> sets that Compiler
to be the current one. Then, as we compile the body, all of the functions that
emit bytecode write to the chunk owned by the new Compiler&rsquo;s function.</p>
<p>After we reach the end of the function&rsquo;s block body, we call <code>endCompiler()</code>.
That yields the newly compiled function object, which we store as a constant in
the <em>surrounding</em> function&rsquo;s constant table. But, wait, how do we get back to
the surrounding function? We lost it when <code>initCompiler()</code> overwrote the current
compiler pointer.</p>
<p>We fix that by treating the series of nested Compiler structs as a stack. Unlike
the Value and CallFrame stacks in the VM, we won&rsquo;t use an array. Instead, we use
a linked list. Each Compiler points back to the Compiler for the function that
encloses it, all the way back to the root Compiler for the top-level code.</p>
<div class="codehilite"><pre class="insert-before">} FunctionType;

</pre><div class="source-file"><em>compiler.c</em><br>
add after enum <em>FunctionType</em><br>
replace 1 line</div>
<pre class="insert"><span class="k">typedef</span> <span class="k">struct</span> <span class="t">Compiler</span> {
  <span class="k">struct</span> <span class="t">Compiler</span>* <span class="i">enclosing</span>;
</pre><pre class="insert-after">  ObjFunction* function;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after enum <em>FunctionType</em>, replace 1 line</div>

<p>Inside the Compiler struct, we can&rsquo;t reference the Compiler <em>typedef</em> since that
declaration hasn&rsquo;t finished yet. Instead, we give a name to the struct itself
and use that for the field&rsquo;s type. C is weird.</p>
<p>When initializing a new Compiler, we capture the about-to-no-longer-be-current
one in that pointer.</p>
<div class="codehilite"><pre class="insert-before">static void initCompiler(Compiler* compiler, FunctionType type) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>initCompiler</em>()</div>
<pre class="insert">  <span class="i">compiler</span>-&gt;<span class="i">enclosing</span> = <span class="i">current</span>;
</pre><pre class="insert-after">  compiler-&gt;function = NULL;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>initCompiler</em>()</div>

<p>Then when a Compiler finishes, it pops itself off the stack by restoring the
previous compiler to be the new current one.</p>
<div class="codehilite"><pre class="insert-before">#endif

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>endCompiler</em>()</div>
<pre class="insert">  <span class="i">current</span> = <span class="i">current</span>-&gt;<span class="i">enclosing</span>;
</pre><pre class="insert-after">  return function;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>endCompiler</em>()</div>

<p>Note that we don&rsquo;t even need to <span name="compiler">dynamically</span>
allocate the Compiler structs. Each is stored as a local variable in the C stack<span class="em">&mdash;</span>either in <code>compile()</code> or <code>function()</code>. The linked list of Compilers threads
through the C stack. The reason we can get an unbounded number of them is
that our compiler uses recursive descent, so <code>function()</code> ends up calling
itself recursively when you have nested function declarations.</p>
<aside name="compiler">
<p>Using the native stack for Compiler structs does mean our compiler has a
practical limit on how deeply nested function declarations can be. Go too far
and you could overflow the C stack. If we want the compiler to be more robust
against pathological or even malicious code<span class="em">&mdash;</span>a real concern for tools like
JavaScript VMs<span class="em">&mdash;</span>it would be good to have our compiler artificially limit the
amount of function nesting it permits.</p>
</aside>
<h3><a href="#function-parameters" id="function-parameters"><small>24&#8202;.&#8202;4&#8202;.&#8202;2</small>Function parameters</a></h3>
<p>Functions aren&rsquo;t very useful if you can&rsquo;t pass arguments to them, so let&rsquo;s do
parameters next.</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_LEFT_PAREN, &quot;Expect '(' after function name.&quot;);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>function</em>()</div>
<pre class="insert">  <span class="k">if</span> (!<span class="i">check</span>(<span class="a">TOKEN_RIGHT_PAREN</span>)) {
    <span class="k">do</span> {
      <span class="i">current</span>-&gt;<span class="i">function</span>-&gt;<span class="i">arity</span>++;
      <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">function</span>-&gt;<span class="i">arity</span> &gt; <span class="n">255</span>) {
        <span class="i">errorAtCurrent</span>(<span class="s">&quot;Can&#39;t have more than 255 parameters.&quot;</span>);
      }
      <span class="t">uint8_t</span> <span class="i">constant</span> = <span class="i">parseVariable</span>(<span class="s">&quot;Expect parameter name.&quot;</span>);
      <span class="i">defineVariable</span>(<span class="i">constant</span>);
    } <span class="k">while</span> (<span class="i">match</span>(<span class="a">TOKEN_COMMA</span>));
  }
</pre><pre class="insert-after">  consume(TOKEN_RIGHT_PAREN, &quot;Expect ')' after parameters.&quot;);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>function</em>()</div>

<p>Semantically, a parameter is simply a local variable declared in the outermost
lexical scope of the function body. We get to use the existing compiler support
for declaring named local variables to parse and compile parameters. Unlike
local variables, which have initializers, there&rsquo;s no code here to initialize the
parameter&rsquo;s value. We&rsquo;ll see how parameters are initialized later when we do argument
passing in function calls.</p>
<p>While we&rsquo;re at it, we note the function&rsquo;s arity by counting how many parameters
we parse. The other piece of metadata we store with a function is its name. When
compiling a function declaration, we call <code>initCompiler()</code> right after we parse
the function&rsquo;s name. That means we can grab the name right then from the
previous token.</p>
<div class="codehilite"><pre class="insert-before">  current = compiler;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>initCompiler</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">type</span> != <span class="a">TYPE_SCRIPT</span>) {
    <span class="i">current</span>-&gt;<span class="i">function</span>-&gt;<span class="i">name</span> = <span class="i">copyString</span>(<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">start</span>,
                                         <span class="i">parser</span>.<span class="i">previous</span>.<span class="i">length</span>);
  }
</pre><pre class="insert-after">

  Local* local = &amp;current-&gt;locals[current-&gt;localCount++];
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>initCompiler</em>()</div>

<p>Note that we&rsquo;re careful to create a copy of the name string. Remember, the
lexeme points directly into the original source code string. That string may get
freed once the code is finished compiling. The function object we create in the
compiler outlives the compiler and persists until runtime. So it needs its own
heap-allocated name string that it can keep around.</p>
<p>Rad. Now we can compile function declarations, like this:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">areWeHavingItYet</span>() {
  <span class="k">print</span> <span class="s">&quot;Yes we are!&quot;</span>;
}

<span class="k">print</span> <span class="i">areWeHavingItYet</span>;
</pre></div>
<p>We just can&rsquo;t do anything <span name="useful">useful</span> with them.</p>
<aside name="useful">
<p>We can print them! I guess that&rsquo;s not very useful, though.</p>
</aside>
<h2><a href="#function-calls" id="function-calls"><small>24&#8202;.&#8202;5</small>Function Calls</a></h2>
<p>By the end of this section, we&rsquo;ll start to see some interesting behavior. The
next step is calling functions. We don&rsquo;t usually think of it this way, but a
function call expression is kind of an infix <code>(</code> operator. You have a
high-precedence expression on the left for the thing being called<span class="em">&mdash;</span>usually
just a single identifier. Then the <code>(</code> in the middle, followed by the argument
expressions separated by commas, and a final <code>)</code> to wrap it up at the end.</p>
<p>That odd grammatical perspective explains how to hook the syntax into our
parsing table.</p>
<div class="codehilite"><pre class="insert-before">ParseRule rules[] = {
</pre><div class="source-file"><em>compiler.c</em><br>
add after <em>unary</em>()<br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_LEFT_PAREN</span>]    = {<span class="i">grouping</span>, <span class="i">call</span>,   <span class="a">PREC_CALL</span>},
</pre><pre class="insert-after">  [TOKEN_RIGHT_PAREN]   = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>unary</em>(), replace 1 line</div>

<p>When the parser encounters a left parenthesis following an expression, it
dispatches to a new parser function.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>binary</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">call</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
  <span class="t">uint8_t</span> <span class="i">argCount</span> = <span class="i">argumentList</span>();
  <span class="i">emitBytes</span>(<span class="a">OP_CALL</span>, <span class="i">argCount</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>binary</em>()</div>

<p>We&rsquo;ve already consumed the <code>(</code> token, so next we compile the arguments using a
separate <code>argumentList()</code> helper. That function returns the number of arguments
it compiled. Each argument expression generates code that leaves its value on
the stack in preparation for the call. After that, we emit a new <code>OP_CALL</code>
instruction to invoke the function, using the argument count as an operand.</p>
<p>We compile the arguments using this friend:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>defineVariable</em>()</div>
<pre><span class="k">static</span> <span class="t">uint8_t</span> <span class="i">argumentList</span>() {
  <span class="t">uint8_t</span> <span class="i">argCount</span> = <span class="n">0</span>;
  <span class="k">if</span> (!<span class="i">check</span>(<span class="a">TOKEN_RIGHT_PAREN</span>)) {
    <span class="k">do</span> {
      <span class="i">expression</span>();
      <span class="i">argCount</span>++;
    } <span class="k">while</span> (<span class="i">match</span>(<span class="a">TOKEN_COMMA</span>));
  }
  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after arguments.&quot;</span>);
  <span class="k">return</span> <span class="i">argCount</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>defineVariable</em>()</div>

<p>That code should look familiar from jlox. We chew through arguments as long as
we find commas after each expression. Once we run out, we consume the final
closing parenthesis and we&rsquo;re done.</p>
<p>Well, almost. Back in jlox, we added a compile-time check that you don&rsquo;t pass
more than 255 arguments to a call. At the time, I said that was because clox
would need a similar limit. Now you can see why<span class="em">&mdash;</span>since we stuff the argument
count into the bytecode as a single-byte operand, we can only go up to 255. We
need to verify that in this compiler too.</p>
<div class="codehilite"><pre class="insert-before">      expression();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>argumentList</em>()</div>
<pre class="insert">      <span class="k">if</span> (<span class="i">argCount</span> == <span class="n">255</span>) {
        <span class="i">error</span>(<span class="s">&quot;Can&#39;t have more than 255 arguments.&quot;</span>);
      }
</pre><pre class="insert-after">      argCount++;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>argumentList</em>()</div>

<p>That&rsquo;s the front end. Let&rsquo;s skip over to the back end, with a quick stop in the
middle to declare the new instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_LOOP,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_CALL</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<h3><a href="#binding-arguments-to-parameters" id="binding-arguments-to-parameters"><small>24&#8202;.&#8202;5&#8202;.&#8202;1</small>Binding arguments to parameters</a></h3>
<p>Before we get to the implementation, we should think about what the stack looks
like at the point of a call and what we need to do from there. When we reach the
call instruction, we have already executed the expression for the function being
called, followed by its arguments. Say our program looks like this:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">sum</span>(<span class="i">a</span>, <span class="i">b</span>, <span class="i">c</span>) {
  <span class="k">return</span> <span class="i">a</span> + <span class="i">b</span> + <span class="i">c</span>;
}

<span class="k">print</span> <span class="n">4</span> + <span class="i">sum</span>(<span class="n">5</span>, <span class="n">6</span>, <span class="n">7</span>);
</pre></div>
<p>If we pause the VM right on the <code>OP_CALL</code> instruction for that call to <code>sum()</code>,
the stack looks like this:</p><img src="image/calls-and-functions/argument-stack.png" alt="Stack: 4, fn sum, 5, 6, 7." />
<p>Picture this from the perspective of <code>sum()</code> itself. When the compiler compiled
<code>sum()</code>, it automatically allocated slot zero. Then, after that, it allocated
local slots for the parameters <code>a</code>, <code>b</code>, and <code>c</code>, in order. To perform a call to
<code>sum()</code>, we need a CallFrame initialized with the function being called and a
region of stack slots that it can use. Then we need to collect the arguments
passed to the function and get them into the corresponding slots for the
parameters.</p>
<p>When the VM starts executing the body of <code>sum()</code>, we want its stack window to
look like this:</p><img src="image/calls-and-functions/parameter-window.png" alt="The same stack with the sum() function's call frame window surrounding fn sum, 5, 6, and 7." />
<p>Do you notice how the argument slots that the caller sets up and the parameter
slots the callee needs are both in exactly the right order? How convenient! This
is no coincidence. When I talked about each CallFrame having its own window into
the stack, I never said those windows must be <em>disjoint</em>. There&rsquo;s nothing
preventing us from overlapping them, like this:</p><img src="image/calls-and-functions/overlapping-windows.png" alt="The same stack with the top-level call frame covering the entire stack and the sum() function's call frame window surrounding fn sum, 5, 6, and 7." />
<p><span name="lua">The</span> top of the caller&rsquo;s stack contains the function
being called followed by the arguments in order. We know the caller doesn&rsquo;t have
any other slots above those in use because any temporaries needed when
evaluating argument expressions have been discarded by now. The bottom of the
callee&rsquo;s stack overlaps so that the parameter slots exactly line up with where
the argument values already live.</p>
<aside name="lua">
<p>Different bytecode VMs and real CPU architectures have different <em>calling
conventions</em>, which are the specific mechanisms they use to pass arguments, store
the return address, etc. The mechanism I use here is based on Lua&rsquo;s clean, fast
virtual machine.</p>
</aside>
<p>This means that we don&rsquo;t need to do <em>any</em> work to &ldquo;bind an argument to a
parameter&rdquo;. There&rsquo;s no copying values between slots or across environments. The
arguments are already exactly where they need to be. It&rsquo;s hard to beat that for
performance.</p>
<p>Time to implement the call instruction.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_CALL</span>: {
        <span class="t">int</span> <span class="i">argCount</span> = <span class="a">READ_BYTE</span>();
        <span class="k">if</span> (!<span class="i">callValue</span>(<span class="i">peek</span>(<span class="i">argCount</span>), <span class="i">argCount</span>)) {
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We need to know the function being called and the number of arguments passed to
it. We get the latter from the instruction&rsquo;s operand. That also tells us where
to find the function on the stack by counting past the argument slots from the
top of the stack. We hand that data off to a separate <code>callValue()</code> function. If
that returns <code>false</code>, it means the call caused some sort of runtime error. When
that happens, we abort the interpreter.</p>
<p>If <code>callValue()</code> is successful, there will be a new frame on the CallFrame stack
for the called function. The <code>run()</code> function has its own cached pointer to the
current frame, so we need to update that.</p>
<div class="codehilite"><pre class="insert-before">          return INTERPRET_RUNTIME_ERROR;
        }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">        <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span> - <span class="n">1</span>];
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Since the bytecode dispatch loop reads from that <code>frame</code> variable, when the VM
goes to execute the next instruction, it will read the <code>ip</code> from the newly
called function&rsquo;s CallFrame and jump to its code. The work for executing that
call begins here:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>peek</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">callValue</span>(<span class="t">Value</span> <span class="i">callee</span>, <span class="t">int</span> <span class="i">argCount</span>) {
  <span class="k">if</span> (<span class="a">IS_OBJ</span>(<span class="i">callee</span>)) {
    <span class="k">switch</span> (<span class="a">OBJ_TYPE</span>(<span class="i">callee</span>)) {
      <span class="k">case</span> <span class="a">OBJ_FUNCTION</span>:<span name="switch"> </span>
        <span class="k">return</span> <span class="i">call</span>(<span class="a">AS_FUNCTION</span>(<span class="i">callee</span>), <span class="i">argCount</span>);
      <span class="k">default</span>:
        <span class="k">break</span>; <span class="c">// Non-callable object type.</span>
    }
  }
  <span class="i">runtimeError</span>(<span class="s">&quot;Can only call functions and classes.&quot;</span>);
  <span class="k">return</span> <span class="k">false</span>;
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>peek</em>()</div>

<aside name="switch">
<p>Using a <code>switch</code> statement to check a single type is overkill now, but will make
sense when we add cases to handle other callable types.</p>
</aside>
<p>There&rsquo;s more going on here than just initializing a new CallFrame. Because Lox
is dynamically typed, there&rsquo;s nothing to prevent a user from writing bad code
like:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">notAFunction</span> = <span class="n">123</span>;
<span class="i">notAFunction</span>();
</pre></div>
<p>If that happens, the runtime needs to safely report an error and halt. So the
first thing we do is check the type of the value that we&rsquo;re trying to call. If
it&rsquo;s not a function, we error out. Otherwise, the actual call happens here:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>peek</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">call</span>(<span class="t">ObjFunction</span>* <span class="i">function</span>, <span class="t">int</span> <span class="i">argCount</span>) {
  <span class="t">CallFrame</span>* <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span>++];
  <span class="i">frame</span>-&gt;<span class="i">function</span> = <span class="i">function</span>;
  <span class="i">frame</span>-&gt;<span class="i">ip</span> = <span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">code</span>;
  <span class="i">frame</span>-&gt;<span class="i">slots</span> = <span class="i">vm</span>.<span class="i">stackTop</span> - <span class="i">argCount</span> - <span class="n">1</span>;
  <span class="k">return</span> <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>peek</em>()</div>

<p>This simply initializes the next CallFrame on the stack. It stores a pointer to
the function being called and points the frame&rsquo;s <code>ip</code> to the beginning of the
function&rsquo;s bytecode. Finally, it sets up the <code>slots</code> pointer to give the frame
its window into the stack. The arithmetic there ensures that the arguments
already on the stack line up with the function&rsquo;s parameters:</p><img src="image/calls-and-functions/arithmetic.png" alt="The arithmetic to calculate frame-&gt;slots from stackTop and argCount." />
<p>The funny little <code>- 1</code> is to account for stack slot zero which the compiler set
aside for when we add methods later. The parameters start at slot one so we
make the window start one slot earlier to align them with the arguments.</p>
<p>Before we move on, let&rsquo;s add the new instruction to our disassembler.</p>
<div class="codehilite"><pre class="insert-before">      return jumpInstruction(&quot;OP_LOOP&quot;, -1, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_CALL</span>:
      <span class="k">return</span> <span class="i">byteInstruction</span>(<span class="s">&quot;OP_CALL&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>And one more quick side trip. Now that we have a handy function for initiating a
CallFrame, we may as well use it to set up the first frame for executing the
top-level code.</p>
<div class="codehilite"><pre class="insert-before">  push(OBJ_VAL(function));
</pre><div class="source-file"><em>vm.c</em><br>
in <em>interpret</em>()<br>
replace 4 lines</div>
<pre class="insert">  <span class="i">call</span>(<span class="i">function</span>, <span class="n">0</span>);
</pre><pre class="insert-after">

  return run();
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>interpret</em>(), replace 4 lines</div>

<p>OK, now back to calls<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h3><a href="#runtime-error-checking" id="runtime-error-checking"><small>24&#8202;.&#8202;5&#8202;.&#8202;2</small>Runtime error checking</a></h3>
<p>The overlapping stack windows work based on the assumption that a call passes
exactly one argument for each of the function&rsquo;s parameters. But, again, because
Lox ain&rsquo;t statically typed, a foolish user could pass too many or too few
arguments. In Lox, we&rsquo;ve defined that to be a runtime error, which we report
like so:</p>
<div class="codehilite"><pre class="insert-before">static bool call(ObjFunction* function, int argCount) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>call</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">argCount</span> != <span class="i">function</span>-&gt;<span class="i">arity</span>) {
    <span class="i">runtimeError</span>(<span class="s">&quot;Expected %d arguments but got %d.&quot;</span>,
        <span class="i">function</span>-&gt;<span class="i">arity</span>, <span class="i">argCount</span>);
    <span class="k">return</span> <span class="k">false</span>;
  }

</pre><pre class="insert-after">  CallFrame* frame = &amp;vm.frames[vm.frameCount++];
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>call</em>()</div>

<p>Pretty straightforward. This is why we store the arity of each function inside
the ObjFunction for it.</p>
<p>There&rsquo;s another error we need to report that&rsquo;s less to do with the user&rsquo;s
foolishness than our own. Because the CallFrame array has a fixed size, we need
to ensure a deep call chain doesn&rsquo;t overflow it.</p>
<div class="codehilite"><pre class="insert-before">  }

</pre><div class="source-file"><em>vm.c</em><br>
in <em>call</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">vm</span>.<span class="i">frameCount</span> == <span class="a">FRAMES_MAX</span>) {
    <span class="i">runtimeError</span>(<span class="s">&quot;Stack overflow.&quot;</span>);
    <span class="k">return</span> <span class="k">false</span>;
  }

</pre><pre class="insert-after">  CallFrame* frame = &amp;vm.frames[vm.frameCount++];
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>call</em>()</div>

<p>In practice, if a program gets anywhere close to this limit, there&rsquo;s most likely
a bug in some runaway recursive code.</p>
<h3><a href="#printing-stack-traces" id="printing-stack-traces"><small>24&#8202;.&#8202;5&#8202;.&#8202;3</small>Printing stack traces</a></h3>
<p>While we&rsquo;re on the subject of runtime errors, let&rsquo;s spend a little time making
them more useful. Stopping on a runtime error is important to prevent the VM
from crashing and burning in some ill-defined way. But simply aborting doesn&rsquo;t
help the user fix their code that <em>caused</em> that error.</p>
<p>The classic tool to aid debugging runtime failures is a <strong>stack trace</strong><span class="em">&mdash;</span>a
printout of each function that was still executing when the program died, and
where the execution was at the point that it died. Now that we have a call stack
and we&rsquo;ve conveniently stored each function&rsquo;s name, we can show that entire
stack when a runtime error disrupts the harmony of the user&rsquo;s existence. It
looks like this:</p>
<div class="codehilite"><pre class="insert-before">  fputs(&quot;\n&quot;, stderr);

</pre><div class="source-file"><em>vm.c</em><br>
in <em>runtimeError</em>()<br>
replace 4 lines</div>
<pre class="insert">  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="i">vm</span>.<span class="i">frameCount</span> - <span class="n">1</span>; <span class="i">i</span> &gt;= <span class="n">0</span>; <span class="i">i</span>--) {
    <span class="t">CallFrame</span>* <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">i</span>];
    <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="i">frame</span>-&gt;<span class="i">function</span>;
    <span class="t">size_t</span> <span class="i">instruction</span> = <span class="i">frame</span>-&gt;<span class="i">ip</span> - <span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">code</span> - <span class="n">1</span>;
    <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;[line %d] in &quot;</span>,<span name="minus"> </span>
            <span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">lines</span>[<span class="i">instruction</span>]);
    <span class="k">if</span> (<span class="i">function</span>-&gt;<span class="i">name</span> == <span class="a">NULL</span>) {
      <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;script</span><span class="e">\n</span><span class="s">&quot;</span>);
    } <span class="k">else</span> {
      <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;%s()</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">function</span>-&gt;<span class="i">name</span>-&gt;<span class="i">chars</span>);
    }
  }

</pre><pre class="insert-after">  resetStack();
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>runtimeError</em>(), replace 4 lines</div>

<aside name="minus">
<p>The <code>- 1</code> is because the IP is already sitting on the next instruction to be
executed but we want the stack trace to point to the previous failed
instruction.</p>
</aside>
<p>After printing the error message itself, we walk the call stack from <span
name="top">top</span> (the most recently called function) to bottom (the
top-level code). For each frame, we find the line number that corresponds to the
current <code>ip</code> inside that frame&rsquo;s function. Then we print that line number along
with the function name.</p>
<aside name="top">
<p>There is some disagreement on which order stack frames should be shown in a
trace. Most put the innermost function as the first line and work their way
towards the bottom of the stack. Python prints them out in the opposite order.
So reading from top to bottom tells you how your program got to where it is, and
the last line is where the error actually occurred.</p>
<p>There&rsquo;s a logic to that style. It ensures you can always see the innermost
function even if the stack trace is too long to fit on one screen. On the other
hand, the &ldquo;<a href="https://en.wikipedia.org/wiki/Inverted_pyramid_(journalism)">inverted pyramid</a>&rdquo; from journalism tells us we should put the most
important information <em>first</em> in a block of text. In a stack trace, that&rsquo;s the
function where the error actually occurred. Most other language implementations
do that.</p>
</aside>
<p>For example, if you run this broken program:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">a</span>() { <span class="i">b</span>(); }
<span class="k">fun</span> <span class="i">b</span>() { <span class="i">c</span>(); }
<span class="k">fun</span> <span class="i">c</span>() {
  <span class="i">c</span>(<span class="s">&quot;too&quot;</span>, <span class="s">&quot;many&quot;</span>);
}

<span class="i">a</span>();
</pre></div>
<p>It prints out:</p>
<div class="codehilite"><pre>Expected 0 arguments but got 2.
[line 4] in c()
[line 2] in b()
[line 1] in a()
[line 7] in script
</pre></div>
<p>That doesn&rsquo;t look too bad, does it?</p>
<h3><a href="#returning-from-functions" id="returning-from-functions"><small>24&#8202;.&#8202;5&#8202;.&#8202;4</small>Returning from functions</a></h3>
<p>We&rsquo;re getting close. We can call functions, and the VM will execute them. But we
can&rsquo;t <em>return</em> from them yet. We&rsquo;ve had an <code>OP_RETURN</code> instruction for quite
some time, but it&rsquo;s always had some kind of temporary code hanging out in it
just to get us out of the bytecode loop. The time has arrived for a real
implementation.</p>
<div class="codehilite"><pre class="insert-before">      case OP_RETURN: {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 2 lines</div>
<pre class="insert">        <span class="t">Value</span> <span class="i">result</span> = <span class="i">pop</span>();
        <span class="i">vm</span>.<span class="i">frameCount</span>--;
        <span class="k">if</span> (<span class="i">vm</span>.<span class="i">frameCount</span> == <span class="n">0</span>) {
          <span class="i">pop</span>();
          <span class="k">return</span> <span class="a">INTERPRET_OK</span>;
        }

        <span class="i">vm</span>.<span class="i">stackTop</span> = <span class="i">frame</span>-&gt;<span class="i">slots</span>;
        <span class="i">push</span>(<span class="i">result</span>);
        <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span> - <span class="n">1</span>];
        <span class="k">break</span>;
</pre><pre class="insert-after">      }
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 2 lines</div>

<p>When a function returns a value, that value will be on top of the stack. We&rsquo;re
about to discard the called function&rsquo;s entire stack window, so we pop that
return value off and hang on to it. Then we discard the CallFrame for the
returning function. If that was the very last CallFrame, it means we&rsquo;ve finished
executing the top-level code. The entire program is done, so we pop the main
script function from the stack and then exit the interpreter.</p>
<p>Otherwise, we discard all of the slots the callee was using for its parameters
and local variables. That includes the same slots the caller used to pass the
arguments. Now that the call is done, the caller doesn&rsquo;t need them anymore. This
means the top of the stack ends up right at the beginning of the returning
function&rsquo;s stack window.</p>
<p>We push the return value back onto the stack at that new, lower location. Then
we update the <code>run()</code> function&rsquo;s cached pointer to the current frame. Just like
when we began a call, on the next iteration of the bytecode dispatch loop, the
VM will read <code>ip</code> from that frame, and execution will jump back to the caller,
right where it left off, immediately after the <code>OP_CALL</code> instruction.</p><img src="image/calls-and-functions/return.png" alt="Each step of the return process: popping the return value, discarding the call frame, pushing the return value." />
<p>Note that we assume here that the function <em>did</em> actually return a value, but
a function can implicitly return by reaching the end of its body:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">noReturn</span>() {
  <span class="k">print</span> <span class="s">&quot;Do stuff&quot;</span>;
  <span class="c">// No return here.</span>
}

<span class="k">print</span> <span class="i">noReturn</span>(); <span class="c">// ???</span>
</pre></div>
<p>We need to handle that correctly too. The language is specified to implicitly
return <code>nil</code> in that case. To make that happen, we add this:</p>
<div class="codehilite"><pre class="insert-before">static void emitReturn() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>emitReturn</em>()</div>
<pre class="insert">  <span class="i">emitByte</span>(<span class="a">OP_NIL</span>);
</pre><pre class="insert-after">  emitByte(OP_RETURN);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>emitReturn</em>()</div>

<p>The compiler calls <code>emitReturn()</code> to write the <code>OP_RETURN</code> instruction at the
end of a function body. Now, before that, it emits an instruction to push <code>nil</code>
onto the stack. And with that, we have working function calls! They can even
take parameters! It almost looks like we know what we&rsquo;re doing here.</p>
<h2><a href="#return-statements" id="return-statements"><small>24&#8202;.&#8202;6</small>Return Statements</a></h2>
<p>If you want a function that returns something other than the implicit <code>nil</code>, you
need a <code>return</code> statement. Let&rsquo;s get that working.</p>
<div class="codehilite"><pre class="insert-before">    ifStatement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>statement</em>()</div>
<pre class="insert">  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_RETURN</span>)) {
    <span class="i">returnStatement</span>();
</pre><pre class="insert-after">  } else if (match(TOKEN_WHILE)) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>statement</em>()</div>

<p>When the compiler sees a <code>return</code> keyword, it goes here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>printStatement</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">returnStatement</span>() {
  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_SEMICOLON</span>)) {
    <span class="i">emitReturn</span>();
  } <span class="k">else</span> {
    <span class="i">expression</span>();
    <span class="i">consume</span>(<span class="a">TOKEN_SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after return value.&quot;</span>);
    <span class="i">emitByte</span>(<span class="a">OP_RETURN</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>printStatement</em>()</div>

<p>The return value expression is optional, so the parser looks for a semicolon
token to tell if a value was provided. If there is no return value, the
statement implicitly returns <code>nil</code>. We implement that by calling <code>emitReturn()</code>,
which emits an <code>OP_NIL</code> instruction followed by <code>OP_RETURN</code>. Otherwise, we compile the return value
expression and return it with an <code>OP_RETURN</code> instruction.</p>
<p>This is the same <code>OP_RETURN</code> instruction we&rsquo;ve already implemented<span class="em">&mdash;</span>we don&rsquo;t
need any new runtime code. This is quite a difference from jlox. There, we had
to use exceptions to unwind the stack when a <code>return</code> statement was executed.
That was because you could return from deep inside some nested blocks. Since
jlox recursively walks the AST, that meant there were a bunch of Java method
calls we needed to escape out of.</p>
<p>Our bytecode compiler flattens that all out. We do recursive descent during
parsing, but at runtime, the VM&rsquo;s bytecode dispatch loop is completely flat.
There is no recursion going on at the C level at all. So returning, even from
within some nested blocks, is as straightforward as returning from the end of
the function&rsquo;s body.</p>
<p>We&rsquo;re not totally done, though. The new <code>return</code> statement gives us a new
compile error to worry about. Returns are useful for returning from functions,
but the top level of a Lox program is imperative code too. You shouldn&rsquo;t be able
to <span name="worst">return</span> from there.</p>
<div class="codehilite"><pre><span class="k">return</span> <span class="s">&quot;What?!&quot;</span>;
</pre></div>
<aside name="worst">
<p>Allowing <code>return</code> at the top level isn&rsquo;t the worst idea in the world. It would
give you a natural way to terminate a script early. You could maybe even use a
returned number to indicate the process&rsquo;s exit code.</p>
</aside>
<p>We&rsquo;ve specified that it&rsquo;s a compile error to have a <code>return</code> statement outside
of any function, which we implement like so:</p>
<div class="codehilite"><pre class="insert-before">static void returnStatement() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>returnStatement</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">type</span> == <span class="a">TYPE_SCRIPT</span>) {
    <span class="i">error</span>(<span class="s">&quot;Can&#39;t return from top-level code.&quot;</span>);
  }

</pre><pre class="insert-after">  if (match(TOKEN_SEMICOLON)) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>returnStatement</em>()</div>

<p>This is one of the reasons we added that FunctionType enum to the compiler.</p>
<h2><a href="#native-functions" id="native-functions"><small>24&#8202;.&#8202;7</small>Native Functions</a></h2>
<p>Our VM is getting more powerful. We&rsquo;ve got functions, calls, parameters,
returns. You can define lots of different functions that can call each other in
interesting ways. But, ultimately, they can&rsquo;t really <em>do</em> anything. The only
user-visible thing a Lox program can do, regardless of its complexity, is print.
To add more capabilities, we need to expose them to the user.</p>
<p>A programming language implementation reaches out and touches the material world
through <strong>native functions</strong>. If you want to be able to write programs that
check the time, read user input, or access the file system, we need to add
native functions<span class="em">&mdash;</span>callable from Lox but implemented in C<span class="em">&mdash;</span>that expose those
capabilities.</p>
<p>At the language level, Lox is fairly complete<span class="em">&mdash;</span>it&rsquo;s got closures, classes,
inheritance, and other fun stuff. One reason it feels like a toy language is
because it has almost no native capabilities. We could turn it into a real
language by adding a long list of them.</p>
<p>However, grinding through a pile of OS operations isn&rsquo;t actually very
educational. Once you&rsquo;ve seen how to bind one piece of C code to Lox, you get
the idea. But you do need to see <em>one</em>, and even a single native function
requires us to build out all the machinery for interfacing Lox with C. So we&rsquo;ll
go through that and do all the hard work. Then, when that&rsquo;s done, we&rsquo;ll add one
tiny native function just to prove that it works.</p>
<p>The reason we need new machinery is because, from the implementation&rsquo;s
perspective, native functions are different from Lox functions. When they are
called, they don&rsquo;t push a CallFrame, because there&rsquo;s no bytecode for that
frame to point to. They have no bytecode chunk. Instead, they somehow reference
a piece of native C code.</p>
<p>We handle this in clox by defining native functions as an entirely different
object type.</p>
<div class="codehilite"><pre class="insert-before">} ObjFunction;
</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjFunction</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="t">Value</span> (*<span class="t">NativeFn</span>)(<span class="t">int</span> <span class="i">argCount</span>, <span class="t">Value</span>* <span class="i">args</span>);

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">NativeFn</span> <span class="i">function</span>;
} <span class="t">ObjNative</span>;
</pre><pre class="insert-after">

struct ObjString {
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjFunction</em></div>

<p>The representation is simpler than ObjFunction<span class="em">&mdash;</span>merely an Obj header and a
pointer to the C function that implements the native behavior. The native
function takes the argument count and a pointer to the first argument on the
stack. It accesses the arguments through that pointer. Once it&rsquo;s done, it
returns the result value.</p>
<p>As always, a new object type carries some accoutrements with it. To create an
ObjNative, we declare a constructor-like function.</p>
<div class="codehilite"><pre class="insert-before">ObjFunction* newFunction();
</pre><div class="source-file"><em>object.h</em><br>
add after <em>newFunction</em>()</div>
<pre class="insert"><span class="t">ObjNative</span>* <span class="i">newNative</span>(<span class="t">NativeFn</span> <span class="i">function</span>);
</pre><pre class="insert-after">ObjString* takeString(char* chars, int length);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after <em>newFunction</em>()</div>

<p>We implement that like so:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>newFunction</em>()</div>
<pre><span class="t">ObjNative</span>* <span class="i">newNative</span>(<span class="t">NativeFn</span> <span class="i">function</span>) {
  <span class="t">ObjNative</span>* <span class="i">native</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjNative</span>, <span class="a">OBJ_NATIVE</span>);
  <span class="i">native</span>-&gt;<span class="i">function</span> = <span class="i">function</span>;
  <span class="k">return</span> <span class="i">native</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>newFunction</em>()</div>

<p>The constructor takes a C function pointer to wrap in an ObjNative. It sets up
the object header and stores the function. For the header, we need a new object
type.</p>
<div class="codehilite"><pre class="insert-before">typedef enum {
  OBJ_FUNCTION,
</pre><div class="source-file"><em>object.h</em><br>
in enum <em>ObjType</em></div>
<pre class="insert">  <span class="a">OBJ_NATIVE</span>,
</pre><pre class="insert-after">  OBJ_STRING,
} ObjType;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in enum <em>ObjType</em></div>

<p>The VM also needs to know how to deallocate a native function object.</p>
<div class="codehilite"><pre class="insert-before">    }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_NATIVE</span>:
      <span class="a">FREE</span>(<span class="t">ObjNative</span>, <span class="i">object</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_STRING: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>There isn&rsquo;t much here since ObjNative doesn&rsquo;t own any extra memory. The other
capability all Lox objects support is being printed.</p>
<div class="codehilite"><pre class="insert-before">      break;
</pre><div class="source-file"><em>object.c</em><br>
in <em>printObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_NATIVE</span>:
      <span class="i">printf</span>(<span class="s">&quot;&lt;native fn&gt;&quot;</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_STRING:
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printObject</em>()</div>

<p>In order to support dynamic typing, we have a macro to see if a value is a
native function.</p>
<div class="codehilite"><pre class="insert-before">#define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define IS_NATIVE(value)       isObjType(value, OBJ_NATIVE)</span>
</pre><pre class="insert-after">#define IS_STRING(value)       isObjType(value, OBJ_STRING)
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>Assuming that returns true, this macro extracts the C function pointer from a
Value representing a native function:</p>
<div class="codehilite"><pre class="insert-before">#define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define AS_NATIVE(value) \</span>
<span class="a">    (((ObjNative*)AS_OBJ(value))-&gt;function)</span>
</pre><pre class="insert-after">#define AS_STRING(value)       ((ObjString*)AS_OBJ(value))
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>All of this baggage lets the VM treat native functions like any other object.
You can store them in variables, pass them around, throw them birthday parties,
etc. Of course, the operation we actually care about is <em>calling</em> them<span class="em">&mdash;</span>using
one as the left-hand operand in a call expression.</p>
<p>Over in <code>callValue()</code> we add another type case.</p>
<div class="codehilite"><pre class="insert-before">      case OBJ_FUNCTION:<span name="switch"> </span>
        return call(AS_FUNCTION(callee), argCount);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>callValue</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OBJ_NATIVE</span>: {
        <span class="t">NativeFn</span> <span class="i">native</span> = <span class="a">AS_NATIVE</span>(<span class="i">callee</span>);
        <span class="t">Value</span> <span class="i">result</span> = <span class="i">native</span>(<span class="i">argCount</span>, <span class="i">vm</span>.<span class="i">stackTop</span> - <span class="i">argCount</span>);
        <span class="i">vm</span>.<span class="i">stackTop</span> -= <span class="i">argCount</span> + <span class="n">1</span>;
        <span class="i">push</span>(<span class="i">result</span>);
        <span class="k">return</span> <span class="k">true</span>;
      }
</pre><pre class="insert-after">      default:
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>callValue</em>()</div>

<p>If the object being called is a native function, we invoke the C function right
then and there. There&rsquo;s no need to muck with CallFrames or anything. We just
hand off to C, get the result, and stuff it back in the stack. This makes native
functions as fast as we can get.</p>
<p>With this, users should be able to call native functions, but there aren&rsquo;t any
to call. Without something like a foreign function interface, users can&rsquo;t define
their own native functions. That&rsquo;s our job as VM implementers. We&rsquo;ll start with
a helper to define a new native function exposed to Lox programs.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>runtimeError</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">defineNative</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>, <span class="t">NativeFn</span> <span class="i">function</span>) {
  <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">copyString</span>(<span class="i">name</span>, (<span class="t">int</span>)<span class="i">strlen</span>(<span class="i">name</span>))));
  <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">newNative</span>(<span class="i">function</span>)));
  <span class="i">tableSet</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>, <span class="a">AS_STRING</span>(<span class="i">vm</span>.<span class="i">stack</span>[<span class="n">0</span>]), <span class="i">vm</span>.<span class="i">stack</span>[<span class="n">1</span>]);
  <span class="i">pop</span>();
  <span class="i">pop</span>();
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>runtimeError</em>()</div>

<p>It takes a pointer to a C function and the name it will be known as in Lox.
We wrap the function in an ObjNative and then store that in a global variable
with the given name.</p>
<p>You&rsquo;re probably wondering why we push and pop the name and function on the
stack. That looks weird, right? This is the kind of stuff you have to worry
about when <span name="worry">garbage</span> collection gets involved. Both
<code>copyString()</code> and <code>newNative()</code> dynamically allocate memory. That means once we
have a GC, they can potentially trigger a collection. If that happens, we need
to ensure the collector knows we&rsquo;re not done with the name and ObjNative so
that it doesn&rsquo;t free them out from under us. Storing them on the value stack
accomplishes that.</p>
<aside name="worry">
<p>Don&rsquo;t worry if you didn&rsquo;t follow all that. It will make a lot more sense once we
get around to <a href="garbage-collection.html">implementing the GC</a>.</p>
</aside>
<p>It feels silly, but after all of that work, we&rsquo;re going to add only one
little native function.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after variable <em>vm</em></div>
<pre><span class="k">static</span> <span class="t">Value</span> <span class="i">clockNative</span>(<span class="t">int</span> <span class="i">argCount</span>, <span class="t">Value</span>* <span class="i">args</span>) {
  <span class="k">return</span> <span class="a">NUMBER_VAL</span>((<span class="t">double</span>)<span class="i">clock</span>() / <span class="a">CLOCKS_PER_SEC</span>);
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after variable <em>vm</em></div>

<p>This returns the elapsed time since the program started running, in seconds. It&rsquo;s
handy for benchmarking Lox programs. In Lox, we&rsquo;ll name it <code>clock()</code>.</p>
<div class="codehilite"><pre class="insert-before">  initTable(&amp;vm.strings);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">

  <span class="i">defineNative</span>(<span class="s">&quot;clock&quot;</span>, <span class="i">clockNative</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>To get to the C standard library <code>clock()</code> function, the &ldquo;vm&rdquo; module needs an
include.</p>
<div class="codehilite"><pre class="insert-before">#include &lt;string.h&gt;
</pre><div class="source-file"><em>vm.c</em></div>
<pre class="insert"><span class="a">#include &lt;time.h&gt;</span>
</pre><pre class="insert-after">

#include &quot;common.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em></div>

<p>That was a lot of material to work through, but we did it! Type this in and try
it out:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">fib</span>(<span class="i">n</span>) {
  <span class="k">if</span> (<span class="i">n</span> &lt; <span class="n">2</span>) <span class="k">return</span> <span class="i">n</span>;
  <span class="k">return</span> <span class="i">fib</span>(<span class="i">n</span> - <span class="n">2</span>) + <span class="i">fib</span>(<span class="i">n</span> - <span class="n">1</span>);
}

<span class="k">var</span> <span class="i">start</span> = <span class="i">clock</span>();
<span class="k">print</span> <span class="i">fib</span>(<span class="n">35</span>);
<span class="k">print</span> <span class="i">clock</span>() - <span class="i">start</span>;
</pre></div>
<p>We can write a really inefficient recursive Fibonacci function. Even better, we
can measure just <span name="faster"><em>how</em></span> inefficient it is. This is, of
course, not the smartest way to calculate a Fibonacci number. But it is a good
way to stress test a language implementation&rsquo;s support for function calls. On my
machine, running this in clox is about five times faster than in jlox. That&rsquo;s
quite an improvement.</p>
<aside name="faster">
<p>It&rsquo;s a little slower than a comparable Ruby program run in Ruby 2.4.3p205, and
about 3x faster than one run in Python 3.7.3. And we still have a lot of simple
optimizations we can do in our VM.</p>
</aside>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Reading and writing the <code>ip</code> field is one of the most frequent operations
inside the bytecode loop. Right now, we access it through a pointer to the
current CallFrame. That requires a pointer indirection which may force the
CPU to bypass the cache and hit main memory. That can be a real performance
sink.</p>
<p>Ideally, we&rsquo;d keep the <code>ip</code> in a native CPU register. C doesn&rsquo;t let us
<em>require</em> that without dropping into inline assembly, but we can structure
the code to encourage the compiler to make that optimization. If we store
the <code>ip</code> directly in a C local variable and mark it <code>register</code>, there&rsquo;s a
good chance the C compiler will accede to our polite request.</p>
<p>This does mean we need to be careful to load the local <code>ip</code> from, and store
it back into, the correct CallFrame when starting and ending function calls.
Implement this optimization. Write a couple of benchmarks and see how it
affects the performance. Do you think the extra code complexity is worth it?</p>
</li>
<li>
<p>Native function calls are fast in part because we don&rsquo;t validate that the
call passes as many arguments as the function expects. We really should, or
an incorrect call to a native function without enough arguments could cause
the function to read uninitialized memory. Add arity checking.</p>
</li>
<li>
<p>Right now, there&rsquo;s no way for a native function to signal a runtime error.
In a real implementation, this is something we&rsquo;d need to support because
native functions live in the statically typed world of C but are called
from dynamically typed Lox land. If a user, say, tries to pass a string to
<code>sqrt()</code>, that native function needs to report a runtime error.</p>
<p>Extend the native function system to support that. How does this capability
affect the performance of native calls?</p>
</li>
<li>
<p>Add some more native functions to do things you find useful. Write some
programs using those. What did you add? How do they affect the feel of the
language and how practical it is?</p>
</li>
</ol>
</div>

<footer>
<a href="closures.html" class="next">
  Next Chapter: &ldquo;Closures&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/chunks-of-bytecode.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Chunks of Bytecode &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Chunks of Bytecode<small>14</small></a></h3>

<ul>
    <li><a href="#bytecode"><small>14.1</small> Bytecode?</a></li>
    <li><a href="#getting-started"><small>14.2</small> Getting Started</a></li>
    <li><a href="#chunks-of-instructions"><small>14.3</small> Chunks of Instructions</a></li>
    <li><a href="#disassembling-chunks"><small>14.4</small> Disassembling Chunks</a></li>
    <li><a href="#constants"><small>14.5</small> Constants</a></li>
    <li><a href="#line-information"><small>14.6</small> Line Information</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Test Your Language</a></li>
</ul>


<div class="prev-next">
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="a-virtual-machine.html" title="A Virtual Machine" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine" class="prev">←</a>
<a href="a-virtual-machine.html" title="A Virtual Machine" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Chunks of Bytecode<small>14</small></a></h3>

<ul>
    <li><a href="#bytecode"><small>14.1</small> Bytecode?</a></li>
    <li><a href="#getting-started"><small>14.2</small> Getting Started</a></li>
    <li><a href="#chunks-of-instructions"><small>14.3</small> Chunks of Instructions</a></li>
    <li><a href="#disassembling-chunks"><small>14.4</small> Disassembling Chunks</a></li>
    <li><a href="#constants"><small>14.5</small> Constants</a></li>
    <li><a href="#line-information"><small>14.6</small> Line Information</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Test Your Language</a></li>
</ul>


<div class="prev-next">
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="a-virtual-machine.html" title="A Virtual Machine" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">14</div>
  <h1>Chunks of Bytecode</h1>

<blockquote>
<p>If you find that you&rsquo;re spending almost all your time on theory, start turning
some attention to practical things; it will improve your theories. If you find
that you&rsquo;re spending almost all your time on practice, start turning some
attention to theoretical things; it will improve your practice.</p>
<p><cite>Donald Knuth</cite></p>
</blockquote>
<p>We already have ourselves a complete implementation of Lox with jlox, so why
isn&rsquo;t the book over yet? Part of this is because jlox relies on the <span
name="metal">JVM</span> to do lots of things for us. If we want to understand
how an interpreter works all the way down to the metal, we need to build those
bits and pieces ourselves.</p>
<aside name="metal">
<p>Of course, our second interpreter relies on the C standard library for basics
like memory allocation, and the C compiler frees us from details of the
underlying machine code we&rsquo;re running it on. Heck, that machine code is probably
implemented in terms of microcode on the chip. And the C runtime relies on the
operating system to hand out pages of memory. But we have to stop <em>somewhere</em> if
this book is going to fit on your bookshelf.</p>
</aside>
<p>An even more fundamental reason that jlox isn&rsquo;t sufficient is that it&rsquo;s too damn
slow. A tree-walk interpreter is fine for some kinds of high-level, declarative
languages. But for a general-purpose, imperative language<span class="em">&mdash;</span>even a &ldquo;scripting&rdquo;
language like Lox<span class="em">&mdash;</span>it won&rsquo;t fly. Take this little script:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">fib</span>(<span class="i">n</span>) {
  <span class="k">if</span> (<span class="i">n</span> &lt; <span class="n">2</span>) <span class="k">return</span> <span class="i">n</span>;
  <span class="k">return</span> <span class="i">fib</span>(<span class="i">n</span> - <span class="n">1</span>) + <span class="i">fib</span>(<span class="i">n</span> - <span class="n">2</span>);<span name="fib"> </span>
}

<span class="k">var</span> <span class="i">before</span> = <span class="i">clock</span>();
<span class="k">print</span> <span class="i">fib</span>(<span class="n">40</span>);
<span class="k">var</span> <span class="i">after</span> = <span class="i">clock</span>();
<span class="k">print</span> <span class="i">after</span> - <span class="i">before</span>;
</pre></div>
<aside name="fib">
<p>This is a comically inefficient way to actually calculate Fibonacci numbers.
Our goal is to see how fast the <em>interpreter</em> runs, not to see how fast of a
program we can write. A slow program that does a lot of work<span class="em">&mdash;</span>pointless or not<span class="em">&mdash;</span>is a good test case for that.</p>
</aside>
<p>On my laptop, that takes jlox about 72 seconds to execute. An equivalent C
program finishes in half a second. Our dynamically typed scripting language is
never going to be as fast as a statically typed language with manual memory
management, but we don&rsquo;t need to settle for more than <em>two orders of magnitude</em>
slower.</p>
<p>We could take jlox and run it in a profiler and start tuning and tweaking
hotspots, but that will only get us so far. The execution model<span class="em">&mdash;</span>walking the
AST<span class="em">&mdash;</span>is fundamentally the wrong design. We can&rsquo;t micro-optimize that to the
performance we want any more than you can polish an AMC Gremlin into an SR-71
Blackbird.</p>
<p>We need to rethink the core model. This chapter introduces that model, bytecode,
and begins our new interpreter, clox.</p>
<h2><a href="#bytecode" id="bytecode"><small>14&#8202;.&#8202;1</small>Bytecode?</a></h2>
<p>In engineering, few choices are without trade-offs. To best understand why we&rsquo;re
going with bytecode, let&rsquo;s stack it up against a couple of alternatives.</p>
<h3><a href="#why-not-walk-the-ast" id="why-not-walk-the-ast"><small>14&#8202;.&#8202;1&#8202;.&#8202;1</small>Why not walk the AST?</a></h3>
<p>Our existing interpreter has a couple of things going for it:</p>
<ul>
<li>
<p>Well, first, we already wrote it. It&rsquo;s done. And the main reason it&rsquo;s done
is because this style of interpreter is <em>really simple to implement</em>. The
runtime representation of the code directly maps to the syntax. It&rsquo;s
virtually effortless to get from the parser to the data structures we need
at runtime.</p>
</li>
<li>
<p>It&rsquo;s <em>portable</em>. Our current interpreter is written in Java and runs on any
platform Java supports. We could write a new implementation in C using the
same approach and compile and run our language on basically every platform
under the sun.</p>
</li>
</ul>
<p>Those are real advantages. But, on the other hand, it&rsquo;s <em>not memory-efficient</em>.
Each piece of syntax becomes an AST node. A tiny Lox expression like <code>1 + 2</code>
turns into a slew of objects with lots of pointers between them, something like:</p>
<p><span name="header"></span></p>
<aside name="header">
<p>The &ldquo;(header)&rdquo; parts are the bookkeeping information the Java virtual machine
uses to support memory management and store the object&rsquo;s type. Those take up
space too!</p>
</aside><img src="image/chunks-of-bytecode/ast.png" alt="The tree of Java objects created to represent '1 + 2'." />
<p>Each of those pointers adds an extra 32 or 64 bits of overhead to the object.
Worse, sprinkling our data across the heap in a loosely connected web of objects
does bad things for <span name="locality"><em>spatial locality</em></span>.</p>
<aside name="locality">
<p>I wrote <a href="http://gameprogrammingpatterns.com/data-locality.html">an entire chapter</a> about this exact problem in my first
book, <em>Game Programming Patterns</em>, if you want to really dig in.</p>
</aside>
<p>Modern CPUs process data way faster than they can pull it from RAM. To
compensate for that, chips have multiple layers of caching. If a piece of memory
the CPU needs is already in the cache, it can be loaded more quickly. We&rsquo;re
talking upwards of 100 <em>times</em> faster.</p>
<p>How does data get into that cache? The machine speculatively stuffs things in
there for you. Its heuristic is pretty simple. Whenever the CPU reads a bit of
data from RAM, it pulls in a whole little bundle of adjacent bytes<span class="em">&mdash;</span>a
<em>cache line</em><span class="em">&mdash;</span>and stuffs them in the cache.</p>
<p>If our program next requests some data close enough to be inside that cache
line, our CPU runs like a well-oiled conveyor belt in a factory. We <em>really</em>
want to take advantage of this. To use the cache effectively, the way we
represent code in memory should be dense and ordered like it&rsquo;s read.</p>
<p>Now look up at that tree. Those sub-objects could be <span
name="anywhere"><em>anywhere</em></span>. Every step the tree-walker takes where it
follows a reference to a child node may step outside the bounds of the cache and
force the CPU to stall until a new lump of data can be slurped in from RAM. Just
the <em>overhead</em> of those tree nodes with all of their pointer fields and object
headers tends to push objects away from each other and out of the cache.</p>
<aside name="anywhere">
<p>Even if the objects happened to be allocated in sequential memory when the
parser first produced them, after a couple of rounds of garbage collection<span class="em">&mdash;</span>which may move objects around in memory<span class="em">&mdash;</span>there&rsquo;s no telling where they&rsquo;ll be.</p>
</aside>
<p>Our AST walker has other overhead too, around interface dispatch and the Visitor
pattern, but the locality issues alone are enough to justify a better code
representation.</p>
<h3><a href="#why-not-compile-to-native-code" id="why-not-compile-to-native-code"><small>14&#8202;.&#8202;1&#8202;.&#8202;2</small>Why not compile to native code?</a></h3>
<p>If you want to go <em>real</em> fast, you want to get all of those layers of
indirection out of the way. Right down to the metal. Machine code. It even
<em>sounds</em> fast. <em>Machine code.</em></p>
<p>Compiling directly to the native instruction set the chip supports is what the
fastest languages do. Targeting native code has been the most efficient option
since way back in the early days when engineers actually <span
name="hand">handwrote</span> programs in machine code.</p>
<aside name="hand">
<p>Yes, they actually wrote machine code by hand. On punched cards. Which,
presumably, they punched <em>with their fists</em>.</p>
</aside>
<p>If you&rsquo;ve never before written any machine code, or its slightly more
human-palatable cousin, assembly code, I&rsquo;ll give you the gentlest of introductions. Native
code is a dense series of operations, encoded directly in binary. Each
instruction is between one and a few bytes long, and is almost mind-numbingly
low level. &ldquo;Move a value from this address to this register.&rdquo; &ldquo;Add the integers
in these two registers.&rdquo; Stuff like that.</p>
<p>The CPU cranks through the instructions, decoding and executing each one in
order. There is no tree structure like our AST, and control flow is handled by
jumping from one point in the code directly to another. No indirection, no
overhead, no unnecessary skipping around or chasing pointers.</p>
<p>Lightning fast, but that performance comes at a cost. First of all, compiling to
native code ain&rsquo;t easy. Most chips in wide use today have sprawling Byzantine
architectures with heaps of instructions that accreted over decades. They
require sophisticated register allocation, pipelining, and instruction
scheduling.</p>
<p>And, of course, you&rsquo;ve thrown <span name="back">portability</span> out. Spend a
few years mastering some architecture and that still only gets you onto <em>one</em> of
the several popular instruction sets out there. To get your language on all of
them, you need to learn all of their instruction sets and write a separate back
end for each one.</p>
<aside name="back">
<p>The situation isn&rsquo;t entirely dire. A well-architected compiler lets you
share the front end and most of the middle layer optimization passes across the
different architectures you support. It&rsquo;s mainly the code generation and some of
the details around instruction selection that you&rsquo;ll need to write afresh each
time.</p>
<p>The <a href="https://llvm.org/">LLVM</a> project gives you some of this out of the box. If your compiler
outputs LLVM&rsquo;s own special intermediate language, LLVM in turn compiles that to
native code for a plethora of architectures.</p>
</aside>
<h3><a href="#what-is-bytecode" id="what-is-bytecode"><small>14&#8202;.&#8202;1&#8202;.&#8202;3</small>What is bytecode?</a></h3>
<p>Fix those two points in your mind. On one end, a tree-walk interpreter is
simple, portable, and slow. On the other, native code is complex and
platform-specific but fast. Bytecode sits in the middle. It retains the
portability of a tree-walker<span class="em">&mdash;</span>we won&rsquo;t be getting our hands dirty with
assembly code in this book. It sacrifices <em>some</em> simplicity to get a performance
boost in return, though it&rsquo;s still not as fast as going fully native.</p>
<p>Structurally, bytecode resembles machine code. It&rsquo;s a dense, linear sequence of
binary instructions. That keeps overhead low and plays nice with the cache.
However, it&rsquo;s a much simpler, higher-level instruction set than any real chip
out there. (In many bytecode formats, each instruction is only a single byte
long, hence &ldquo;bytecode&rdquo;.)</p>
<p>Imagine you&rsquo;re writing a native compiler from some source language and you&rsquo;re
given carte blanche to define the easiest possible architecture to target.
Bytecode is kind of like that. It&rsquo;s an idealized fantasy instruction set that
makes your life as the compiler writer easier.</p>
<p>The problem with a fantasy architecture, of course, is that it doesn&rsquo;t exist. We
solve that by writing an <em>emulator</em><span class="em">&mdash;</span>a simulated chip written in software that
interprets the bytecode one instruction at a time. A <em>virtual machine (VM)</em>, if
you will.</p>
<p>That emulation layer adds <span name="p-code">overhead</span>, which is a key
reason bytecode is slower than native code. But in return, it gives us
portability. Write our VM in a language like C that is already supported on all
the machines we care about, and we can run our emulator on top of any hardware
we like.</p>
<aside name="p-code">
<p>One of the first bytecode formats was <a href="https://en.wikipedia.org/wiki/P-code_machine">p-code</a>, developed for Niklaus Wirth&rsquo;s
Pascal language. You might think a PDP-11 running at 15MHz couldn&rsquo;t afford the
overhead of emulating a virtual machine. But back then, computers were in their
Cambrian explosion and new architectures appeared every day. Keeping up with the
latest chips was worth more than squeezing the maximum performance from each
one. That&rsquo;s why the &ldquo;p&rdquo; in p-code doesn&rsquo;t stand for &ldquo;Pascal&rdquo;, but &ldquo;portable&rdquo;.</p>
</aside>
<p>This is the path we&rsquo;ll take with our new interpreter, clox. We&rsquo;ll follow in the
footsteps of the main implementations of Python, Ruby, Lua, OCaml, Erlang, and
others. In many ways, our VM&rsquo;s design will parallel the structure of our
previous interpreter:</p>
<p><img src="image/chunks-of-bytecode/phases.png" alt="Phases of the two
implementations. jlox is Parser to Syntax Trees to Interpreter. clox is Compiler
to Bytecode to Virtual Machine." /></p>
<p>Of course, we won&rsquo;t implement the phases strictly in order. Like our previous
interpreter, we&rsquo;ll bounce around, building up the implementation one language
feature at a time. In this chapter, we&rsquo;ll get the skeleton of the application in
place and create the data structures needed to store and represent a chunk of
bytecode.</p>
<h2><a href="#getting-started" id="getting-started"><small>14&#8202;.&#8202;2</small>Getting Started</a></h2>
<p>Where else to begin, but at <code>main()</code>? <span name="ready">Fire</span> up your
trusty text editor and start typing.</p>
<aside name="ready">
<p>Now is a good time to stretch, maybe crack your knuckles. A little montage music
wouldn&rsquo;t hurt either.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>main.c</em><br>
create new file</div>
<pre><span class="a">#include &quot;common.h&quot;</span>

<span class="t">int</span> <span class="i">main</span>(<span class="t">int</span> <span class="i">argc</span>, <span class="k">const</span> <span class="t">char</span>* <span class="i">argv</span>[]) {
  <span class="k">return</span> <span class="n">0</span>;
}
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, create new file</div>

<p>From this tiny seed, we will grow our entire VM. Since C provides us with so
little, we first need to spend some time amending the soil. Some of that goes
into this header:</p>
<div class="codehilite"><div class="source-file"><em>common.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_common_h</span>
<span class="a">#define clox_common_h</span>

<span class="a">#include &lt;stdbool.h&gt;</span>
<span class="a">#include &lt;stddef.h&gt;</span>
<span class="a">#include &lt;stdint.h&gt;</span>

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>common.h</em>, create new file</div>

<p>There are a handful of types and constants we&rsquo;ll use throughout the interpreter,
and this is a convenient place to put them. For now, it&rsquo;s the venerable <code>NULL</code>,
<code>size_t</code>, the nice C99 Boolean <code>bool</code>, and explicit-sized integer types<span class="em">&mdash;</span><code>uint8_t</code> and friends.</p>
<h2><a href="#chunks-of-instructions" id="chunks-of-instructions"><small>14&#8202;.&#8202;3</small>Chunks of Instructions</a></h2>
<p>Next, we need a module to define our code representation. I&rsquo;ve been using
&ldquo;chunk&rdquo; to refer to sequences of bytecode, so let&rsquo;s make that the official name
for that module.</p>
<div class="codehilite"><div class="source-file"><em>chunk.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_chunk_h</span>
<span class="a">#define clox_chunk_h</span>

<span class="a">#include &quot;common.h&quot;</span>

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, create new file</div>

<p>In our bytecode format, each instruction has a one-byte <strong>operation code</strong>
(universally shortened to <strong>opcode</strong>). That number controls what kind of
instruction we&rsquo;re dealing with<span class="em">&mdash;</span>add, subtract, look up variable, etc. We
define those here:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;
</pre><div class="source-file"><em>chunk.h</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">enum</span> {
  <span class="a">OP_RETURN</span>,
} <span class="t">OpCode</span>;
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em></div>

<p>For now, we start with a single instruction, <code>OP_RETURN</code>. When we have a
full-featured VM, this instruction will mean &ldquo;return from the current function&rdquo;.
I admit this isn&rsquo;t exactly useful yet, but we have to start somewhere, and this
is a particularly simple instruction, for reasons we&rsquo;ll get to later.</p>
<h3><a href="#a-dynamic-array-of-instructions" id="a-dynamic-array-of-instructions"><small>14&#8202;.&#8202;3&#8202;.&#8202;1</small>A dynamic array of instructions</a></h3>
<p>Bytecode is a series of instructions. Eventually, we&rsquo;ll store some other data
along with the instructions, so let&rsquo;s go ahead and create a struct to hold it
all.</p>
<div class="codehilite"><pre class="insert-before">} OpCode;
</pre><div class="source-file"><em>chunk.h</em><br>
add after enum <em>OpCode</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">uint8_t</span>* <span class="i">code</span>;
} <span class="t">Chunk</span>;
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, add after enum <em>OpCode</em></div>

<p>At the moment, this is simply a wrapper around an array of bytes. Since we don&rsquo;t
know how big the array needs to be before we start compiling a chunk, it must be
dynamic. Dynamic arrays are one of my favorite data structures. That sounds like
claiming vanilla is my favorite ice cream <span name="flavor">flavor</span>, but
hear me out. Dynamic arrays provide:</p>
<aside name="flavor">
<p>Butter pecan is actually my favorite.</p>
</aside>
<ul>
<li>
<p>Cache-friendly, dense storage</p>
</li>
<li>
<p>Constant-time indexed element lookup</p>
</li>
<li>
<p>Constant-time appending to the end of the array</p>
</li>
</ul>
<p>Those features are exactly why we used dynamic arrays all the time in jlox under
the guise of Java&rsquo;s ArrayList class. Now that we&rsquo;re in C, we get to roll our
own. If you&rsquo;re rusty on dynamic arrays, the idea is pretty simple. In addition
to the array itself, we keep two numbers: the number of elements in the array we
have allocated (&ldquo;capacity&rdquo;) and how many of those allocated entries are actually
in use (&ldquo;count&rdquo;).</p>
<div class="codehilite"><pre class="insert-before">typedef struct {
</pre><div class="source-file"><em>chunk.h</em><br>
in struct <em>Chunk</em></div>
<pre class="insert">  <span class="t">int</span> <span class="i">count</span>;
  <span class="t">int</span> <span class="i">capacity</span>;
</pre><pre class="insert-after">  uint8_t* code;
} Chunk;
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in struct <em>Chunk</em></div>

<p>When we add an element, if the count is less than the capacity, then there is
already available space in the array. We store the new element right in there
and bump the count.</p>
<p><img src="image/chunks-of-bytecode/insert.png" alt="Storing an element in an
array that has enough capacity." /></p>
<p>If we have no spare capacity, then the process is a little more involved.</p>
<p><img src="image/chunks-of-bytecode/grow.png" alt="Growing the dynamic array
before storing an element." class="wide" /></p>
<ol>
<li><span name="amortized">Allocate</span> a new array with more capacity.</li>
<li>Copy the existing elements from the old array to the new one.</li>
<li>Store the new <code>capacity</code>.</li>
<li>Delete the old array.</li>
<li>Update <code>code</code> to point to the new array.</li>
<li>Store the element in the new array now that there is room.</li>
<li>Update the <code>count</code>.</li>
</ol>
<aside name="amortized">
<p>Copying the existing elements when you grow the array makes it seem like
appending an element is <em>O(n)</em>, not <em>O(1)</em> like I said above. However, you need
to do this copy step only on <em>some</em> of the appends. Most of the time, there is
already extra capacity, so you don&rsquo;t need to copy.</p>
<p>To understand how this works, we need <a href="https://en.wikipedia.org/wiki/Amortized_analysis"><strong>amortized
analysis</strong></a>. That shows us
that as long as we grow the array by a multiple of its current size, when we
average out the cost of a <em>sequence</em> of appends, each append is <em>O(1)</em>.</p>
</aside>
<p>We have our struct ready, so let&rsquo;s implement the functions to work with it. C
doesn&rsquo;t have constructors, so we declare a function to initialize a new chunk.</p>
<div class="codehilite"><pre class="insert-before">} Chunk;
</pre><div class="source-file"><em>chunk.h</em><br>
add after struct <em>Chunk</em></div>
<pre class="insert">

<span class="t">void</span> <span class="i">initChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, add after struct <em>Chunk</em></div>

<p>And implement it thusly:</p>
<div class="codehilite"><div class="source-file"><em>chunk.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdlib.h&gt;</span>

<span class="a">#include &quot;chunk.h&quot;</span>

<span class="t">void</span> <span class="i">initChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>) {
  <span class="i">chunk</span>-&gt;<span class="i">count</span> = <span class="n">0</span>;
  <span class="i">chunk</span>-&gt;<span class="i">capacity</span> = <span class="n">0</span>;
  <span class="i">chunk</span>-&gt;<span class="i">code</span> = <span class="a">NULL</span>;
}
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, create new file</div>

<p>The dynamic array starts off completely empty. We don&rsquo;t even allocate a raw
array yet. To append a byte to the end of the chunk, we use a new function.</p>
<div class="codehilite"><pre class="insert-before">void initChunk(Chunk* chunk);
</pre><div class="source-file"><em>chunk.h</em><br>
add after <em>initChunk</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">writeChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">uint8_t</span> <span class="i">byte</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, add after <em>initChunk</em>()</div>

<p>This is where the interesting work happens.</p>
<div class="codehilite"><div class="source-file"><em>chunk.c</em><br>
add after <em>initChunk</em>()</div>
<pre><span class="t">void</span> <span class="i">writeChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">uint8_t</span> <span class="i">byte</span>) {
  <span class="k">if</span> (<span class="i">chunk</span>-&gt;<span class="i">capacity</span> &lt; <span class="i">chunk</span>-&gt;<span class="i">count</span> + <span class="n">1</span>) {
    <span class="t">int</span> <span class="i">oldCapacity</span> = <span class="i">chunk</span>-&gt;<span class="i">capacity</span>;
    <span class="i">chunk</span>-&gt;<span class="i">capacity</span> = <span class="a">GROW_CAPACITY</span>(<span class="i">oldCapacity</span>);
    <span class="i">chunk</span>-&gt;<span class="i">code</span> = <span class="a">GROW_ARRAY</span>(<span class="t">uint8_t</span>, <span class="i">chunk</span>-&gt;<span class="i">code</span>,
        <span class="i">oldCapacity</span>, <span class="i">chunk</span>-&gt;<span class="i">capacity</span>);
  }

  <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">chunk</span>-&gt;<span class="i">count</span>] = <span class="i">byte</span>;
  <span class="i">chunk</span>-&gt;<span class="i">count</span>++;
}
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, add after <em>initChunk</em>()</div>

<p>The first thing we need to do is see if the current array already has capacity
for the new byte. If it doesn&rsquo;t, then we first need to grow the array to make
room. (We also hit this case on the very first write when the array is <code>NULL</code>
and <code>capacity</code> is 0.)</p>
<p>To grow the array, we first figure out the new capacity and then grow the array to
that size. Both of those lower-level memory operations are defined in a new
module.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;chunk.h&quot;
</pre><div class="source-file"><em>chunk.c</em></div>
<pre class="insert"><span class="a">#include &quot;memory.h&quot;</span>
</pre><pre class="insert-after">

void initChunk(Chunk* chunk) {
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em></div>

<p>This is enough to get us started.</p>
<div class="codehilite"><div class="source-file"><em>memory.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_memory_h</span>
<span class="a">#define clox_memory_h</span>

<span class="a">#include &quot;common.h&quot;</span>

<span class="a">#define GROW_CAPACITY(capacity) \</span>
<span class="a">    ((capacity) &lt; 8 ? 8 : (capacity) * 2)</span>

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>memory.h</em>, create new file</div>

<p>This macro calculates a new capacity based on a given current capacity. In order
to get the performance we want, the important part is that it <em>scales</em> based on
the old size. We grow by a factor of two, which is pretty typical. 1.5&times; is
another common choice.</p>
<p>We also handle the case where the current capacity is zero. In that case, we jump straight
to eight elements instead of starting at one. That <span
name="profile">avoids</span> a little extra memory churn when the array is very
small, at the expense of wasting a few bytes on very small chunks.</p>
<aside name="profile">
<p>I picked the number eight somewhat arbitrarily for the book. Most dynamic array
implementations have a minimum threshold like this. The right way to pick a
value for this is to profile against real-world usage and see which constant
makes the best performance trade-off between extra grows versus wasted space.</p>
</aside>
<p>Once we know the desired capacity, we create or grow the array to that size
using <code>GROW_ARRAY()</code>.</p>
<div class="codehilite"><pre class="insert-before">#define GROW_CAPACITY(capacity) \
    ((capacity) &lt; 8 ? 8 : (capacity) * 2)
</pre><div class="source-file"><em>memory.h</em></div>
<pre class="insert">

<span class="a">#define GROW_ARRAY(type, pointer, oldCount, newCount) \</span>
<span class="a">    (type*)reallocate(pointer, sizeof(type) * (oldCount), \</span>
<span class="a">        sizeof(type) * (newCount))</span>

<span class="t">void</span>* <span class="i">reallocate</span>(<span class="t">void</span>* <span class="i">pointer</span>, <span class="t">size_t</span> <span class="i">oldSize</span>, <span class="t">size_t</span> <span class="i">newSize</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>memory.h</em></div>

<p>This macro pretties up a function call to <code>reallocate()</code> where the real work
happens. The macro itself takes care of getting the size of the array&rsquo;s element
type and casting the resulting <code>void*</code> back to a pointer of the right type.</p>
<p>This <code>reallocate()</code> function is the single function we&rsquo;ll use for all dynamic
memory management in clox<span class="em">&mdash;</span>allocating memory, freeing it, and changing the
size of an existing allocation. Routing all of those operations through a single
function will be important later when we add a garbage collector that needs to
keep track of how much memory is in use.</p>
<p>The two size arguments passed to <code>reallocate()</code> control which operation to
perform:</p><table>
  <thead>
    <tr>
      <td>oldSize</td>
      <td>newSize</td>
      <td>Operation</td>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>0</td>
      <td>Non&#8209;zero</td>
      <td>Allocate new block.</td>
    </tr>
    <tr>
      <td>Non&#8209;zero</td>
      <td>0</td>
      <td>Free allocation.</td>
    </tr>
    <tr>
      <td>Non&#8209;zero</td>
      <td>Smaller&nbsp;than&nbsp;<code>oldSize</code></td>
      <td>Shrink existing allocation.</td>
    </tr>
    <tr>
      <td>Non&#8209;zero</td>
      <td>Larger&nbsp;than&nbsp;<code>oldSize</code></td>
      <td>Grow existing allocation.</td>
    </tr>
  </tbody>
</table>
<p>That sounds like a lot of cases to handle, but here&rsquo;s the implementation:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdlib.h&gt;</span>

<span class="a">#include &quot;memory.h&quot;</span>

<span class="t">void</span>* <span class="i">reallocate</span>(<span class="t">void</span>* <span class="i">pointer</span>, <span class="t">size_t</span> <span class="i">oldSize</span>, <span class="t">size_t</span> <span class="i">newSize</span>) {
  <span class="k">if</span> (<span class="i">newSize</span> == <span class="n">0</span>) {
    <span class="i">free</span>(<span class="i">pointer</span>);
    <span class="k">return</span> <span class="a">NULL</span>;
  }

  <span class="t">void</span>* <span class="i">result</span> = <span class="i">realloc</span>(<span class="i">pointer</span>, <span class="i">newSize</span>);
  <span class="k">return</span> <span class="i">result</span>;
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, create new file</div>

<p>When <code>newSize</code> is zero, we handle the deallocation case ourselves by calling
<code>free()</code>. Otherwise, we rely on the C standard library&rsquo;s <code>realloc()</code> function.
That function conveniently supports the other three aspects of our policy. When
<code>oldSize</code> is zero, the pointer we pass in is <code>NULL</code>, and <code>realloc()</code> on a <code>NULL</code> pointer is equivalent to calling <code>malloc()</code>.</p>
<p>The interesting cases are when both <code>oldSize</code> and <code>newSize</code> are not zero. Those
tell <code>realloc()</code> to resize the previously allocated block. If the new size is
smaller than the existing block of memory, it simply <span
name="shrink">updates</span> the size of the block and returns the same pointer
you gave it. If the new size is larger, it attempts to grow the existing block
of memory.</p>
<p>It can do that only if the memory after that block isn&rsquo;t already in use. If
there isn&rsquo;t room to grow the block, <code>realloc()</code> instead allocates a <em>new</em> block
of memory of the desired size, copies over the old bytes, frees the old block,
and then returns a pointer to the new block. Remember, that&rsquo;s exactly the
behavior we want for our dynamic array.</p>
<p>Because computers are finite lumps of matter and not the perfect mathematical
abstractions computer science theory would have us believe, allocation can fail
if there isn&rsquo;t enough memory and <code>realloc()</code> will return <code>NULL</code>. We should
handle that.</p>
<div class="codehilite"><pre class="insert-before">  void* result = realloc(pointer, newSize);
</pre><div class="source-file"><em>memory.c</em><br>
in <em>reallocate</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">result</span> == <span class="a">NULL</span>) <span class="i">exit</span>(<span class="n">1</span>);
</pre><pre class="insert-after">  return result;
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>reallocate</em>()</div>

<p>There&rsquo;s not really anything <em>useful</em> that our VM can do if it can&rsquo;t get the
memory it needs, but we at least detect that and abort the process immediately
instead of returning a <code>NULL</code> pointer and letting it go off the rails later.</p>
<aside name="shrink">
<p>Since all we passed in was a bare pointer to the first byte of memory, what does
it mean to &ldquo;update&rdquo; the block&rsquo;s size? Under the hood, the memory allocator
maintains additional bookkeeping information for each block of heap-allocated
memory, including its size.</p>
<p>Given a pointer to some previously allocated memory, it can find this
bookkeeping information, which is necessary to be able to cleanly free it. It&rsquo;s
this size metadata that <code>realloc()</code> updates.</p>
<p>Many implementations of <code>malloc()</code> store the allocated size in memory right
<em>before</em> the returned address.</p>
</aside>
<p>OK, we can create new chunks and write instructions to them. Are we done? Nope!
We&rsquo;re in C now, remember, we have to manage memory ourselves, like in Ye Olden
Times, and that means <em>freeing</em> it too.</p>
<div class="codehilite"><pre class="insert-before">void initChunk(Chunk* chunk);
</pre><div class="source-file"><em>chunk.h</em><br>
add after <em>initChunk</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">freeChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>);
</pre><pre class="insert-after">void writeChunk(Chunk* chunk, uint8_t byte);
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, add after <em>initChunk</em>()</div>

<p>The implementation is:</p>
<div class="codehilite"><div class="source-file"><em>chunk.c</em><br>
add after <em>initChunk</em>()</div>
<pre><span class="t">void</span> <span class="i">freeChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>) {
  <span class="a">FREE_ARRAY</span>(<span class="t">uint8_t</span>, <span class="i">chunk</span>-&gt;<span class="i">code</span>, <span class="i">chunk</span>-&gt;<span class="i">capacity</span>);
  <span class="i">initChunk</span>(<span class="i">chunk</span>);
}
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, add after <em>initChunk</em>()</div>

<p>We deallocate all of the memory and then call <code>initChunk()</code> to zero out the
fields leaving the chunk in a well-defined empty state. To free the memory, we
add one more macro.</p>
<div class="codehilite"><pre class="insert-before">#define GROW_ARRAY(type, pointer, oldCount, newCount) \
    (type*)reallocate(pointer, sizeof(type) * (oldCount), \
        sizeof(type) * (newCount))
</pre><div class="source-file"><em>memory.h</em></div>
<pre class="insert">

<span class="a">#define FREE_ARRAY(type, pointer, oldCount) \</span>
<span class="a">    reallocate(pointer, sizeof(type) * (oldCount), 0)</span>
</pre><pre class="insert-after">

void* reallocate(void* pointer, size_t oldSize, size_t newSize);
</pre></div>
<div class="source-file-narrow"><em>memory.h</em></div>

<p>Like <code>GROW_ARRAY()</code>, this is a wrapper around a call to <code>reallocate()</code>. This one
frees the memory by passing in zero for the new size. I know, this is a lot of
boring low-level stuff. Don&rsquo;t worry, we&rsquo;ll get a lot of use out of these in
later chapters and will get to program at a higher level. Before we can do that,
though, we gotta lay our own foundation.</p>
<h2><a href="#disassembling-chunks" id="disassembling-chunks"><small>14&#8202;.&#8202;4</small>Disassembling Chunks</a></h2>
<p>Now we have a little module for creating chunks of bytecode. Let&rsquo;s try it out by
hand-building a sample chunk.</p>
<div class="codehilite"><pre class="insert-before">int main(int argc, const char* argv[]) {
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">  <span class="t">Chunk</span> <span class="i">chunk</span>;
  <span class="i">initChunk</span>(&amp;<span class="i">chunk</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_RETURN</span>);
  <span class="i">freeChunk</span>(&amp;<span class="i">chunk</span>);
</pre><pre class="insert-after">  return 0;
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>Don&rsquo;t forget the include.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;
</pre><div class="source-file"><em>main.c</em></div>
<pre class="insert"><span class="a">#include &quot;chunk.h&quot;</span>
</pre><pre class="insert-after">

int main(int argc, const char* argv[]) {
</pre></div>
<div class="source-file-narrow"><em>main.c</em></div>

<p>Run that and give it a try. Did it work? Uh<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>who knows? All we&rsquo;ve done is push
some bytes around in memory. We have no human-friendly way to see what&rsquo;s
actually inside that chunk we made.</p>
<p>To fix this, we&rsquo;re going to create a <strong>disassembler</strong>. An <strong>assembler</strong> is an
old-school program that takes a file containing human-readable mnemonic names
for CPU instructions like &ldquo;ADD&rdquo; and &ldquo;MULT&rdquo; and translates them to their binary
machine code equivalent. A <em>dis</em>assembler goes in the other direction<span class="em">&mdash;</span>given a
blob of machine code, it spits out a textual listing of the instructions.</p>
<p>We&rsquo;ll implement something <span name="printer">similar</span>. Given a chunk, it
will print out all of the instructions in it. A Lox <em>user</em> won&rsquo;t use this, but
we Lox <em>maintainers</em> will certainly benefit since it gives us a window into the
interpreter&rsquo;s internal representation of code.</p>
<aside name="printer">
<p>In jlox, our analogous tool was the <a href="representing-code.html#a-not-very-pretty-printer">AstPrinter class</a>.</p>
</aside>
<p>In <code>main()</code>, after we create the chunk, we pass it to the disassembler.</p>
<div class="codehilite"><pre class="insert-before">  initChunk(&amp;chunk);
  writeChunk(&amp;chunk, OP_RETURN);
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">

  <span class="i">disassembleChunk</span>(&amp;<span class="i">chunk</span>, <span class="s">&quot;test chunk&quot;</span>);
</pre><pre class="insert-after">  freeChunk(&amp;chunk);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>Again, we whip up <span name="module">yet another</span> module.</p>
<aside name="module">
<p>I promise you we won&rsquo;t be creating this many new files in later chapters.</p>
</aside>
<div class="codehilite"><pre class="insert-before">#include &quot;chunk.h&quot;
</pre><div class="source-file"><em>main.c</em></div>
<pre class="insert"><span class="a">#include &quot;debug.h&quot;</span>
</pre><pre class="insert-after">

int main(int argc, const char* argv[]) {
</pre></div>
<div class="source-file-narrow"><em>main.c</em></div>

<p>Here&rsquo;s that header:</p>
<div class="codehilite"><div class="source-file"><em>debug.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_debug_h</span>
<span class="a">#define clox_debug_h</span>

<span class="a">#include &quot;chunk.h&quot;</span>

<span class="t">void</span> <span class="i">disassembleChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>);
<span class="t">int</span> <span class="i">disassembleInstruction</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">int</span> <span class="i">offset</span>);

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>debug.h</em>, create new file</div>

<p>In <code>main()</code>, we call <code>disassembleChunk()</code> to disassemble all of the instructions
in the entire chunk. That&rsquo;s implemented in terms of the other function, which
just disassembles a single instruction. It shows up here in the header because
we&rsquo;ll call it from the VM in later chapters.</p>
<p>Here&rsquo;s a start at the implementation file:</p>
<div class="codehilite"><div class="source-file"><em>debug.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdio.h&gt;</span>

<span class="a">#include &quot;debug.h&quot;</span>

<span class="t">void</span> <span class="i">disassembleChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>) {
  <span class="i">printf</span>(<span class="s">&quot;== %s ==</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">name</span>);

  <span class="k">for</span> (<span class="t">int</span> <span class="i">offset</span> = <span class="n">0</span>; <span class="i">offset</span> &lt; <span class="i">chunk</span>-&gt;<span class="i">count</span>;) {
    <span class="i">offset</span> = <span class="i">disassembleInstruction</span>(<span class="i">chunk</span>, <span class="i">offset</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, create new file</div>

<p>To disassemble a chunk, we print a little header (so we can tell <em>which</em> chunk
we&rsquo;re looking at) and then crank through the bytecode, disassembling each
instruction. The way we iterate through the code is a little odd. Instead of
incrementing <code>offset</code> in the loop, we let <code>disassembleInstruction()</code> do it for
us. When we call that function, after disassembling the instruction at the given
offset, it returns the offset of the <em>next</em> instruction. This is because, as
we&rsquo;ll see later, instructions can have different sizes.</p>
<p>The core of the &ldquo;debug&rdquo; module is this function:</p>
<div class="codehilite"><div class="source-file"><em>debug.c</em><br>
add after <em>disassembleChunk</em>()</div>
<pre><span class="t">int</span> <span class="i">disassembleInstruction</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">int</span> <span class="i">offset</span>) {
  <span class="i">printf</span>(<span class="s">&quot;%04d &quot;</span>, <span class="i">offset</span>);

  <span class="t">uint8_t</span> <span class="i">instruction</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span>];
  <span class="k">switch</span> (<span class="i">instruction</span>) {
    <span class="k">case</span> <span class="a">OP_RETURN</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_RETURN&quot;</span>, <span class="i">offset</span>);
    <span class="k">default</span>:
      <span class="i">printf</span>(<span class="s">&quot;Unknown opcode %d</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">instruction</span>);
      <span class="k">return</span> <span class="i">offset</span> + <span class="n">1</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, add after <em>disassembleChunk</em>()</div>

<p>First, it prints the byte offset of the given instruction<span class="em">&mdash;</span>that tells us where
in the chunk this instruction is. This will be a helpful signpost when we start
doing control flow and jumping around in the bytecode.</p>
<p>Next, it reads a single byte from the bytecode at the given offset. That&rsquo;s our
opcode. We <span name="switch">switch</span> on that. For each kind of
instruction, we dispatch to a little utility function for displaying it. On the
off chance that the given byte doesn&rsquo;t look like an instruction at all<span class="em">&mdash;</span>a bug
in our compiler<span class="em">&mdash;</span>we print that too. For the one instruction we do have,
<code>OP_RETURN</code>, the display function is:</p>
<aside name="switch">
<p>We have only one instruction right now, but this switch will grow throughout the
rest of the book.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>debug.c</em><br>
add after <em>disassembleChunk</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">simpleInstruction</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>, <span class="t">int</span> <span class="i">offset</span>) {
  <span class="i">printf</span>(<span class="s">&quot;%s</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">name</span>);
  <span class="k">return</span> <span class="i">offset</span> + <span class="n">1</span>;
}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, add after <em>disassembleChunk</em>()</div>

<p>There isn&rsquo;t much to a return instruction, so all it does is print the name of
the opcode, then return the next byte offset past this instruction. Other
instructions will have more going on.</p>
<p>If we run our nascent interpreter now, it actually prints something:</p>
<div class="codehilite"><pre>== test chunk ==
0000 OP_RETURN
</pre></div>
<p>It worked! This is sort of the &ldquo;Hello, world!&rdquo; of our code representation. We
can create a chunk, write an instruction to it, and then extract that
instruction back out. Our encoding and decoding of the binary bytecode is
working.</p>
<h2><a href="#constants" id="constants"><small>14&#8202;.&#8202;5</small>Constants</a></h2>
<p>Now that we have a rudimentary chunk structure working, let&rsquo;s start making it
more useful. We can store <em>code</em> in chunks, but what about <em>data</em>? Many values
the interpreter works with are created at runtime as the result of operations.</p>
<div class="codehilite"><pre><span class="n">1</span> + <span class="n">2</span>;
</pre></div>
<p>The value 3 appears nowhere in the code here. However, the literals <code>1</code> and <code>2</code>
do. To compile that statement to bytecode, we need some sort of instruction that
means &ldquo;produce a constant&rdquo; and those literal values need to get stored in the
chunk somewhere. In jlox, the Expr.Literal AST node held the value. We need a
different solution now that we don&rsquo;t have a syntax tree.</p>
<h3><a href="#representing-values" id="representing-values"><small>14&#8202;.&#8202;5&#8202;.&#8202;1</small>Representing values</a></h3>
<p>We won&rsquo;t be <em>running</em> any code in this chapter, but since constants have a foot
in both the static and dynamic worlds of our interpreter, they force us to start
thinking at least a little bit about how our VM should represent values.</p>
<p>For now, we&rsquo;re going to start as simple as possible<span class="em">&mdash;</span>we&rsquo;ll support only
double-precision floating-point numbers. This will obviously expand over time,
so we&rsquo;ll set up a new module to give ourselves room to grow.</p>
<div class="codehilite"><div class="source-file"><em>value.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_value_h</span>
<span class="a">#define clox_value_h</span>

<span class="a">#include &quot;common.h&quot;</span>

<span class="k">typedef</span> <span class="t">double</span> <span class="t">Value</span>;

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, create new file</div>

<p>This typedef abstracts how Lox values are concretely represented in C. That way,
we can change that representation without needing to go back and fix existing
code that passes around values.</p>
<p>Back to the question of where to store constants in a chunk. For small
fixed-size values like integers, many instruction sets store the value directly
in the code stream right after the opcode. These are called <strong>immediate
instructions</strong> because the bits for the value are immediately after the opcode.</p>
<p>That doesn&rsquo;t work well for large or variable-sized constants like strings. In a
native compiler to machine code, those bigger constants get stored in a separate
&ldquo;constant data&rdquo; region in the binary executable. Then, the instruction to load a
constant has an address or offset pointing to where the value is stored in that
section.</p>
<p>Most virtual machines do something similar. For example, the Java Virtual
Machine <a href="https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.4">associates a <strong>constant pool</strong></a> with each compiled class.
That sounds good enough for clox to me. Each chunk will carry with it a list of
the values that appear as literals in the program. To keep things <span
name="immediate">simpler</span>, we&rsquo;ll put <em>all</em> constants in there, even simple
integers.</p>
<aside name="immediate">
<p>In addition to needing two kinds of constant instructions<span class="em">&mdash;</span>one for immediate
values and one for constants in the constant table<span class="em">&mdash;</span>immediates also force us
to worry about alignment, padding, and endianness. Some architectures aren&rsquo;t
happy if you try to, say, stuff a 4-byte integer at an odd address.</p>
</aside>
<h3><a href="#value-arrays" id="value-arrays"><small>14&#8202;.&#8202;5&#8202;.&#8202;2</small>Value arrays</a></h3>
<p>The constant pool is an array of values. The instruction to load a constant
looks up the value by index in that array. As with our <span
name="generic">bytecode</span> array, the compiler doesn&rsquo;t know how big the
array needs to be ahead of time. So, again, we need a dynamic one. Since C
doesn&rsquo;t have generic data structures, we&rsquo;ll write another dynamic array data
structure, this time for Value.</p>
<aside name="generic">
<p>Defining a new struct and manipulation functions each time we need a dynamic
array of a different type is a chore. We could cobble together some preprocessor
macros to fake generics, but that&rsquo;s overkill for clox. We won&rsquo;t need many more
of these.</p>
</aside>
<div class="codehilite"><pre class="insert-before">typedef double Value;
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">int</span> <span class="i">capacity</span>;
  <span class="t">int</span> <span class="i">count</span>;
  <span class="t">Value</span>* <span class="i">values</span>;
} <span class="t">ValueArray</span>;
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>As with the bytecode array in Chunk, this struct wraps a pointer to an array
along with its allocated capacity and the number of elements in use. We also
need the same three functions to work with value arrays.</p>
<div class="codehilite"><pre class="insert-before">} ValueArray;
</pre><div class="source-file"><em>value.h</em><br>
add after struct <em>ValueArray</em></div>
<pre class="insert">

<span class="t">void</span> <span class="i">initValueArray</span>(<span class="t">ValueArray</span>* <span class="i">array</span>);
<span class="t">void</span> <span class="i">writeValueArray</span>(<span class="t">ValueArray</span>* <span class="i">array</span>, <span class="t">Value</span> <span class="i">value</span>);
<span class="t">void</span> <span class="i">freeValueArray</span>(<span class="t">ValueArray</span>* <span class="i">array</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after struct <em>ValueArray</em></div>

<p>The implementations will probably give you déjà vu. First, to create a new one:</p>
<div class="codehilite"><div class="source-file"><em>value.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdio.h&gt;</span>

<span class="a">#include &quot;memory.h&quot;</span>
<span class="a">#include &quot;value.h&quot;</span>

<span class="t">void</span> <span class="i">initValueArray</span>(<span class="t">ValueArray</span>* <span class="i">array</span>) {
  <span class="i">array</span>-&gt;<span class="i">values</span> = <span class="a">NULL</span>;
  <span class="i">array</span>-&gt;<span class="i">capacity</span> = <span class="n">0</span>;
  <span class="i">array</span>-&gt;<span class="i">count</span> = <span class="n">0</span>;
}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, create new file</div>

<p>Once we have an initialized array, we can start <span name="add">adding</span>
values to it.</p>
<aside name="add">
<p>Fortunately, we don&rsquo;t need other operations like insertion and removal.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>value.c</em><br>
add after <em>initValueArray</em>()</div>
<pre><span class="t">void</span> <span class="i">writeValueArray</span>(<span class="t">ValueArray</span>* <span class="i">array</span>, <span class="t">Value</span> <span class="i">value</span>) {
  <span class="k">if</span> (<span class="i">array</span>-&gt;<span class="i">capacity</span> &lt; <span class="i">array</span>-&gt;<span class="i">count</span> + <span class="n">1</span>) {
    <span class="t">int</span> <span class="i">oldCapacity</span> = <span class="i">array</span>-&gt;<span class="i">capacity</span>;
    <span class="i">array</span>-&gt;<span class="i">capacity</span> = <span class="a">GROW_CAPACITY</span>(<span class="i">oldCapacity</span>);
    <span class="i">array</span>-&gt;<span class="i">values</span> = <span class="a">GROW_ARRAY</span>(<span class="t">Value</span>, <span class="i">array</span>-&gt;<span class="i">values</span>,
                               <span class="i">oldCapacity</span>, <span class="i">array</span>-&gt;<span class="i">capacity</span>);
  }

  <span class="i">array</span>-&gt;<span class="i">values</span>[<span class="i">array</span>-&gt;<span class="i">count</span>] = <span class="i">value</span>;
  <span class="i">array</span>-&gt;<span class="i">count</span>++;
}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, add after <em>initValueArray</em>()</div>

<p>The memory-management macros we wrote earlier do let us reuse some of the logic
from the code array, so this isn&rsquo;t too bad. Finally, to release all memory used
by the array:</p>
<div class="codehilite"><div class="source-file"><em>value.c</em><br>
add after <em>writeValueArray</em>()</div>
<pre><span class="t">void</span> <span class="i">freeValueArray</span>(<span class="t">ValueArray</span>* <span class="i">array</span>) {
  <span class="a">FREE_ARRAY</span>(<span class="t">Value</span>, <span class="i">array</span>-&gt;<span class="i">values</span>, <span class="i">array</span>-&gt;<span class="i">capacity</span>);
  <span class="i">initValueArray</span>(<span class="i">array</span>);
}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, add after <em>writeValueArray</em>()</div>

<p>Now that we have growable arrays of values, we can add one to Chunk to store the
chunk&rsquo;s constants.</p>
<div class="codehilite"><pre class="insert-before">  uint8_t* code;
</pre><div class="source-file"><em>chunk.h</em><br>
in struct <em>Chunk</em></div>
<pre class="insert">  <span class="t">ValueArray</span> <span class="i">constants</span>;
</pre><pre class="insert-after">} Chunk;
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in struct <em>Chunk</em></div>

<p>Don&rsquo;t forget the include.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;
</pre><div class="source-file"><em>chunk.h</em></div>
<pre class="insert"><span class="a">#include &quot;value.h&quot;</span>
</pre><pre class="insert-after">

typedef enum {
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em></div>

<p>Ah, C, and its Stone Age modularity story. Where were we? Right. When we
initialize a new chunk, we initialize its constant list too.</p>
<div class="codehilite"><pre class="insert-before">  chunk-&gt;code = NULL;
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>initChunk</em>()</div>
<pre class="insert">  <span class="i">initValueArray</span>(&amp;<span class="i">chunk</span>-&gt;<span class="i">constants</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>initChunk</em>()</div>

<p>Likewise, we free the constants when we free the chunk.</p>
<div class="codehilite"><pre class="insert-before">  FREE_ARRAY(uint8_t, chunk-&gt;code, chunk-&gt;capacity);
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>freeChunk</em>()</div>
<pre class="insert">  <span class="i">freeValueArray</span>(&amp;<span class="i">chunk</span>-&gt;<span class="i">constants</span>);
</pre><pre class="insert-after">  initChunk(chunk);
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>freeChunk</em>()</div>

<p>Next, we define a convenience method to add a new constant to the chunk. Our
yet-to-be-written compiler could write to the constant array inside Chunk
directly<span class="em">&mdash;</span>it&rsquo;s not like C has private fields or anything<span class="em">&mdash;</span>but it&rsquo;s a little
nicer to add an explicit function.</p>
<div class="codehilite"><pre class="insert-before">void writeChunk(Chunk* chunk, uint8_t byte);
</pre><div class="source-file"><em>chunk.h</em><br>
add after <em>writeChunk</em>()</div>
<pre class="insert"><span class="t">int</span> <span class="i">addConstant</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">Value</span> <span class="i">value</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, add after <em>writeChunk</em>()</div>

<p>Then we implement it.</p>
<div class="codehilite"><div class="source-file"><em>chunk.c</em><br>
add after <em>writeChunk</em>()</div>
<pre><span class="t">int</span> <span class="i">addConstant</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">Value</span> <span class="i">value</span>) {
  <span class="i">writeValueArray</span>(&amp;<span class="i">chunk</span>-&gt;<span class="i">constants</span>, <span class="i">value</span>);
  <span class="k">return</span> <span class="i">chunk</span>-&gt;<span class="i">constants</span>.<span class="i">count</span> - <span class="n">1</span>;
}
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, add after <em>writeChunk</em>()</div>

<p>After we add the constant, we return the index where the constant was appended
so that we can locate that same constant later.</p>
<h3><a href="#constant-instructions" id="constant-instructions"><small>14&#8202;.&#8202;5&#8202;.&#8202;3</small>Constant instructions</a></h3>
<p>We can <em>store</em> constants in chunks, but we also need to <em>execute</em> them. In a
piece of code like:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="n">1</span>;
<span class="k">print</span> <span class="n">2</span>;
</pre></div>
<p>The compiled chunk needs to not only contain the values 1 and 2, but know <em>when</em>
to produce them so that they are printed in the right order. Thus, we need an
instruction that produces a particular constant.</p>
<div class="codehilite"><pre class="insert-before">typedef enum {
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_CONSTANT</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>When the VM executes a constant instruction, it <span name="load">&ldquo;loads&rdquo;</span>
the constant for use. This new instruction is a little more complex than
<code>OP_RETURN</code>. In the above example, we load two different constants. A single
bare opcode isn&rsquo;t enough to know <em>which</em> constant to load.</p>
<aside name="load">
<p>I&rsquo;m being vague about what it means to &ldquo;load&rdquo; or &ldquo;produce&rdquo; a constant because we
haven&rsquo;t learned how the virtual machine actually executes code at runtime yet.
For that, you&rsquo;ll have to wait until you get to (or skip ahead to, I suppose) the
<a href="a-virtual-machine.html">next chapter</a>.</p>
</aside>
<p>To handle cases like this, our bytecode<span class="em">&mdash;</span>like most others<span class="em">&mdash;</span>allows
instructions to have <span name="operand"><strong>operands</strong></span>. These are stored
as binary data immediately after the opcode in the instruction stream and let us
parameterize what the instruction does.</p>
<p><img src="image/chunks-of-bytecode/format.png" alt="OP_CONSTANT is a byte for
the opcode followed by a byte for the constant index." /></p>
<p>Each opcode determines how many operand bytes it has and what they mean. For
example, a simple operation like &ldquo;return&rdquo; may have no operands, whereas an
instruction for &ldquo;load local variable&rdquo; needs an operand to identify which
variable to load. Each time we add a new opcode to clox, we specify what its
operands look like<span class="em">&mdash;</span>its <strong>instruction format</strong>.</p>
<aside name="operand">
<p>Bytecode instruction operands are <em>not</em> the same as the operands passed to an
arithmetic operator. You&rsquo;ll see when we get to expressions that arithmetic
operand values are tracked separately. Instruction operands are a lower-level
notion that modifies how the bytecode instruction itself behaves.</p>
</aside>
<p>In this case, <code>OP_CONSTANT</code> takes a single byte operand that specifies which
constant to load from the chunk&rsquo;s constant array. Since we don&rsquo;t have a compiler
yet, we &ldquo;hand-compile&rdquo; an instruction in our test chunk.</p>
<div class="codehilite"><pre class="insert-before">  initChunk(&amp;chunk);
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()</div>
<pre class="insert">

  <span class="t">int</span> <span class="i">constant</span> = <span class="i">addConstant</span>(&amp;<span class="i">chunk</span>, <span class="n">1.2</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_CONSTANT</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="i">constant</span>);

</pre><pre class="insert-after">  writeChunk(&amp;chunk, OP_RETURN);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>()</div>

<p>We add the constant value itself to the chunk&rsquo;s constant pool. That returns the
index of the constant in the array. Then we write the constant instruction,
starting with its opcode. After that, we write the one-byte constant index
operand. Note that <code>writeChunk()</code> can write opcodes or operands. It&rsquo;s all raw
bytes as far as that function is concerned.</p>
<p>If we try to run this now, the disassembler is going to yell at us because it
doesn&rsquo;t know how to decode the new instruction. Let&rsquo;s fix that.</p>
<div class="codehilite"><pre class="insert-before">  switch (instruction) {
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_CONSTANT</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_CONSTANT&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>This instruction has a different instruction format, so we write a new helper
function to disassemble it.</p>
<div class="codehilite"><div class="source-file"><em>debug.c</em><br>
add after <em>disassembleChunk</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">constantInstruction</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>, <span class="t">Chunk</span>* <span class="i">chunk</span>,
                               <span class="t">int</span> <span class="i">offset</span>) {
  <span class="t">uint8_t</span> <span class="i">constant</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span> + <span class="n">1</span>];
  <span class="i">printf</span>(<span class="s">&quot;%-16s %4d &#39;&quot;</span>, <span class="i">name</span>, <span class="i">constant</span>);
  <span class="i">printValue</span>(<span class="i">chunk</span>-&gt;<span class="i">constants</span>.<span class="i">values</span>[<span class="i">constant</span>]);
  <span class="i">printf</span>(<span class="s">&quot;&#39;</span><span class="e">\n</span><span class="s">&quot;</span>);
}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, add after <em>disassembleChunk</em>()</div>

<p>There&rsquo;s more going on here. As with <code>OP_RETURN</code>, we print out the name of the
opcode. Then we pull out the constant index from the subsequent byte in the
chunk. We print that index, but that isn&rsquo;t super useful to us human readers. So
we also look up the actual constant value<span class="em">&mdash;</span>since constants <em>are</em> known at
compile time after all<span class="em">&mdash;</span>and display the value itself too.</p>
<p>This requires some way to print a clox Value. That function will live in the
&ldquo;value&rdquo; module, so we include that.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;debug.h&quot;
</pre><div class="source-file"><em>debug.c</em></div>
<pre class="insert"><span class="a">#include &quot;value.h&quot;</span>
</pre><pre class="insert-after">

void disassembleChunk(Chunk* chunk, const char* name) {
</pre></div>
<div class="source-file-narrow"><em>debug.c</em></div>

<p>Over in that header, we declare:</p>
<div class="codehilite"><pre class="insert-before">void freeValueArray(ValueArray* array);
</pre><div class="source-file"><em>value.h</em><br>
add after <em>freeValueArray</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">printValue</span>(<span class="t">Value</span> <span class="i">value</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after <em>freeValueArray</em>()</div>

<p>And here&rsquo;s an implementation:</p>
<div class="codehilite"><div class="source-file"><em>value.c</em><br>
add after <em>freeValueArray</em>()</div>
<pre><span class="t">void</span> <span class="i">printValue</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="i">printf</span>(<span class="s">&quot;%g&quot;</span>, <span class="i">value</span>);
}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, add after <em>freeValueArray</em>()</div>

<p>Magnificent, right? As you can imagine, this is going to get more complex once
we add dynamic typing to Lox and have values of different types.</p>
<p>Back in <code>constantInstruction()</code>, the only remaining piece is the return value.</p>
<div class="codehilite"><pre class="insert-before">  printf(&quot;'\n&quot;);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>constantInstruction</em>()</div>
<pre class="insert">  <span class="k">return</span> <span class="i">offset</span> + <span class="n">2</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>constantInstruction</em>()</div>

<p>Remember that <code>disassembleInstruction()</code> also returns a number to tell the
caller the offset of the beginning of the <em>next</em> instruction. Where <code>OP_RETURN</code>
was only a single byte, <code>OP_CONSTANT</code> is two<span class="em">&mdash;</span>one for the opcode and one for
the operand.</p>
<h2><a href="#line-information" id="line-information"><small>14&#8202;.&#8202;6</small>Line Information</a></h2>
<p>Chunks contain almost all of the information that the runtime needs from the
user&rsquo;s source code. It&rsquo;s kind of crazy to think that we can reduce all of the
different AST classes that we created in jlox down to an array of bytes and an
array of constants. There&rsquo;s only one piece of data we&rsquo;re missing. We need it,
even though the user hopes to never see it.</p>
<p>When a runtime error occurs, we show the user the line number of the offending
source code. In jlox, those numbers live in tokens, which we in turn store in
the AST nodes. We need a different solution for clox now that we&rsquo;ve ditched
syntax trees in favor of bytecode. Given any bytecode instruction, we need to be
able to determine the line of the user&rsquo;s source program that it was compiled
from.</p>
<p>There are a lot of clever ways we could encode this. I took the absolute <span
name="side">simplest</span> approach I could come up with, even though it&rsquo;s
embarrassingly inefficient with memory. In the chunk, we store a separate array
of integers that parallels the bytecode. Each number in the array is the line
number for the corresponding byte in the bytecode. When a runtime error occurs,
we look up the line number at the same index as the current instruction&rsquo;s offset
in the code array.</p>
<aside name="side">
<p>This braindead encoding does do one thing right: it keeps the line information
in a <em>separate</em> array instead of interleaving it in the bytecode itself. Since
line information is only used when a runtime error occurs, we don&rsquo;t want it
between the instructions, taking up precious space in the CPU cache and causing
more cache misses as the interpreter skips past it to get to the opcodes and
operands it cares about.</p>
</aside>
<p>To implement this, we add another array to Chunk.</p>
<div class="codehilite"><pre class="insert-before">  uint8_t* code;
</pre><div class="source-file"><em>chunk.h</em><br>
in struct <em>Chunk</em></div>
<pre class="insert">  <span class="t">int</span>* <span class="i">lines</span>;
</pre><pre class="insert-after">  ValueArray constants;
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in struct <em>Chunk</em></div>

<p>Since it exactly parallels the bytecode array, we don&rsquo;t need a separate count or
capacity. Every time we touch the code array, we make a corresponding change to
the line number array, starting with initialization.</p>
<div class="codehilite"><pre class="insert-before">  chunk-&gt;code = NULL;
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>initChunk</em>()</div>
<pre class="insert">  <span class="i">chunk</span>-&gt;<span class="i">lines</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">  initValueArray(&amp;chunk-&gt;constants);
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>initChunk</em>()</div>

<p>And likewise deallocation:</p>
<div class="codehilite"><pre class="insert-before">  FREE_ARRAY(uint8_t, chunk-&gt;code, chunk-&gt;capacity);
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>freeChunk</em>()</div>
<pre class="insert">  <span class="a">FREE_ARRAY</span>(<span class="t">int</span>, <span class="i">chunk</span>-&gt;<span class="i">lines</span>, <span class="i">chunk</span>-&gt;<span class="i">capacity</span>);
</pre><pre class="insert-after">  freeValueArray(&amp;chunk-&gt;constants);
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>freeChunk</em>()</div>

<p>When we write a byte of code to the chunk, we need to know what source line it
came from, so we add an extra parameter in the declaration of <code>writeChunk()</code>.</p>
<div class="codehilite"><pre class="insert-before">void freeChunk(Chunk* chunk);
</pre><div class="source-file"><em>chunk.h</em><br>
function <em>writeChunk</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="t">void</span> <span class="i">writeChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">uint8_t</span> <span class="i">byte</span>, <span class="t">int</span> <span class="i">line</span>);
</pre><pre class="insert-after">int addConstant(Chunk* chunk, Value value);
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, function <em>writeChunk</em>(), replace 1 line</div>

<p>And in the implementation:</p>
<div class="codehilite"><div class="source-file"><em>chunk.c</em><br>
function <em>writeChunk</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="t">void</span> <span class="i">writeChunk</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">uint8_t</span> <span class="i">byte</span>, <span class="t">int</span> <span class="i">line</span>) {
</pre><pre class="insert-after">  if (chunk-&gt;capacity &lt; chunk-&gt;count + 1) {
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, function <em>writeChunk</em>(), replace 1 line</div>

<p>When we allocate or grow the code array, we do the same for the line info too.</p>
<div class="codehilite"><pre class="insert-before">    chunk-&gt;code = GROW_ARRAY(uint8_t, chunk-&gt;code,
        oldCapacity, chunk-&gt;capacity);
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>writeChunk</em>()</div>
<pre class="insert">    <span class="i">chunk</span>-&gt;<span class="i">lines</span> = <span class="a">GROW_ARRAY</span>(<span class="t">int</span>, <span class="i">chunk</span>-&gt;<span class="i">lines</span>,
        <span class="i">oldCapacity</span>, <span class="i">chunk</span>-&gt;<span class="i">capacity</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>writeChunk</em>()</div>

<p>Finally, we store the line number in the array.</p>
<div class="codehilite"><pre class="insert-before">  chunk-&gt;code[chunk-&gt;count] = byte;
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>writeChunk</em>()</div>
<pre class="insert">  <span class="i">chunk</span>-&gt;<span class="i">lines</span>[<span class="i">chunk</span>-&gt;<span class="i">count</span>] = <span class="i">line</span>;
</pre><pre class="insert-after">  chunk-&gt;count++;
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>writeChunk</em>()</div>

<h3><a href="#disassembling-line-information" id="disassembling-line-information"><small>14&#8202;.&#8202;6&#8202;.&#8202;1</small>Disassembling line information</a></h3>
<p>Alright, let&rsquo;s try this out with our little, uh, artisanal chunk. First, since
we added a new parameter to <code>writeChunk()</code>, we need to fix those calls to pass
in some<span class="em">&mdash;</span>arbitrary at this point<span class="em">&mdash;</span>line number.</p>
<div class="codehilite"><pre class="insert-before">  int constant = addConstant(&amp;chunk, 1.2);
</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()<br>
replace 4 lines</div>
<pre class="insert">  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_CONSTANT</span>, <span class="n">123</span>);
  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="i">constant</span>, <span class="n">123</span>);

  <span class="i">writeChunk</span>(&amp;<span class="i">chunk</span>, <span class="a">OP_RETURN</span>, <span class="n">123</span>);
</pre><pre class="insert-after">

  disassembleChunk(&amp;chunk, &quot;test chunk&quot;);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>(), replace 4 lines</div>

<p>Once we have a real front end, of course, the compiler will track the current
line as it parses and pass that in.</p>
<p>Now that we have line information for every instruction, let&rsquo;s put it to good
use. In our disassembler, it&rsquo;s helpful to show which source line each
instruction was compiled from. That gives us a way to map back to the original
code when we&rsquo;re trying to figure out what some blob of bytecode is supposed to
do. After printing the offset of the instruction<span class="em">&mdash;</span>the number of bytes from the
beginning of the chunk<span class="em">&mdash;</span>we show its source line.</p>
<div class="codehilite"><pre class="insert-before">int disassembleInstruction(Chunk* chunk, int offset) {
  printf(&quot;%04d &quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">offset</span> &gt; <span class="n">0</span> &amp;&amp;
      <span class="i">chunk</span>-&gt;<span class="i">lines</span>[<span class="i">offset</span>] == <span class="i">chunk</span>-&gt;<span class="i">lines</span>[<span class="i">offset</span> - <span class="n">1</span>]) {
    <span class="i">printf</span>(<span class="s">&quot;   | &quot;</span>);
  } <span class="k">else</span> {
    <span class="i">printf</span>(<span class="s">&quot;%4d &quot;</span>, <span class="i">chunk</span>-&gt;<span class="i">lines</span>[<span class="i">offset</span>]);
  }
</pre><pre class="insert-after">

  uint8_t instruction = chunk-&gt;code[offset];
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>Bytecode instructions tend to be pretty fine-grained. A single line of source
code often compiles to a whole sequence of instructions. To make that more
visually clear, we show a <code>|</code> for any instruction that comes from the same
source line as the preceding one. The resulting output for our handwritten
chunk looks like:</p>
<div class="codehilite"><pre>== test chunk ==
0000  123 OP_CONSTANT         0 '1.2'
0002    | OP_RETURN
</pre></div>
<p>We have a three-byte chunk. The first two bytes are a constant instruction that
loads 1.2 from the chunk&rsquo;s constant pool. The first byte is the <code>OP_CONSTANT</code>
opcode and the second is the index in the constant pool. The third byte (at
offset 2) is a single-byte return instruction.</p>
<p>In the remaining chapters, we will flesh this out with lots more kinds of
instructions. But the basic structure is here, and we have everything we need
now to completely represent an executable piece of code at runtime in our
virtual machine. Remember that whole family of AST classes we defined in jlox?
In clox, we&rsquo;ve reduced that down to three arrays: bytes of code, constant
values, and line information for debugging.</p>
<p>This reduction is a key reason why our new interpreter will be faster than jlox.
You can think of bytecode as a sort of compact serialization of the AST, highly
optimized for how the interpreter will deserialize it in the order it needs as
it executes. In the <a href="a-virtual-machine.html">next chapter</a>, we will see how the virtual machine does
exactly that.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Our encoding of line information is hilariously wasteful of memory. Given
that a series of instructions often correspond to the same source line, a
natural solution is something akin to <a href="https://en.wikipedia.org/wiki/Run-length_encoding">run-length encoding</a> of the line
numbers.</p>
<p>Devise an encoding that compresses the line information for a
series of instructions on the same line. Change <code>writeChunk()</code> to write this
compressed form, and implement a <code>getLine()</code> function that, given the index
of an instruction, determines the line where the instruction occurs.</p>
<p><em>Hint: It&rsquo;s not necessary for <code>getLine()</code> to be particularly efficient.
Since it is called only when a runtime error occurs, it is well off the
critical path where performance matters.</em></p>
</li>
<li>
<p>Because <code>OP_CONSTANT</code> uses only a single byte for its operand, a chunk may
only contain up to 256 different constants. That&rsquo;s small enough that people
writing real-world code will hit that limit. We could use two or more bytes
to store the operand, but that makes <em>every</em> constant instruction take up
more space. Most chunks won&rsquo;t need that many unique constants, so that
wastes space and sacrifices some locality in the common case to support the
rare case.</p>
<p>To balance those two competing aims, many instruction sets feature multiple
instructions that perform the same operation but with operands of different
sizes. Leave our existing one-byte <code>OP_CONSTANT</code> instruction alone, and
define a second <code>OP_CONSTANT_LONG</code> instruction. It stores the operand as a
24-bit number, which should be plenty.</p>
<p>Implement this function:</p>
<div class="codehilite"><pre><span class="t">void</span> <span class="i">writeConstant</span>(<span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">Value</span> <span class="i">value</span>, <span class="t">int</span> <span class="i">line</span>) {
  <span class="c">// Implement me...</span>
}
</pre></div>
<p>It adds <code>value</code> to <code>chunk</code>&rsquo;s constant array and then writes an appropriate
instruction to load the constant. Also add support to the disassembler for
<code>OP_CONSTANT_LONG</code> instructions.</p>
<p>Defining two instructions seems to be the best of both worlds. What
sacrifices, if any, does it force on us?</p>
</li>
<li>
<p>Our <code>reallocate()</code> function relies on the C standard library for dynamic
memory allocation and freeing. <code>malloc()</code> and <code>free()</code> aren&rsquo;t magic. Find
a couple of open source implementations of them and explain how they work.
How do they keep track of which bytes are allocated and which are free?
What is required to allocate a block of memory? Free it? How do they make
that efficient? What do they do about fragmentation?</p>
<p><em>Hardcore mode:</em> Implement <code>reallocate()</code> without calling <code>realloc()</code>,
<code>malloc()</code>, or <code>free()</code>. You are allowed to call <code>malloc()</code> <em>once</em>, at the
beginning of the interpreter&rsquo;s execution, to allocate a single big block of
memory, which your <code>reallocate()</code> function has access to. It parcels out
blobs of memory from that single region, your own personal heap. It&rsquo;s your
job to define how it does that.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Test Your Language</a></h2>
<p>We&rsquo;re almost halfway through the book and one thing we haven&rsquo;t talked about is
<em>testing</em> your language implementation. That&rsquo;s not because testing isn&rsquo;t
important. I can&rsquo;t possibly stress enough how vital it is to have a good,
comprehensive test suite for your language.</p>
<p>I wrote a <a href="https://github.com/munificent/craftinginterpreters/tree/master/test">test suite for Lox</a> (which you are welcome to use on your own
Lox implementation) before I wrote a single word of this book. Those tests found
countless bugs in my implementations.</p>
<p>Tests are important in all software, but they&rsquo;re even more important for a
programming language for at least a couple of reasons:</p>
<ul>
<li>
<p><strong>Users expect their programming languages to be rock solid.</strong> We are so
used to mature, stable compilers and interpreters that &ldquo;It&rsquo;s your code, not
the compiler&rdquo; is <a href="https://blog.codinghorror.com/the-first-rule-of-programming-its-always-your-fault/">an ingrained part of software culture</a>. If there
are bugs in your language implementation, users will go through the full
five stages of grief before they can figure out what&rsquo;s going on, and you
don&rsquo;t want to put them through all that.</p>
</li>
<li>
<p><strong>A language implementation is a deeply interconnected piece of software.</strong>
Some codebases are broad and shallow. If the file loading code is broken in
your text editor, it<span class="em">&mdash;</span>hopefully!<span class="em">&mdash;</span>won&rsquo;t cause failures in the text
rendering on screen. Language implementations are narrower and deeper,
especially the core of the interpreter that handles the language&rsquo;s actual
semantics. That makes it easy for subtle bugs to creep in caused by weird
interactions between various parts of the system. It takes good tests to
flush those out.</p>
</li>
<li>
<p><strong>The input to a language implementation is, by design, combinatorial.</strong>
There are an infinite number of possible programs a user could write, and
your implementation needs to run them all correctly. You obviously can&rsquo;t
test that exhaustively, but you need to work hard to cover as much of the
input space as you can.</p>
</li>
<li>
<p><strong>Language implementations are often complex, constantly changing, and full
of optimizations.</strong> That leads to gnarly code with lots of dark corners
where bugs can hide.</p>
</li>
</ul>
<p>All of that means you&rsquo;re gonna want a lot of tests. But <em>what</em> tests? Projects
I&rsquo;ve seen focus mostly on end-to-end &ldquo;language tests&rdquo;. Each test is a program
written in the language along with the output or errors it is expected to
produce. Then you have a test runner that pushes the test program through your
language implementation and validates that it does what it&rsquo;s supposed to.
Writing your tests in the language itself has a few nice advantages:</p>
<ul>
<li>
<p>The tests aren&rsquo;t coupled to any particular API or internal architecture
decisions of the implementation. This frees you to reorganize or rewrite
parts of your interpreter or compiler without needing to update a slew of
tests.</p>
</li>
<li>
<p>You can use the same tests for multiple implementations of the language.</p>
</li>
<li>
<p>Tests can often be terse and easy to read and maintain since they are
simply scripts in your language.</p>
</li>
</ul>
<p>It&rsquo;s not all rosy, though:</p>
<ul>
<li>
<p>End-to-end tests help you determine <em>if</em> there is a bug, but not <em>where</em> the
bug is. It can be harder to figure out where the erroneous code in the
implementation is because all the test tells you is that the right output
didn&rsquo;t appear.</p>
</li>
<li>
<p>It can be a chore to craft a valid program that tickles some obscure corner
of the implementation. This is particularly true for highly optimized
compilers where you may need to write convoluted code to ensure that you
end up on just the right optimization path where a bug may be hiding.</p>
</li>
<li>
<p>The overhead can be high to fire up the interpreter, parse, compile, and
run each test script. With a big suite of tests<span class="em">&mdash;</span>which you <em>do</em> want,
remember<span class="em">&mdash;</span>that can mean a lot of time spent waiting for the tests to
finish running.</p>
</li>
</ul>
<p>I could go on, but I don&rsquo;t want this to turn into a sermon. Also, I don&rsquo;t
pretend to be an expert on <em>how</em> to test languages. I just want you to
internalize how important it is <em>that</em> you test yours. Seriously. Test your
language. You&rsquo;ll thank me for it.</p>
</div>

<footer>
<a href="a-virtual-machine.html" class="next">
  Next Chapter: &ldquo;A Virtual Machine&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/classes-and-instances.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Classes and Instances &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Classes and Instances<small>27</small></a></h3>

<ul>
    <li><a href="#class-objects"><small>27.1</small> Class Objects</a></li>
    <li><a href="#class-declarations"><small>27.2</small> Class Declarations</a></li>
    <li><a href="#instances-of-classes"><small>27.3</small> Instances of Classes</a></li>
    <li><a href="#get-and-set-expressions"><small>27.4</small> Get and Set Expressions</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="garbage-collection.html" title="Garbage Collection" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="methods-and-initializers.html" title="Methods and Initializers" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="garbage-collection.html" title="Garbage Collection" class="prev">←</a>
<a href="methods-and-initializers.html" title="Methods and Initializers" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Classes and Instances<small>27</small></a></h3>

<ul>
    <li><a href="#class-objects"><small>27.1</small> Class Objects</a></li>
    <li><a href="#class-declarations"><small>27.2</small> Class Declarations</a></li>
    <li><a href="#instances-of-classes"><small>27.3</small> Instances of Classes</a></li>
    <li><a href="#get-and-set-expressions"><small>27.4</small> Get and Set Expressions</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="garbage-collection.html" title="Garbage Collection" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="methods-and-initializers.html" title="Methods and Initializers" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">27</div>
  <h1>Classes and Instances</h1>

<blockquote>
<p>Caring too much for objects can destroy you. Only<span class="em">&mdash;</span>if you care for a thing
enough, it takes on a life of its own, doesn&rsquo;t it? And isn’t the whole point
of things<span class="em">&mdash;</span>beautiful things<span class="em">&mdash;</span>that they connect you to some larger beauty?</p>
<p><cite>Donna Tartt, <em>The Goldfinch</em></cite></p>
</blockquote>
<p>The last area left to implement in clox is object-oriented programming. <span
name="oop">OOP</span> is a bundle of intertwined features: classes, instances,
fields, methods, initializers, and inheritance. Using relatively high-level
Java, we packed all that into two chapters. Now that we&rsquo;re coding in C, which
feels like building a model of the Eiffel Tower out of toothpicks, we&rsquo;ll devote
three chapters to covering the same territory. This makes for a leisurely stroll
through the implementation. After strenuous chapters like <a href="closures.html">closures</a> and the
<a href="garbage-collection.html">garbage collector</a>, you have earned a rest. In fact, the book should be easy
from here on out.</p>
<aside name="oop">
<p>People who have strong opinions about object-oriented programming<span class="em">&mdash;</span>read
&ldquo;everyone&rdquo;<span class="em">&mdash;</span>tend to assume OOP means some very specific list of language
features, but really there&rsquo;s a whole space to explore, and each language has its
own ingredients and recipes.</p>
<p>Self has objects but no classes. CLOS has methods but doesn&rsquo;t attach them to
specific classes. C++ initially had no runtime polymorphism<span class="em">&mdash;</span>no virtual
methods. Python has multiple inheritance, but Java does not. Ruby attaches
methods to classes, but you can also define methods on a single object.</p>
</aside>
<p>In this chapter, we cover the first three features: classes, instances, and
fields. This is the stateful side of object orientation. Then in the next two
chapters, we will hang behavior and code reuse off of those objects.</p>
<h2><a href="#class-objects" id="class-objects"><small>27&#8202;.&#8202;1</small>Class Objects</a></h2>
<p>In a class-based object-oriented language, everything begins with classes. They
define what sorts of objects exist in the program and are the factories used to
produce new instances. Going bottom-up, we&rsquo;ll start with their runtime
representation and then hook that into the language.</p>
<p>By this point, we&rsquo;re well-acquainted with the process of adding a new object
type to the VM. We start with a struct.</p>
<div class="codehilite"><pre class="insert-before">} ObjClosure;
</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjClosure</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">ObjString</span>* <span class="i">name</span>;
} <span class="t">ObjClass</span>;
</pre><pre class="insert-after">

ObjClosure* newClosure(ObjFunction* function);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjClosure</em></div>

<p>After the Obj header, we store the class&rsquo;s name. This isn&rsquo;t strictly needed for
the user&rsquo;s program, but it lets us show the name at runtime for things like
stack traces.</p>
<p>The new type needs a corresponding case in the ObjType enum.</p>
<div class="codehilite"><pre class="insert-before">typedef enum {
</pre><div class="source-file"><em>object.h</em><br>
in enum <em>ObjType</em></div>
<pre class="insert">  <span class="a">OBJ_CLASS</span>,
</pre><pre class="insert-after">  OBJ_CLOSURE,
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in enum <em>ObjType</em></div>

<p>And that type gets a corresponding pair of macros. First, for testing an
object&rsquo;s type:</p>
<div class="codehilite"><pre class="insert-before">#define OBJ_TYPE(value)        (AS_OBJ(value)-&gt;type)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define IS_CLASS(value)        isObjType(value, OBJ_CLASS)</span>
</pre><pre class="insert-after">#define IS_CLOSURE(value)      isObjType(value, OBJ_CLOSURE)
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>And then for casting a Value to an ObjClass pointer:</p>
<div class="codehilite"><pre class="insert-before">#define IS_STRING(value)       isObjType(value, OBJ_STRING)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define AS_CLASS(value)        ((ObjClass*)AS_OBJ(value))</span>
</pre><pre class="insert-after">#define AS_CLOSURE(value)      ((ObjClosure*)AS_OBJ(value))
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>The VM creates new class objects using this function:</p>
<div class="codehilite"><pre class="insert-before">} ObjClass;

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjClass</em></div>
<pre class="insert"><span class="t">ObjClass</span>* <span class="i">newClass</span>(<span class="t">ObjString</span>* <span class="i">name</span>);
</pre><pre class="insert-after">ObjClosure* newClosure(ObjFunction* function);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjClass</em></div>

<p>The implementation lives over here:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>allocateObject</em>()</div>
<pre><span class="t">ObjClass</span>* <span class="i">newClass</span>(<span class="t">ObjString</span>* <span class="i">name</span>) {
  <span class="t">ObjClass</span>* <span class="i">klass</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjClass</span>, <span class="a">OBJ_CLASS</span>);
  <span class="i">klass</span>-&gt;<span class="i">name</span> = <span class="i">name</span>;<span name="klass"> </span>
  <span class="k">return</span> <span class="i">klass</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>allocateObject</em>()</div>

<p>Pretty much all boilerplate. It takes in the class&rsquo;s name as a string and stores
it. Every time the user declares a new class, the VM will create a new one of
these ObjClass structs to represent it.</p>
<aside name="klass"><img src="image/classes-and-instances/klass.png" alt="'Klass' in a zany kidz font."/>
<p>I named the variable &ldquo;klass&rdquo; not just to give the VM a zany preschool &ldquo;Kidz
Korner&rdquo; feel. It makes it easier to get clox compiling as C++ where &ldquo;class&rdquo; is
a reserved word.</p>
</aside>
<p>When the VM no longer needs a class, it frees it like so:</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_CLASS</span>: {
      <span class="a">FREE</span>(<span class="t">ObjClass</span>, <span class="i">object</span>);
      <span class="k">break</span>;
    }<span name="braces"> </span>
</pre><pre class="insert-after">    case OBJ_CLOSURE: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<aside name="braces">
<p>The braces here are pointless now, but will be useful in the next chapter when
we add some more code to the switch case.</p>
</aside>
<p>We have a memory manager now, so we also need to support tracing through class
objects.</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_CLASS</span>: {
      <span class="t">ObjClass</span>* <span class="i">klass</span> = (<span class="t">ObjClass</span>*)<span class="i">object</span>;
      <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">klass</span>-&gt;<span class="i">name</span>);
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_CLOSURE: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>When the GC reaches a class object, it marks the class&rsquo;s name to keep that
string alive too.</p>
<p>The last operation the VM can perform on a class is printing it.</p>
<div class="codehilite"><pre class="insert-before">  switch (OBJ_TYPE(value)) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>printObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_CLASS</span>:
      <span class="i">printf</span>(<span class="s">&quot;%s&quot;</span>, <span class="a">AS_CLASS</span>(<span class="i">value</span>)-&gt;<span class="i">name</span>-&gt;<span class="i">chars</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_CLOSURE:
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printObject</em>()</div>

<p>A class simply says its own name.</p>
<h2><a href="#class-declarations" id="class-declarations"><small>27&#8202;.&#8202;2</small>Class Declarations</a></h2>
<p>Runtime representation in hand, we are ready to add support for classes to the
language. Next, we move into the parser.</p>
<div class="codehilite"><pre class="insert-before">static void declaration() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>declaration</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_CLASS</span>)) {
    <span class="i">classDeclaration</span>();
  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_FUN</span>)) {
</pre><pre class="insert-after">    funDeclaration();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>declaration</em>(), replace 1 line</div>

<p>Class declarations are statements, and the parser recognizes one by the leading
<code>class</code> keyword. The rest of the compilation happens over here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>function</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">classDeclaration</span>() {
  <span class="i">consume</span>(<span class="a">TOKEN_IDENTIFIER</span>, <span class="s">&quot;Expect class name.&quot;</span>);
  <span class="t">uint8_t</span> <span class="i">nameConstant</span> = <span class="i">identifierConstant</span>(&amp;<span class="i">parser</span>.<span class="i">previous</span>);
  <span class="i">declareVariable</span>();

  <span class="i">emitBytes</span>(<span class="a">OP_CLASS</span>, <span class="i">nameConstant</span>);
  <span class="i">defineVariable</span>(<span class="i">nameConstant</span>);

  <span class="i">consume</span>(<span class="a">TOKEN_LEFT_BRACE</span>, <span class="s">&quot;Expect &#39;{&#39; before class body.&quot;</span>);
  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_BRACE</span>, <span class="s">&quot;Expect &#39;}&#39; after class body.&quot;</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>function</em>()</div>

<p>Immediately after the <code>class</code> keyword is the class&rsquo;s name. We take that
identifier and add it to the surrounding function&rsquo;s constant table as a string.
As you just saw, printing a class shows its name, so the compiler needs to stuff
the name string somewhere that the runtime can find. The constant table is the
way to do that.</p>
<p>The class&rsquo;s <span name="variable">name</span> is also used to bind the class
object to a variable of the same name. So we declare a variable with that
identifier right after consuming its token.</p>
<aside name="variable">
<p>We could have made class declarations be <em>expressions</em> instead of statements<span class="em">&mdash;</span>they are essentially a literal that produces a value after all. Then users would
have to explicitly bind the class to a variable themselves like:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="t">Pie</span> = <span class="k">class</span> {}
</pre></div>
<p>Sort of like lambda functions but for classes. But since we generally want
classes to be named anyway, it makes sense to treat them as declarations.</p>
</aside>
<p>Next, we emit a new instruction to actually create the class object at runtime.
That instruction takes the constant table index of the class&rsquo;s name as an
operand.</p>
<p>After that, but before compiling the body of the class, we define the variable
for the class&rsquo;s name. <em>Declaring</em> the variable adds it to the scope, but recall
from <a href="local-variables.html#another-scope-edge-case">a previous chapter</a> that we can&rsquo;t <em>use</em> the variable until it&rsquo;s
<em>defined</em>. For classes, we define the variable before the body. That way, users
can refer to the containing class inside the bodies of its own methods. That&rsquo;s
useful for things like factory methods that produce new instances of the class.</p>
<p>Finally, we compile the body. We don&rsquo;t have methods yet, so right now it&rsquo;s
simply an empty pair of braces. Lox doesn&rsquo;t require fields to be declared in the
class, so we&rsquo;re done with the body<span class="em">&mdash;</span>and the parser<span class="em">&mdash;</span>for now.</p>
<p>The compiler is emitting a new instruction, so let&rsquo;s define that.</p>
<div class="codehilite"><pre class="insert-before">  OP_RETURN,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_CLASS</span>,
</pre><pre class="insert-after">} OpCode;
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>And add it to the disassembler:</p>
<div class="codehilite"><pre class="insert-before">    case OP_RETURN:
      return simpleInstruction(&quot;OP_RETURN&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_CLASS</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_CLASS&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    default:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>For such a large-seeming feature, the interpreter support is minimal.</p>
<div class="codehilite"><pre class="insert-before">        break;
      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_CLASS</span>:
        <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">newClass</span>(<span class="a">READ_STRING</span>())));
        <span class="k">break</span>;
</pre><pre class="insert-after">    }
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We load the string for the class&rsquo;s name from the constant table and pass that to
<code>newClass()</code>. That creates a new class object with the given name. We push that
onto the stack and we&rsquo;re good. If the class is bound to a global variable, then
the compiler&rsquo;s call to <code>defineVariable()</code> will emit code to store that object
from the stack into the global variable table. Otherwise, it&rsquo;s right where it
needs to be on the stack for a new <span name="local">local</span> variable.</p>
<aside name="local">
<p>&ldquo;Local&rdquo; classes<span class="em">&mdash;</span>classes declared inside the body of a function or block<span class="em">&mdash;</span>are
an unusual concept. Many languages don&rsquo;t allow them at all. But since Lox is a
dynamically typed scripting language, it treats the top level of a program and
the bodies of functions and blocks uniformly. Classes are just another kind of
declaration, and since you can declare variables and functions inside blocks,
you can declare classes in there too.</p>
</aside>
<p>There you have it, our VM supports classes now. You can run this:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Brioche</span> {}
<span class="k">print</span> <span class="t">Brioche</span>;
</pre></div>
<p>Unfortunately, printing is about <em>all</em> you can do with classes, so next is
making them more useful.</p>
<h2><a href="#instances-of-classes" id="instances-of-classes"><small>27&#8202;.&#8202;3</small>Instances of Classes</a></h2>
<p>Classes serve two main purposes in a language:</p>
<ul>
<li>
<p><strong>They are how you create new instances.</strong> Sometimes this involves a <code>new</code>
keyword, other times it&rsquo;s a method call on the class object, but you usually
mention the class by name <em>somehow</em> to get a new instance.</p>
</li>
<li>
<p><strong>They contain methods.</strong> These define how all instances of the class
behave.</p>
</li>
</ul>
<p>We won&rsquo;t get to methods until the next chapter, so for now we will only worry
about the first part. Before classes can create instances, we need a
representation for them.</p>
<div class="codehilite"><pre class="insert-before">} ObjClass;
</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjClass</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">ObjClass</span>* <span class="i">klass</span>;
  <span class="t">Table</span> <span class="i">fields</span>;<span name="fields"> </span>
} <span class="t">ObjInstance</span>;
</pre><pre class="insert-after">

ObjClass* newClass(ObjString* name);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjClass</em></div>

<p>Instances know their class<span class="em">&mdash;</span>each instance has a pointer to the class that it
is an instance of.  We won&rsquo;t use this much in this chapter, but it will become
critical when we add methods.</p>
<p>More important to this chapter is how instances store their state. Lox lets
users freely add fields to an instance at runtime. This means we need a storage
mechanism that can grow. We could use a dynamic array, but we also want to look
up fields by name as quickly as possible. There&rsquo;s a data structure that&rsquo;s just
perfect for quickly accessing a set of values by name and<span class="em">&mdash;</span>even more conveniently<span class="em">&mdash;</span>we&rsquo;ve already implemented it. Each instance stores
its fields using a hash table.</p>
<aside name="fields">
<p>Being able to freely add fields to an object at runtime is a big practical
difference between most dynamic and static languages. Statically typed languages
usually require fields to be explicitly declared. This way, the compiler knows
exactly what fields each instance has. It can use that to determine the precise
amount of memory needed for each instance and the offsets in that memory where
each field can be found.</p>
<p>In Lox and other dynamic languages, accessing a field is usually a hash table
lookup. Constant time, but still pretty heavyweight. In a language like C++,
accessing a field is as fast as offsetting a pointer by an integer constant.</p>
</aside>
<p>We only need to add an include, and we&rsquo;ve got it.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;chunk.h&quot;
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#include &quot;table.h&quot;</span>
</pre><pre class="insert-after">#include &quot;value.h&quot;
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>This new struct gets a new object type.</p>
<div class="codehilite"><pre class="insert-before">  OBJ_FUNCTION,
</pre><div class="source-file"><em>object.h</em><br>
in enum <em>ObjType</em></div>
<pre class="insert">  <span class="a">OBJ_INSTANCE</span>,
</pre><pre class="insert-after">  OBJ_NATIVE,
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in enum <em>ObjType</em></div>

<p>I want to slow down a bit here because the Lox <em>language&rsquo;s</em> notion of &ldquo;type&rdquo; and
the VM <em>implementation&rsquo;s</em> notion of &ldquo;type&rdquo; brush against each other in ways that
can be confusing. Inside the C code that makes clox, there are a number of
different types of Obj<span class="em">&mdash;</span>ObjString, ObjClosure, etc. Each has its own internal
representation and semantics.</p>
<p>In the Lox <em>language</em>, users can define their own classes<span class="em">&mdash;</span>say Cake and Pie<span class="em">&mdash;</span>and then create instances of those classes. From the user&rsquo;s perspective, an
instance of Cake is a different type of object than an instance of Pie. But,
from the VM&rsquo;s perspective, every class the user defines is simply another value
of type ObjClass. Likewise, each instance in the user&rsquo;s program, no matter what
class it is an instance of, is an ObjInstance. That one VM object type covers
instances of all classes. The two worlds map to each other something like this:</p><img src="image/classes-and-instances/lox-clox.png" alt="A set of class declarations and instances, and the runtime representations each maps to."/>
<p>Got it? OK, back to the implementation. We also get our usual macros.</p>
<div class="codehilite"><pre class="insert-before">#define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define IS_INSTANCE(value)     isObjType(value, OBJ_INSTANCE)</span>
</pre><pre class="insert-after">#define IS_NATIVE(value)       isObjType(value, OBJ_NATIVE)
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>And:</p>
<div class="codehilite"><pre class="insert-before">#define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define AS_INSTANCE(value)     ((ObjInstance*)AS_OBJ(value))</span>
</pre><pre class="insert-after">#define AS_NATIVE(value) \
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>Since fields are added after the instance is created, the &ldquo;constructor&rdquo; function
only needs to know the class.</p>
<div class="codehilite"><pre class="insert-before">ObjFunction* newFunction();
</pre><div class="source-file"><em>object.h</em><br>
add after <em>newFunction</em>()</div>
<pre class="insert"><span class="t">ObjInstance</span>* <span class="i">newInstance</span>(<span class="t">ObjClass</span>* <span class="i">klass</span>);
</pre><pre class="insert-after">ObjNative* newNative(NativeFn function);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after <em>newFunction</em>()</div>

<p>We implement that function here:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>newFunction</em>()</div>
<pre><span class="t">ObjInstance</span>* <span class="i">newInstance</span>(<span class="t">ObjClass</span>* <span class="i">klass</span>) {
  <span class="t">ObjInstance</span>* <span class="i">instance</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjInstance</span>, <span class="a">OBJ_INSTANCE</span>);
  <span class="i">instance</span>-&gt;<span class="i">klass</span> = <span class="i">klass</span>;
  <span class="i">initTable</span>(&amp;<span class="i">instance</span>-&gt;<span class="i">fields</span>);
  <span class="k">return</span> <span class="i">instance</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>newFunction</em>()</div>

<p>We store a reference to the instance&rsquo;s class. Then we initialize the field
table to an empty hash table. A new baby object is born!</p>
<p>At the sadder end of the instance&rsquo;s lifespan, it gets freed.</p>
<div class="codehilite"><pre class="insert-before">      FREE(ObjFunction, object);
      break;
    }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_INSTANCE</span>: {
      <span class="t">ObjInstance</span>* <span class="i">instance</span> = (<span class="t">ObjInstance</span>*)<span class="i">object</span>;
      <span class="i">freeTable</span>(&amp;<span class="i">instance</span>-&gt;<span class="i">fields</span>);
      <span class="a">FREE</span>(<span class="t">ObjInstance</span>, <span class="i">object</span>);
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_NATIVE:
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>The instance owns its field table so when freeing the instance, we also free the
table. We don&rsquo;t explicitly free the entries <em>in</em> the table, because there may
be other references to those objects. The garbage collector will take care of
those for us. Here we free only the entry array of the table itself.</p>
<p>Speaking of the garbage collector, it needs support for tracing through
instances.</p>
<div class="codehilite"><pre class="insert-before">      markArray(&amp;function-&gt;chunk.constants);
      break;
    }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_INSTANCE</span>: {
      <span class="t">ObjInstance</span>* <span class="i">instance</span> = (<span class="t">ObjInstance</span>*)<span class="i">object</span>;
      <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">instance</span>-&gt;<span class="i">klass</span>);
      <span class="i">markTable</span>(&amp;<span class="i">instance</span>-&gt;<span class="i">fields</span>);
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_UPVALUE:
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>If the instance is alive, we need to keep its class around. Also, we need to
keep every object referenced by the instance&rsquo;s fields. Most live objects that
are not roots are reachable because some instance refers to the object in a
field. Fortunately, we already have a nice <code>markTable()</code> function to make
tracing them easy.</p>
<p>Less critical but still important is printing.</p>
<div class="codehilite"><pre class="insert-before">      break;
</pre><div class="source-file"><em>object.c</em><br>
in <em>printObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_INSTANCE</span>:
      <span class="i">printf</span>(<span class="s">&quot;%s instance&quot;</span>,
             <span class="a">AS_INSTANCE</span>(<span class="i">value</span>)-&gt;<span class="i">klass</span>-&gt;<span class="i">name</span>-&gt;<span class="i">chars</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_NATIVE:
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printObject</em>()</div>

<p><span name="print">An</span> instance prints its class&rsquo;s name followed by &ldquo;instance&rdquo;.
(The &ldquo;instance&rdquo; part is mainly so that classes and instances don&rsquo;t print the
same.)</p>
<aside name="print">
<p>Most object-oriented languages let a class define some sort of <code>toString()</code>
method that lets the class specify how its instances are converted to a string
and printed. If Lox were less of a toy language, I would want to support that
too.</p>
</aside>
<p>The real fun happens over in the interpreter. Lox has no special <code>new</code> keyword.
The way to create an instance of a class is to invoke the class itself as if it
were a function. The runtime already supports function calls, and it checks the
type of object being called to make sure the user doesn&rsquo;t try to invoke a number
or other invalid type.</p>
<p>We extend that runtime checking with a new case.</p>
<div class="codehilite"><pre class="insert-before">    switch (OBJ_TYPE(callee)) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>callValue</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OBJ_CLASS</span>: {
        <span class="t">ObjClass</span>* <span class="i">klass</span> = <span class="a">AS_CLASS</span>(<span class="i">callee</span>);
        <span class="i">vm</span>.<span class="i">stackTop</span>[-<span class="i">argCount</span> - <span class="n">1</span>] = <span class="a">OBJ_VAL</span>(<span class="i">newInstance</span>(<span class="i">klass</span>));
        <span class="k">return</span> <span class="k">true</span>;
      }
</pre><pre class="insert-after">      case OBJ_CLOSURE:
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>callValue</em>()</div>

<p>If the value being called<span class="em">&mdash;</span>the object that results when evaluating the
expression to the left of the opening parenthesis<span class="em">&mdash;</span>is a class, then we treat
it as a constructor call. We <span name="args">create</span> a new instance of
the called class and store the result on the stack.</p>
<aside name="args">
<p>We ignore any arguments passed to the call for now. We&rsquo;ll revisit this code in
the <a href="methods-and-initializers.html">next chapter</a> when we add support for initializers.</p>
</aside>
<p>We&rsquo;re one step farther. Now we can define classes and create instances of them.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Brioche</span> {}
<span class="k">print</span> <span class="t">Brioche</span>();
</pre></div>
<p>Note the parentheses after <code>Brioche</code> on the second line now. This prints
&ldquo;Brioche instance&rdquo;.</p>
<h2><a href="#get-and-set-expressions" id="get-and-set-expressions"><small>27&#8202;.&#8202;4</small>Get and Set Expressions</a></h2>
<p>Our object representation for instances can already store state, so all that
remains is exposing that functionality to the user. Fields are accessed and
modified using get and set expressions. Not one to break with tradition, Lox
uses the classic &ldquo;dot&rdquo; syntax:</p>
<div class="codehilite"><pre><span class="i">eclair</span>.<span class="i">filling</span> = <span class="s">&quot;pastry creme&quot;</span>;
<span class="k">print</span> <span class="i">eclair</span>.<span class="i">filling</span>;
</pre></div>
<p>The period<span class="em">&mdash;</span>full stop for my English friends<span class="em">&mdash;</span>works <span
name="sort">sort</span> of like an infix operator. There is an expression to the
left that is evaluated first and produces an instance. After that is the <code>.</code>
followed by a field name. Since there is a preceding operand, we hook this into
the parse table as an infix expression.</p>
<aside name="sort">
<p>I say &ldquo;sort of&rdquo; because the right-hand side after the <code>.</code> is not an expression,
but a single identifier whose semantics are handled by the get or set expression
itself. It&rsquo;s really closer to a postfix expression.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  [TOKEN_COMMA]         = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_DOT</span>]           = {<span class="a">NULL</span>,     <span class="i">dot</span>,    <span class="a">PREC_CALL</span>},
</pre><pre class="insert-after">  [TOKEN_MINUS]         = {unary,    binary, PREC_TERM},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>As in other languages, the <code>.</code> operator binds tightly, with precedence as high
as the parentheses in a function call. After the parser consumes the dot token,
it dispatches to a new parse function.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>call</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">dot</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
  <span class="i">consume</span>(<span class="a">TOKEN_IDENTIFIER</span>, <span class="s">&quot;Expect property name after &#39;.&#39;.&quot;</span>);
  <span class="t">uint8_t</span> <span class="i">name</span> = <span class="i">identifierConstant</span>(&amp;<span class="i">parser</span>.<span class="i">previous</span>);

  <span class="k">if</span> (<span class="i">canAssign</span> &amp;&amp; <span class="i">match</span>(<span class="a">TOKEN_EQUAL</span>)) {
    <span class="i">expression</span>();
    <span class="i">emitBytes</span>(<span class="a">OP_SET_PROPERTY</span>, <span class="i">name</span>);
  } <span class="k">else</span> {
    <span class="i">emitBytes</span>(<span class="a">OP_GET_PROPERTY</span>, <span class="i">name</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>call</em>()</div>

<p>The parser expects to find a <span name="prop">property</span> name immediately
after the dot. We load that token&rsquo;s lexeme into the constant table as a string
so that the name is available at runtime.</p>
<aside name="prop">
<p>The compiler uses &ldquo;property&rdquo; instead of &ldquo;field&rdquo; here because, remember, Lox also
lets you use dot syntax to access a method without calling it. &ldquo;Property&rdquo; is the
general term we use to refer to any named entity you can access on an instance.
Fields are the subset of properties that are backed by the instance&rsquo;s state.</p>
</aside>
<p>We have two new expression forms<span class="em">&mdash;</span>getters and setters<span class="em">&mdash;</span>that this one
function handles. If we see an equals sign after the field name, it must be a
set expression that is assigning to a field. But we don&rsquo;t <em>always</em> allow an
equals sign after the field to be compiled. Consider:</p>
<div class="codehilite"><pre><span class="i">a</span> + <span class="i">b</span>.<span class="i">c</span> = <span class="n">3</span>
</pre></div>
<p>This is syntactically invalid according to Lox&rsquo;s grammar, which means our Lox
implementation is obligated to detect and report the error. If <code>dot()</code> silently
parsed the <code>= 3</code> part, we would incorrectly interpret the code as if the user
had written:</p>
<div class="codehilite"><pre><span class="i">a</span> + (<span class="i">b</span>.<span class="i">c</span> = <span class="n">3</span>)
</pre></div>
<p>The problem is that the <code>=</code> side of a set expression has much lower precedence
than the <code>.</code> part. The parser may call <code>dot()</code> in a context that is too high
precedence to permit a setter to appear. To avoid incorrectly allowing that, we
parse and compile the equals part only when <code>canAssign</code> is true. If an equals
token appears when <code>canAssign</code> is false, <code>dot()</code> leaves it alone and returns. In
that case, the compiler will eventually unwind up to <code>parsePrecedence()</code>, which
stops at the unexpected <code>=</code> still sitting as the next token and reports an
error.</p>
<p>If we find an <code>=</code> in a context where it <em>is</em> allowed, then we compile the
expression that follows. After that, we emit a new <span
name="set"><code>OP_SET_PROPERTY</code></span> instruction. That takes a single operand for
the index of the property name in the constant table. If we didn&rsquo;t compile a set
expression, we assume it&rsquo;s a getter and emit an <code>OP_GET_PROPERTY</code> instruction,
which also takes an operand for the property name.</p>
<aside name="set">
<p>You can&rsquo;t <em>set</em> a non-field property, so I suppose that instruction could have
been <code>OP_SET_FIELD</code>, but I thought it looked nicer to be consistent with the get
instruction.</p>
</aside>
<p>Now is a good time to define these two new instructions.</p>
<div class="codehilite"><pre class="insert-before">  OP_SET_UPVALUE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_GET_PROPERTY</span>,
  <span class="a">OP_SET_PROPERTY</span>,
</pre><pre class="insert-after">  OP_EQUAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>And add support for disassembling them:</p>
<div class="codehilite"><pre class="insert-before">      return byteInstruction(&quot;OP_SET_UPVALUE&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_GET_PROPERTY</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_GET_PROPERTY&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_SET_PROPERTY</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_SET_PROPERTY&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_EQUAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<h3><a href="#interpreting-getter-and-setter-expressions" id="interpreting-getter-and-setter-expressions"><small>27&#8202;.&#8202;4&#8202;.&#8202;1</small>Interpreting getter and setter expressions</a></h3>
<p>Sliding over to the runtime, we&rsquo;ll start with get expressions since those are a
little simpler.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_GET_PROPERTY</span>: {
        <span class="t">ObjInstance</span>* <span class="i">instance</span> = <span class="a">AS_INSTANCE</span>(<span class="i">peek</span>(<span class="n">0</span>));
        <span class="t">ObjString</span>* <span class="i">name</span> = <span class="a">READ_STRING</span>();

        <span class="t">Value</span> <span class="i">value</span>;
        <span class="k">if</span> (<span class="i">tableGet</span>(&amp;<span class="i">instance</span>-&gt;<span class="i">fields</span>, <span class="i">name</span>, &amp;<span class="i">value</span>)) {
          <span class="i">pop</span>(); <span class="c">// Instance.</span>
          <span class="i">push</span>(<span class="i">value</span>);
          <span class="k">break</span>;
        }
      }
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>When the interpreter reaches this instruction, the expression to the left of the
dot has already been executed and the resulting instance is on top of the stack.
We read the field name from the constant pool and look it up in the instance&rsquo;s
field table. If the hash table contains an entry with that name, we pop the
instance and push the entry&rsquo;s value as the result.</p>
<p>Of course, the field might not exist. In Lox, we&rsquo;ve defined that to be a runtime
error. So we add a check for that and abort if it happens.</p>
<div class="codehilite"><pre class="insert-before">          push(value);
          break;
        }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">

        <span class="i">runtimeError</span>(<span class="s">&quot;Undefined property &#39;%s&#39;.&quot;</span>, <span class="i">name</span>-&gt;<span class="i">chars</span>);
        <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
</pre><pre class="insert-after">      }
      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p><span name="field">There</span> is another failure mode to handle which you&rsquo;ve
probably noticed. The above code assumes the expression to the left of the dot
did evaluate to an ObjInstance. But there&rsquo;s nothing preventing a user from
writing this:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">obj</span> = <span class="s">&quot;not an instance&quot;</span>;
<span class="k">print</span> <span class="i">obj</span>.<span class="i">field</span>;
</pre></div>
<p>The user&rsquo;s program is wrong, but the VM still has to handle it with some grace.
Right now, it will misinterpret the bits of the ObjString as an ObjInstance and,
I don&rsquo;t know, catch on fire or something definitely not graceful.</p>
<p>In Lox, only instances are allowed to have fields. You can&rsquo;t stuff a field onto
a string or number. So we need to check that the value is an instance before
accessing any fields on it.</p>
<aside name="field">
<p>Lox <em>could</em> support adding fields to values of other types. It&rsquo;s our language
and we can do what we want. But it&rsquo;s likely a bad idea. It significantly
complicates the implementation in ways that hurt performance<span class="em">&mdash;</span>for example,
string interning gets a lot harder.</p>
<p>Also, it raises gnarly semantic questions around the equality and identity of
values. If I attach a field to the number <code>3</code>, does the result of <code>1 + 2</code> have
that field as well? If so, how does the implementation track that? If not, are
those two resulting &ldquo;threes&rdquo; still considered equal?</p>
</aside>
<div class="codehilite"><pre class="insert-before">      case OP_GET_PROPERTY: {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">        <span class="k">if</span> (!<span class="a">IS_INSTANCE</span>(<span class="i">peek</span>(<span class="n">0</span>))) {
          <span class="i">runtimeError</span>(<span class="s">&quot;Only instances have properties.&quot;</span>);
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }

</pre><pre class="insert-after">        ObjInstance* instance = AS_INSTANCE(peek(0));
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>If the value on the stack isn&rsquo;t an instance, we report a runtime error and
safely exit.</p>
<p>Of course, get expressions are not very useful when no instances have any
fields. For that we need setters.</p>
<div class="codehilite"><pre class="insert-before">        return INTERPRET_RUNTIME_ERROR;
      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_SET_PROPERTY</span>: {
        <span class="t">ObjInstance</span>* <span class="i">instance</span> = <span class="a">AS_INSTANCE</span>(<span class="i">peek</span>(<span class="n">1</span>));
        <span class="i">tableSet</span>(&amp;<span class="i">instance</span>-&gt;<span class="i">fields</span>, <span class="a">READ_STRING</span>(), <span class="i">peek</span>(<span class="n">0</span>));
        <span class="t">Value</span> <span class="i">value</span> = <span class="i">pop</span>();
        <span class="i">pop</span>();
        <span class="i">push</span>(<span class="i">value</span>);
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>This is a little more complex than <code>OP_GET_PROPERTY</code>. When this executes, the
top of the stack has the instance whose field is being set and above that, the
value to be stored. Like before, we read the instruction&rsquo;s operand and find the
field name string. Using that, we store the value on top of the stack into the
instance&rsquo;s field table.</p>
<p>After that is a little <span name="stack">stack</span> juggling. We pop the
stored value off, then pop the instance, and finally push the value back on. In
other words, we remove the <em>second</em> element from the stack while leaving the top
alone. A setter is itself an expression whose result is the assigned value, so
we need to leave that value on the stack. Here&rsquo;s what I mean:</p>
<aside name="stack">
<p>The stack operations go like this:</p><img src="image/classes-and-instances/stack.png" alt="Popping two values and then pushing the first value back on the stack."/>
</aside>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Toast</span> {}
<span class="k">var</span> <span class="i">toast</span> = <span class="t">Toast</span>();
<span class="k">print</span> <span class="i">toast</span>.<span class="i">jam</span> = <span class="s">&quot;grape&quot;</span>; <span class="c">// Prints &quot;grape&quot;.</span>
</pre></div>
<p>Unlike when reading a field, we don&rsquo;t need to worry about the hash table not
containing the field. A setter implicitly creates the field if needed. We do
need to handle the user incorrectly trying to store a field on a value that
isn&rsquo;t an instance.</p>
<div class="codehilite"><pre class="insert-before">      case OP_SET_PROPERTY: {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">        <span class="k">if</span> (!<span class="a">IS_INSTANCE</span>(<span class="i">peek</span>(<span class="n">1</span>))) {
          <span class="i">runtimeError</span>(<span class="s">&quot;Only instances have fields.&quot;</span>);
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }

</pre><pre class="insert-after">        ObjInstance* instance = AS_INSTANCE(peek(1));
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Exactly like with get expressions, we check the value&rsquo;s type and report a
runtime error if it&rsquo;s invalid. And, with that, the stateful side of Lox&rsquo;s
support for object-oriented programming is in place. Give it a try:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Pair</span> {}

<span class="k">var</span> <span class="i">pair</span> = <span class="t">Pair</span>();
<span class="i">pair</span>.<span class="i">first</span> = <span class="n">1</span>;
<span class="i">pair</span>.<span class="i">second</span> = <span class="n">2</span>;
<span class="k">print</span> <span class="i">pair</span>.<span class="i">first</span> + <span class="i">pair</span>.<span class="i">second</span>; <span class="c">// 3.</span>
</pre></div>
<p>This doesn&rsquo;t really feel very <em>object</em>-oriented. It&rsquo;s more like a strange,
dynamically typed variant of C where objects are loose struct-like bags of data.
Sort of a dynamic procedural language. But this is a big step in expressiveness.
Our Lox implementation now lets users freely aggregate data into bigger units.
In the next chapter, we will breathe life into those inert blobs.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Trying to access a non-existent field on an object immediately aborts the
entire VM. The user has no way to recover from this runtime error, nor is
there any way to see if a field exists <em>before</em> trying to access it. It&rsquo;s up
to the user to ensure on their own that only valid fields are read.</p>
<p>How do other dynamically typed languages handle missing fields? What do you
think Lox should do? Implement your solution.</p>
</li>
<li>
<p>Fields are accessed at runtime by their <em>string</em> name. But that name must
always appear directly in the source code as an <em>identifier token</em>. A user
program cannot imperatively build a string value and then use that as the
name of a field. Do you think they should be able to? Devise a language
feature that enables that and implement it.</p>
</li>
<li>
<p>Conversely, Lox offers no way to <em>remove</em> a field from an instance. You can
set a field&rsquo;s value to <code>nil</code>, but the entry in the hash table is still
there. How do other languages handle this? Choose and implement a strategy
for Lox.</p>
</li>
<li>
<p>Because fields are accessed by name at runtime, working with instance state
is slow. It&rsquo;s technically a constant-time operation<span class="em">&mdash;</span>thanks, hash tables<span class="em">&mdash;</span>but the constant factors are relatively large. This is a major component
of why dynamic languages are slower than statically typed ones.</p>
<p>How do sophisticated implementations of dynamically typed languages cope
with and optimize this?</p>
</li>
</ol>
</div>

<footer>
<a href="methods-and-initializers.html" class="next">
  Next Chapter: &ldquo;Methods and Initializers&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/classes.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Classes &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Classes<small>12</small></a></h3>

<ul>
    <li><a href="#oop-and-classes"><small>12.1</small> OOP and Classes</a></li>
    <li><a href="#class-declarations"><small>12.2</small> Class Declarations</a></li>
    <li><a href="#creating-instances"><small>12.3</small> Creating Instances</a></li>
    <li><a href="#properties-on-instances"><small>12.4</small> Properties on Instances</a></li>
    <li><a href="#methods-on-classes"><small>12.5</small> Methods on Classes</a></li>
    <li><a href="#this"><small>12.6</small> This</a></li>
    <li><a href="#constructors-and-initializers"><small>12.7</small> Constructors and Initializers</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Prototypes and Power</a></li>
</ul>


<div class="prev-next">
    <a href="resolving-and-binding.html" title="Resolving and Binding" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="inheritance.html" title="Inheritance" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="resolving-and-binding.html" title="Resolving and Binding" class="prev">←</a>
<a href="inheritance.html" title="Inheritance" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Classes<small>12</small></a></h3>

<ul>
    <li><a href="#oop-and-classes"><small>12.1</small> OOP and Classes</a></li>
    <li><a href="#class-declarations"><small>12.2</small> Class Declarations</a></li>
    <li><a href="#creating-instances"><small>12.3</small> Creating Instances</a></li>
    <li><a href="#properties-on-instances"><small>12.4</small> Properties on Instances</a></li>
    <li><a href="#methods-on-classes"><small>12.5</small> Methods on Classes</a></li>
    <li><a href="#this"><small>12.6</small> This</a></li>
    <li><a href="#constructors-and-initializers"><small>12.7</small> Constructors and Initializers</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Prototypes and Power</a></li>
</ul>


<div class="prev-next">
    <a href="resolving-and-binding.html" title="Resolving and Binding" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="inheritance.html" title="Inheritance" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">12</div>
  <h1>Classes</h1>

<blockquote>
<p>One has no right to love or hate anything if one has not acquired a thorough
knowledge of its nature. Great love springs from great knowledge of the
beloved object, and if you know it but little you will be able to love it only
a little or not at all.</p>
<p><cite>Leonardo da Vinci</cite></p>
</blockquote>
<p>We&rsquo;re eleven chapters in, and the interpreter sitting on your machine is nearly
a complete scripting language. It could use a couple of built-in data structures
like lists and maps, and it certainly needs a core library for file I/O, user
input, etc. But the language itself is sufficient. We&rsquo;ve got a little procedural
language in the same vein as BASIC, Tcl, Scheme (minus macros), and early
versions of Python and Lua.</p>
<p>If this were the &rsquo;80s, we&rsquo;d stop here. But today, many popular languages support
&ldquo;object-oriented programming&rdquo;. Adding that to Lox will give users a familiar set
of tools for writing larger programs. Even if you personally don&rsquo;t <span
name="hate">like</span> OOP, this chapter and <a href="inheritance.html">the next</a> will help
you understand how others design and build object systems.</p>
<aside name="hate">
<p>If you <em>really</em> hate classes, though, you can skip these two chapters. They are
fairly isolated from the rest of the book. Personally, I find it&rsquo;s good to learn
more about the things I dislike. Things look simple at a distance, but as I get
closer, details emerge and I gain a more nuanced perspective.</p>
</aside>
<h2><a href="#oop-and-classes" id="oop-and-classes"><small>12&#8202;.&#8202;1</small>OOP and Classes</a></h2>
<p>There are three broad paths to object-oriented programming: classes,
<a href="http://gameprogrammingpatterns.com/prototype.html">prototypes</a>, and <span name="multimethods"><a href="https://en.wikipedia.org/wiki/Multiple_dispatch">multimethods</a></span>. Classes
came first and are the most popular style. With the rise of JavaScript (and to a
lesser extent <a href="https://www.lua.org/pil/13.4.1.html">Lua</a>), prototypes are more widely known than they used to be.
I&rsquo;ll talk more about those <a href="#design-note">later</a>. For Lox, we&rsquo;re taking the, ahem, classic
approach.</p>
<aside name="multimethods">
<p>Multimethods are the approach you&rsquo;re least likely to be familiar with. I&rsquo;d love
to talk more about them<span class="em">&mdash;</span>I designed <a href="http://magpie-lang.org/">a hobby language</a> around them
once and they are <em>super rad</em><span class="em">&mdash;</span>but there are only so many pages I can fit in.
If you&rsquo;d like to learn more, take a look at <a href="https://en.wikipedia.org/wiki/Common_Lisp_Object_System">CLOS</a> (the object system in
Common Lisp), <a href="https://opendylan.org/">Dylan</a>, <a href="https://julialang.org/">Julia</a>, or <a href="https://docs.raku.org/language/functions#Multi-dispatch">Raku</a>.</p>
</aside>
<p>Since you&rsquo;ve written about a thousand lines of Java code with me already, I&rsquo;m
assuming you don&rsquo;t need a detailed introduction to object orientation. The main
goal is to bundle data with the code that acts on it. Users do that by declaring
a <em>class</em> that:</p>
<p><span name="circle"></span></p>
<ol>
<li>
<p>Exposes a <em>constructor</em> to create and initialize new <em>instances</em> of the
class</p>
</li>
<li>
<p>Provides a way to store and access <em>fields</em> on instances</p>
</li>
<li>
<p>Defines a set of <em>methods</em> shared by all instances of the class that
operate on each instance&rsquo;s state.</p>
</li>
</ol>
<p>That&rsquo;s about as minimal as it gets. Most object-oriented languages, all the way
back to Simula, also do inheritance to reuse behavior across classes. We&rsquo;ll add
that in the <a href="inheritance.html">next chapter</a>. Even kicking that out, we still have a
lot to get through. This is a big chapter and everything doesn&rsquo;t quite come
together until we have all of the above pieces, so gather your stamina.</p>
<aside name="circle"><img src="image/classes/circle.png" alt="The relationships between classes, methods, instances, constructors, and fields." />
<p>It&rsquo;s like the circle of life, <em>sans</em> Sir Elton John.</p>
</aside>
<h2><a href="#class-declarations" id="class-declarations"><small>12&#8202;.&#8202;2</small>Class Declarations</a></h2>
<p>Like we do, we&rsquo;re gonna start with syntax. A <code>class</code> statement introduces a new
name, so it lives in the <code>declaration</code> grammar rule.</p>
<div class="codehilite"><pre><span class="i">declaration</span>    → <span class="i">classDecl</span>
               | <span class="i">funDecl</span>
               | <span class="i">varDecl</span>
               | <span class="i">statement</span> ;

<span class="i">classDecl</span>      → <span class="s">&quot;class&quot;</span> <span class="t">IDENTIFIER</span> <span class="s">&quot;{&quot;</span> <span class="i">function</span>* <span class="s">&quot;}&quot;</span> ;
</pre></div>
<p>The new <code>classDecl</code> rule relies on the <code>function</code> rule we defined
<a href="functions.html#function-declarations">earlier</a>. To refresh your memory:</p>
<div class="codehilite"><pre><span class="i">function</span>       → <span class="t">IDENTIFIER</span> <span class="s">&quot;(&quot;</span> <span class="i">parameters</span>? <span class="s">&quot;)&quot;</span> <span class="i">block</span> ;
<span class="i">parameters</span>     → <span class="t">IDENTIFIER</span> ( <span class="s">&quot;,&quot;</span> <span class="t">IDENTIFIER</span> )* ;
</pre></div>
<p>In plain English, a class declaration is the <code>class</code> keyword, followed by the
class&rsquo;s name, then a curly-braced body. Inside that body is a list of method
declarations. Unlike function declarations, methods don&rsquo;t have a leading <span
name="fun"><code>fun</code></span> keyword. Each method is a name, parameter list, and
body. Here&rsquo;s an example:</p>
<aside name="fun">
<p>Not that I&rsquo;m trying to say methods aren&rsquo;t fun or anything.</p>
</aside>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Breakfast</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Eggs a-fryin&#39;!&quot;</span>;
  }

  <span class="i">serve</span>(<span class="i">who</span>) {
    <span class="k">print</span> <span class="s">&quot;Enjoy your breakfast, &quot;</span> + <span class="i">who</span> + <span class="s">&quot;.&quot;</span>;
  }
}
</pre></div>
<p>As in most dynamically typed languages, fields are not explicitly listed in the
class declaration. Instances are loose bags of data and you can freely add
fields to them as you see fit using normal imperative code.</p>
<p>Over in our AST generator, the <code>classDecl</code> grammar rule gets its own statement
<span name="class-ast">node</span>.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Block      : List&lt;Stmt&gt; statements&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Class      : Token name, List&lt;Stmt.Function&gt; methods&quot;</span>,
</pre><pre class="insert-after">      &quot;Expression : Expr expression&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="class-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#class-statement">Appendix II</a>.</p>
</aside>
<p>It stores the class&rsquo;s name and the methods inside its body. Methods are
represented by the existing Stmt.Function class that we use for function
declaration AST nodes. That gives us all the bits of state that we need for a
method: name, parameter list, and body.</p>
<p>A class can appear anywhere a named declaration is allowed, triggered by the
leading <code>class</code> keyword.</p>
<div class="codehilite"><pre class="insert-before">    try {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>declaration</em>()</div>
<pre class="insert">      <span class="k">if</span> (<span class="i">match</span>(<span class="i">CLASS</span>)) <span class="k">return</span> <span class="i">classDeclaration</span>();
</pre><pre class="insert-after">      if (match(FUN)) return function(&quot;function&quot;);
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>declaration</em>()</div>

<p>That calls out to:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>declaration</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">classDeclaration</span>() {
    <span class="t">Token</span> <span class="i">name</span> = <span class="i">consume</span>(<span class="i">IDENTIFIER</span>, <span class="s">&quot;Expect class name.&quot;</span>);
    <span class="i">consume</span>(<span class="i">LEFT_BRACE</span>, <span class="s">&quot;Expect &#39;{&#39; before class body.&quot;</span>);

    <span class="t">List</span>&lt;<span class="t">Stmt</span>.<span class="t">Function</span>&gt; <span class="i">methods</span> = <span class="k">new</span> <span class="t">ArrayList</span>&lt;&gt;();
    <span class="k">while</span> (!<span class="i">check</span>(<span class="i">RIGHT_BRACE</span>) &amp;&amp; !<span class="i">isAtEnd</span>()) {
      <span class="i">methods</span>.<span class="i">add</span>(<span class="i">function</span>(<span class="s">&quot;method&quot;</span>));
    }

    <span class="i">consume</span>(<span class="i">RIGHT_BRACE</span>, <span class="s">&quot;Expect &#39;}&#39; after class body.&quot;</span>);

    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Class</span>(<span class="i">name</span>, <span class="i">methods</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>declaration</em>()</div>

<p>There&rsquo;s more meat to this than most of the other parsing methods, but it roughly
follows the grammar. We&rsquo;ve already consumed the <code>class</code> keyword, so we look for
the expected class name next, followed by the opening curly brace. Once inside
the body, we keep parsing method declarations until we hit the closing brace.
Each method declaration is parsed by a call to <code>function()</code>, which we defined
back in the <a href="functions.html">chapter where functions were introduced</a>.</p>
<p>Like we do in any open-ended loop in the parser, we also check for hitting the
end of the file. That won&rsquo;t happen in correct code since a class should have a
closing brace at the end, but it ensures the parser doesn&rsquo;t get stuck in an
infinite loop if the user has a syntax error and forgets to correctly end the
class body.</p>
<p>We wrap the name and list of methods into a Stmt.Class node and we&rsquo;re done.
Previously, we would jump straight into the interpreter, but now we need to
plumb the node through the resolver first.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitBlockStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitClassStmt</span>(<span class="t">Stmt</span>.<span class="t">Class</span> <span class="i">stmt</span>) {
    <span class="i">declare</span>(<span class="i">stmt</span>.<span class="i">name</span>);
    <span class="i">define</span>(<span class="i">stmt</span>.<span class="i">name</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitBlockStmt</em>()</div>

<p>We aren&rsquo;t going to worry about resolving the methods themselves yet, so for now
all we need to do is declare the class using its name. It&rsquo;s not common to
declare a class as a local variable, but Lox permits it, so we need to handle it
correctly.</p>
<p>Now we interpret the class declaration.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitBlockStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitClassStmt</span>(<span class="t">Stmt</span>.<span class="t">Class</span> <span class="i">stmt</span>) {
    <span class="i">environment</span>.<span class="i">define</span>(<span class="i">stmt</span>.<span class="i">name</span>.<span class="i">lexeme</span>, <span class="k">null</span>);
    <span class="t">LoxClass</span> <span class="i">klass</span> = <span class="k">new</span> <span class="t">LoxClass</span>(<span class="i">stmt</span>.<span class="i">name</span>.<span class="i">lexeme</span>);
    <span class="i">environment</span>.<span class="i">assign</span>(<span class="i">stmt</span>.<span class="i">name</span>, <span class="i">klass</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitBlockStmt</em>()</div>

<p>This looks similar to how we execute function declarations. We declare the
class&rsquo;s name in the current environment. Then we turn the class <em>syntax node</em>
into a LoxClass, the <em>runtime</em> representation of a class. We circle back and
store the class object in the variable we previously declared. That two-stage
variable binding process allows references to the class inside its own methods.</p>
<p>We will refine it throughout the chapter, but the first draft of LoxClass looks
like this:</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxClass.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.List</span>;
<span class="k">import</span> <span class="i">java.util.Map</span>;

<span class="k">class</span> <span class="t">LoxClass</span> {
  <span class="k">final</span> <span class="t">String</span> <span class="i">name</span>;

  <span class="t">LoxClass</span>(<span class="t">String</span> <span class="i">name</span>) {
    <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
  }

  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">String</span> <span class="i">toString</span>() {
    <span class="k">return</span> <span class="i">name</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, create new file</div>

<p>Literally a wrapper around a name. We don&rsquo;t even store the methods yet. Not
super useful, but it does have a <code>toString()</code> method so we can write a trivial
script and test that class declarations are actually being parsed and executed.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">DevonshireCream</span> {
  <span class="i">serveOn</span>() {
    <span class="k">return</span> <span class="s">&quot;Scones&quot;</span>;
  }
}

<span class="k">print</span> <span class="t">DevonshireCream</span>; <span class="c">// Prints &quot;DevonshireCream&quot;.</span>
</pre></div>
<h2><a href="#creating-instances" id="creating-instances"><small>12&#8202;.&#8202;3</small>Creating Instances</a></h2>
<p>We have classes, but they don&rsquo;t do anything yet. Lox doesn&rsquo;t have &ldquo;static&rdquo;
methods that you can call right on the class itself, so without actual
instances, classes are useless. Thus instances are the next step.</p>
<p>While some syntax and semantics are fairly standard across OOP languages, the
way you create new instances isn&rsquo;t. Ruby, following Smalltalk, creates instances
by calling a method on the class object itself, a <span
name="turtles">recursively</span> graceful approach. Some, like C++ and Java,
have a <code>new</code> keyword dedicated to birthing a new object. Python has you &ldquo;call&rdquo;
the class itself like a function. (JavaScript, ever weird, sort of does both.)</p>
<aside name="turtles">
<p>In Smalltalk, even <em>classes</em> are created by calling methods on an existing
object, usually the desired superclass. It&rsquo;s sort of a turtles-all-the-way-down
thing. It ultimately bottoms out on a few magical classes like Object and
Metaclass that the runtime conjures into being <em>ex nihilo</em>.</p>
</aside>
<p>I took a minimal approach with Lox. We already have class objects, and we
already have function calls, so we&rsquo;ll use call expressions on class objects to
create new instances. It&rsquo;s as if a class is a factory function that generates
instances of itself. This feels elegant to me, and also spares us the need to
introduce syntax like <code>new</code>. Therefore, we can skip past the front end straight
into the runtime.</p>
<p>Right now, if you try this:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Bagel</span> {}
<span class="t">Bagel</span>();
</pre></div>
<p>You get a runtime error. <code>visitCallExpr()</code> checks to see if the called object
implements <code>LoxCallable</code> and reports an error since LoxClass doesn&rsquo;t. Not <em>yet</em>,
that is.</p>
<div class="codehilite"><pre class="insert-before">import java.util.Map;

</pre><div class="source-file"><em>lox/LoxClass.java</em><br>
replace 1 line</div>
<pre class="insert"><span class="k">class</span> <span class="t">LoxClass</span> <span class="k">implements</span> <span class="t">LoxCallable</span> {
</pre><pre class="insert-after">  final String name;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, replace 1 line</div>

<p>Implementing that interface requires two methods.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxClass.java</em><br>
add after <em>toString</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">call</span>(<span class="t">Interpreter</span> <span class="i">interpreter</span>,
                     <span class="t">List</span>&lt;<span class="t">Object</span>&gt; <span class="i">arguments</span>) {
    <span class="t">LoxInstance</span> <span class="i">instance</span> = <span class="k">new</span> <span class="t">LoxInstance</span>(<span class="k">this</span>);
    <span class="k">return</span> <span class="i">instance</span>;
  }

  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">int</span> <span class="i">arity</span>() {
    <span class="k">return</span> <span class="n">0</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, add after <em>toString</em>()</div>

<p>The interesting one is <code>call()</code>. When you &ldquo;call&rdquo; a class, it instantiates a new
LoxInstance for the called class and returns it. The <code>arity()</code> method is how the
interpreter validates that you passed the right number of arguments to a
callable. For now, we&rsquo;ll say you can&rsquo;t pass any. When we get to user-defined
constructors, we&rsquo;ll revisit this.</p>
<p>That leads us to LoxInstance, the runtime representation of an instance of a Lox
class. Again, our first implementation starts small.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxInstance.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.HashMap</span>;
<span class="k">import</span> <span class="i">java.util.Map</span>;

<span class="k">class</span> <span class="t">LoxInstance</span> {
  <span class="k">private</span> <span class="t">LoxClass</span> <span class="i">klass</span>;

  <span class="t">LoxInstance</span>(<span class="t">LoxClass</span> <span class="i">klass</span>) {
    <span class="k">this</span>.<span class="i">klass</span> = <span class="i">klass</span>;
  }

  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">String</span> <span class="i">toString</span>() {
    <span class="k">return</span> <span class="i">klass</span>.<span class="i">name</span> + <span class="s">&quot; instance&quot;</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/LoxInstance.java</em>, create new file</div>

<p>Like LoxClass, it&rsquo;s pretty bare bones, but we&rsquo;re only getting started. If you
want to give it a try, here&rsquo;s a script to run:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Bagel</span> {}
<span class="k">var</span> <span class="i">bagel</span> = <span class="t">Bagel</span>();
<span class="k">print</span> <span class="i">bagel</span>; <span class="c">// Prints &quot;Bagel instance&quot;.</span>
</pre></div>
<p>This program doesn&rsquo;t do much, but it&rsquo;s starting to do <em>something</em>.</p>
<h2><a href="#properties-on-instances" id="properties-on-instances"><small>12&#8202;.&#8202;4</small>Properties on Instances</a></h2>
<p>We have instances, so we should make them useful. We&rsquo;re at a fork in the road.
We could add behavior first<span class="em">&mdash;</span>methods<span class="em">&mdash;</span>or we could start with state<span class="em">&mdash;</span>properties. We&rsquo;re going to take the latter because, as we&rsquo;ll see, the two get
entangled in an interesting way and it will be easier to make sense of them if
we get properties working first.</p>
<p>Lox follows JavaScript and Python in how it handles state. Every instance is an
open collection of named values. Methods on the instance&rsquo;s class can access and
modify properties, but so can <span name="outside">outside</span> code.
Properties are accessed using a <code>.</code> syntax.</p>
<aside name="outside">
<p>Allowing code outside of the class to directly modify an object&rsquo;s fields goes
against the object-oriented credo that a class <em>encapsulates</em> state. Some
languages take a more principled stance. In Smalltalk, fields are accessed using
simple identifiers<span class="em">&mdash;</span>essentially, variables that are only in scope inside a
class&rsquo;s methods. Ruby uses <code>@</code> followed by a name to access a field in an
object. That syntax is only meaningful inside a method and always accesses state
on the current object.</p>
<p>Lox, for better or worse, isn&rsquo;t quite so pious about its OOP faith.</p>
</aside>
<div class="codehilite"><pre><span class="i">someObject</span>.<span class="i">someProperty</span>
</pre></div>
<p>An expression followed by <code>.</code> and an identifier reads the property with that
name from the object the expression evaluates to. That dot has the same
precedence as the parentheses in a function call expression, so we slot it into
the grammar by replacing the existing <code>call</code> rule with:</p>
<div class="codehilite"><pre><span class="i">call</span>           → <span class="i">primary</span> ( <span class="s">&quot;(&quot;</span> <span class="i">arguments</span>? <span class="s">&quot;)&quot;</span> | <span class="s">&quot;.&quot;</span> <span class="t">IDENTIFIER</span> )* ;
</pre></div>
<p>After a primary expression, we allow a series of any mixture of parenthesized
calls and dotted property accesses. &ldquo;Property access&rdquo; is a mouthful, so from
here on out, we&rsquo;ll call these &ldquo;get expressions&rdquo;.</p>
<h3><a href="#get-expressions" id="get-expressions"><small>12&#8202;.&#8202;4&#8202;.&#8202;1</small>Get expressions</a></h3>
<p>The <span name="get-ast">syntax tree node</span> is:</p>
<div class="codehilite"><pre class="insert-before">      &quot;Call     : Expr callee, Token paren, List&lt;Expr&gt; arguments&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Get      : Expr object, Token name&quot;</span>,
</pre><pre class="insert-after">      &quot;Grouping : Expr expression&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="get-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#get-expression">Appendix II</a>.</p>
</aside>
<p>Following the grammar, the new parsing code goes in our existing <code>call()</code>
method.</p>
<div class="codehilite"><pre class="insert-before">    while (true) {<span name="while-true"> </span>
      if (match(LEFT_PAREN)) {
        expr = finishCall(expr);
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>call</em>()</div>
<pre class="insert">      } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="i">DOT</span>)) {
        <span class="t">Token</span> <span class="i">name</span> = <span class="i">consume</span>(<span class="i">IDENTIFIER</span>,
            <span class="s">&quot;Expect property name after &#39;.&#39;.&quot;</span>);
        <span class="i">expr</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Get</span>(<span class="i">expr</span>, <span class="i">name</span>);
</pre><pre class="insert-after">      } else {
        break;
      }
    }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>call</em>()</div>

<p>The outer <code>while</code> loop there corresponds to the <code>*</code> in the grammar rule. We zip
along the tokens building up a chain of calls and gets as we find parentheses
and dots, like so:</p><img src="image/classes/zip.png" alt="Parsing a series of '.' and '()' expressions to an AST." />
<p>Instances of the new Expr.Get node feed into the resolver.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitCallExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitGetExpr</span>(<span class="t">Expr</span>.<span class="t">Get</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">object</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitCallExpr</em>()</div>

<p>OK, not much to that. Since properties are looked up <span
name="dispatch">dynamically</span>, they don&rsquo;t get resolved. During resolution,
we recurse only into the expression to the left of the dot. The actual property
access happens in the interpreter.</p>
<aside name="dispatch">
<p>You can literally see that property dispatch in Lox is dynamic since we don&rsquo;t
process the property name during the static resolution pass.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitCallExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitGetExpr</span>(<span class="t">Expr</span>.<span class="t">Get</span> <span class="i">expr</span>) {
    <span class="t">Object</span> <span class="i">object</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">object</span>);
    <span class="k">if</span> (<span class="i">object</span> <span class="k">instanceof</span> <span class="t">LoxInstance</span>) {
      <span class="k">return</span> ((<span class="t">LoxInstance</span>) <span class="i">object</span>).<span class="i">get</span>(<span class="i">expr</span>.<span class="i">name</span>);
    }

    <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">expr</span>.<span class="i">name</span>,
        <span class="s">&quot;Only instances have properties.&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitCallExpr</em>()</div>

<p>First, we evaluate the expression whose property is being accessed. In Lox, only
instances of classes have properties. If the object is some other type like a
number, invoking a getter on it is a runtime error.</p>
<p>If the object is a LoxInstance, then we ask it to look up the property. It must
be time to give LoxInstance some actual state. A map will do fine.</p>
<div class="codehilite"><pre class="insert-before">  private LoxClass klass;
</pre><div class="source-file"><em>lox/LoxInstance.java</em><br>
in class <em>LoxInstance</em></div>
<pre class="insert">  <span class="k">private</span> <span class="k">final</span> <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">Object</span>&gt; <span class="i">fields</span> = <span class="k">new</span> <span class="t">HashMap</span>&lt;&gt;();
</pre><pre class="insert-after">

  LoxInstance(LoxClass klass) {
</pre></div>
<div class="source-file-narrow"><em>lox/LoxInstance.java</em>, in class <em>LoxInstance</em></div>

<p>Each key in the map is a property name and the corresponding value is the
property&rsquo;s value. To look up a property on an instance:</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxInstance.java</em><br>
add after <em>LoxInstance</em>()</div>
<pre>  <span class="t">Object</span> <span class="i">get</span>(<span class="t">Token</span> <span class="i">name</span>) {
    <span class="k">if</span> (<span class="i">fields</span>.<span class="i">containsKey</span>(<span class="i">name</span>.<span class="i">lexeme</span>)) {
      <span class="k">return</span> <span class="i">fields</span>.<span class="i">get</span>(<span class="i">name</span>.<span class="i">lexeme</span>);
    }

    <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">name</span>,<span name="hidden"> </span>
        <span class="s">&quot;Undefined property &#39;&quot;</span> + <span class="i">name</span>.<span class="i">lexeme</span> + <span class="s">&quot;&#39;.&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxInstance.java</em>, add after <em>LoxInstance</em>()</div>

<aside name="hidden">
<p>Doing a hash table lookup for every field access is fast enough for many
language implementations, but not ideal. High-performance VMs for languages like
JavaScript use sophisticated optimizations like &ldquo;<a href="http://richardartoul.github.io/jekyll/update/2015/04/26/hidden-classes.html">hidden classes</a>&rdquo; to avoid
that overhead.</p>
<p>Paradoxically, many of the optimizations invented to make dynamic languages fast
rest on the observation that<span class="em">&mdash;</span>even in those languages<span class="em">&mdash;</span>most code is fairly
static in terms of the types of objects it works with and their fields.</p>
</aside>
<p>An interesting edge case we need to handle is what happens if the instance
doesn&rsquo;t <em>have</em> a property with the given name. We could silently return some
dummy value like <code>nil</code>, but my experience with languages like JavaScript is that
this behavior masks bugs more often than it does anything useful. Instead, we&rsquo;ll
make it a runtime error.</p>
<p>So the first thing we do is see if the instance actually has a field with the
given name. Only then do we return it. Otherwise, we raise an error.</p>
<p>Note how I switched from talking about &ldquo;properties&rdquo; to &ldquo;fields&rdquo;. There is a
subtle difference between the two. Fields are named bits of state stored
directly in an instance. Properties are the named, uh, <em>things</em>, that a get
expression may return. Every field is a property, but as we&rsquo;ll see <span
name="foreshadowing">later</span>, not every property is a field.</p>
<aside name="foreshadowing">
<p>Ooh, foreshadowing. Spooky!</p>
</aside>
<p>In theory, we can now read properties on objects. But since there&rsquo;s no way to
actually stuff any state into an instance, there are no fields to access. Before
we can test out reading, we must support writing.</p>
<h3><a href="#set-expressions" id="set-expressions"><small>12&#8202;.&#8202;4&#8202;.&#8202;2</small>Set expressions</a></h3>
<p>Setters use the same syntax as getters, except they appear on the left side of
an assignment.</p>
<div class="codehilite"><pre><span class="i">someObject</span>.<span class="i">someProperty</span> = <span class="i">value</span>;
</pre></div>
<p>In grammar land, we extend the rule for assignment to allow dotted identifiers
on the left-hand side.</p>
<div class="codehilite"><pre><span class="i">assignment</span>     → ( <span class="i">call</span> <span class="s">&quot;.&quot;</span> )? <span class="t">IDENTIFIER</span> <span class="s">&quot;=&quot;</span> <span class="i">assignment</span>
               | <span class="i">logic_or</span> ;
</pre></div>
<p>Unlike getters, setters don&rsquo;t chain. However, the reference to <code>call</code> allows any
high-precedence expression before the last dot, including any number of
<em>getters</em>, as in:</p><img src="image/classes/setter.png" alt="breakfast.omelette.filling.meat = ham" />
<p>Note here that only the <em>last</em> part, the <code>.meat</code>, is the <em>setter</em>. The
<code>.omelette</code> and <code>.filling</code> parts are both <em>get</em> expressions.</p>
<p>Just as we have two separate AST nodes for variable access and variable
assignment, we need a <span name="set-ast">second setter node</span> to
complement our getter node.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Logical  : Expr left, Token operator, Expr right&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Set      : Expr object, Token name, Expr value&quot;</span>,
</pre><pre class="insert-after">      &quot;Unary    : Token operator, Expr right&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="set-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#set-expression">Appendix II</a>.</p>
</aside>
<p>In case you don&rsquo;t remember, the way we handle assignment in the parser is a
little funny. We can&rsquo;t easily tell that a series of tokens is the left-hand side
of an assignment until we reach the <code>=</code>. Now that our assignment grammar rule
has <code>call</code> on the left side, which can expand to arbitrarily large expressions,
that final <code>=</code> may be many tokens away from the point where we need to know
we&rsquo;re parsing an assignment.</p>
<p>Instead, the trick we do is parse the left-hand side as a normal expression.
Then, when we stumble onto the equal sign after it, we take the expression we
already parsed and transform it into the correct syntax tree node for the
assignment.</p>
<p>We add another clause to that transformation to handle turning an Expr.Get
expression on the left into the corresponding Expr.Set.</p>
<div class="codehilite"><pre class="insert-before">        return new Expr.Assign(name, value);
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>assignment</em>()</div>
<pre class="insert">      } <span class="k">else</span> <span class="k">if</span> (<span class="i">expr</span> <span class="k">instanceof</span> <span class="t">Expr</span>.<span class="t">Get</span>) {
        <span class="t">Expr</span>.<span class="t">Get</span> <span class="i">get</span> = (<span class="t">Expr</span>.<span class="t">Get</span>)<span class="i">expr</span>;
        <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Set</span>(<span class="i">get</span>.<span class="i">object</span>, <span class="i">get</span>.<span class="i">name</span>, <span class="i">value</span>);
</pre><pre class="insert-after">      }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>assignment</em>()</div>

<p>That&rsquo;s parsing our syntax. We push that node through into the resolver.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitLogicalExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitSetExpr</span>(<span class="t">Expr</span>.<span class="t">Set</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">value</span>);
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">object</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitLogicalExpr</em>()</div>

<p>Again, like Expr.Get, the property itself is dynamically evaluated, so there&rsquo;s
nothing to resolve there. All we need to do is recurse into the two
subexpressions of Expr.Set, the object whose property is being set, and the
value it&rsquo;s being set to.</p>
<p>That leads us to the interpreter.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitLogicalExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitSetExpr</span>(<span class="t">Expr</span>.<span class="t">Set</span> <span class="i">expr</span>) {
    <span class="t">Object</span> <span class="i">object</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">object</span>);

    <span class="k">if</span> (!(<span class="i">object</span> <span class="k">instanceof</span> <span class="t">LoxInstance</span>)) {<span name="order"> </span>
      <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">expr</span>.<span class="i">name</span>,
                             <span class="s">&quot;Only instances have fields.&quot;</span>);
    }

    <span class="t">Object</span> <span class="i">value</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">value</span>);
    ((<span class="t">LoxInstance</span>)<span class="i">object</span>).<span class="i">set</span>(<span class="i">expr</span>.<span class="i">name</span>, <span class="i">value</span>);
    <span class="k">return</span> <span class="i">value</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitLogicalExpr</em>()</div>

<p>We evaluate the object whose property is being set and check to see if it&rsquo;s a
LoxInstance. If not, that&rsquo;s a runtime error. Otherwise, we evaluate the value
being set and store it on the instance. That relies on a new method in
LoxInstance.</p>
<aside name="order">
<p>This is another semantic edge case. There are three distinct operations:</p>
<ol>
<li>
<p>Evaluate the object.</p>
</li>
<li>
<p>Raise a runtime error if it&rsquo;s not an instance of a class.</p>
</li>
<li>
<p>Evaluate the value.</p>
</li>
</ol>
<p>The order that those are performed in could be user visible, which means we need
to carefully specify it and ensure our implementations do these in the same
order.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/LoxInstance.java</em><br>
add after <em>get</em>()</div>
<pre>  <span class="t">void</span> <span class="i">set</span>(<span class="t">Token</span> <span class="i">name</span>, <span class="t">Object</span> <span class="i">value</span>) {
    <span class="i">fields</span>.<span class="i">put</span>(<span class="i">name</span>.<span class="i">lexeme</span>, <span class="i">value</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxInstance.java</em>, add after <em>get</em>()</div>

<p>No real magic here. We stuff the values straight into the Java map where fields
live. Since Lox allows freely creating new fields on instances, there&rsquo;s no need
to see if the key is already present.</p>
<h2><a href="#methods-on-classes" id="methods-on-classes"><small>12&#8202;.&#8202;5</small>Methods on Classes</a></h2>
<p>You can create instances of classes and stuff data into them, but the class
itself doesn&rsquo;t really <em>do</em> anything. Instances are just maps and all instances
are more or less the same. To make them feel like instances <em>of classes</em>, we
need behavior<span class="em">&mdash;</span>methods.</p>
<p>Our helpful parser already parses method declarations, so we&rsquo;re good there. We
also don&rsquo;t need to add any new parser support for method <em>calls</em>. We already
have <code>.</code> (getters) and <code>()</code> (function calls). A &ldquo;method call&rdquo; simply chains
those together.</p><img src="image/classes/method.png" alt="The syntax tree for 'object.method(argument)'" />
<p>That raises an interesting question. What happens when those two expressions are
pulled apart? Assuming that <code>method</code> in this example is a method on the class of
<code>object</code> and not a field on the instance, what should the following piece of
code do?</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">m</span> = <span class="i">object</span>.<span class="i">method</span>;
<span class="i">m</span>(<span class="i">argument</span>);
</pre></div>
<p>This program &ldquo;looks up&rdquo; the method and stores the result<span class="em">&mdash;</span>whatever that is<span class="em">&mdash;</span>in a variable and then calls that object later. Is this allowed? Can you treat a
method like it&rsquo;s a function on the instance?</p>
<p>What about the other direction?</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Box</span> {}

<span class="k">fun</span> <span class="i">notMethod</span>(<span class="i">argument</span>) {
  <span class="k">print</span> <span class="s">&quot;called function with &quot;</span> + <span class="i">argument</span>;
}

<span class="k">var</span> <span class="i">box</span> = <span class="t">Box</span>();
<span class="i">box</span>.<span class="i">function</span> = <span class="i">notMethod</span>;
<span class="i">box</span>.<span class="i">function</span>(<span class="s">&quot;argument&quot;</span>);
</pre></div>
<p>This program creates an instance and then stores a function in a field on it.
Then it calls that function using the same syntax as a method call. Does that
work?</p>
<p>Different languages have different answers to these questions. One could write a
treatise on it. For Lox, we&rsquo;ll say the answer to both of these is yes, it does
work. We have a couple of reasons to justify that. For the second example<span class="em">&mdash;</span>calling a function stored in a field<span class="em">&mdash;</span>we want to support that because
first-class functions are useful and storing them in fields is a perfectly
normal thing to do.</p>
<p>The first example is more obscure. One motivation is that users generally expect
to be able to hoist a subexpression out into a local variable without changing
the meaning of the program. You can take this:</p>
<div class="codehilite"><pre><span class="i">breakfast</span>(<span class="i">omelette</span>.<span class="i">filledWith</span>(<span class="i">cheese</span>), <span class="i">sausage</span>);
</pre></div>
<p>And turn it into this:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">eggs</span> = <span class="i">omelette</span>.<span class="i">filledWith</span>(<span class="i">cheese</span>);
<span class="i">breakfast</span>(<span class="i">eggs</span>, <span class="i">sausage</span>);
</pre></div>
<p>And it does the same thing. Likewise, since the <code>.</code> and the <code>()</code> in a method
call <em>are</em> two separate expressions, it seems you should be able to hoist the
<em>lookup</em> part into a variable and then call it <span
name="callback">later</span>. We need to think carefully about what the <em>thing</em>
you get when you look up a method is, and how it behaves, even in weird cases
like:</p>
<aside name="callback">
<p>A motivating use for this is callbacks. Often, you want to pass a callback whose
body simply invokes a method on some object. Being able to look up the method and
pass it directly saves you the chore of manually declaring a function to wrap
it. Compare this:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">callback</span>(<span class="i">a</span>, <span class="i">b</span>, <span class="i">c</span>) {
  <span class="i">object</span>.<span class="i">method</span>(<span class="i">a</span>, <span class="i">b</span>, <span class="i">c</span>);
}

<span class="i">takeCallback</span>(<span class="i">callback</span>);
</pre></div>
<p>With this:</p>
<div class="codehilite"><pre><span class="i">takeCallback</span>(<span class="i">object</span>.<span class="i">method</span>);
</pre></div>
</aside>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Person</span> {
  <span class="i">sayName</span>() {
    <span class="k">print</span> <span class="k">this</span>.<span class="i">name</span>;
  }
}

<span class="k">var</span> <span class="i">jane</span> = <span class="t">Person</span>();
<span class="i">jane</span>.<span class="i">name</span> = <span class="s">&quot;Jane&quot;</span>;

<span class="k">var</span> <span class="i">method</span> = <span class="i">jane</span>.<span class="i">sayName</span>;
<span class="i">method</span>(); <span class="c">// ?</span>
</pre></div>
<p>If you grab a handle to a method on some instance and call it later, does it
&ldquo;remember&rdquo; the instance it was pulled off of? Does <code>this</code> inside the method
still refer to that original object?</p>
<p>Here&rsquo;s a more pathological example to bend your brain:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Person</span> {
  <span class="i">sayName</span>() {
    <span class="k">print</span> <span class="k">this</span>.<span class="i">name</span>;
  }
}

<span class="k">var</span> <span class="i">jane</span> = <span class="t">Person</span>();
<span class="i">jane</span>.<span class="i">name</span> = <span class="s">&quot;Jane&quot;</span>;

<span class="k">var</span> <span class="i">bill</span> = <span class="t">Person</span>();
<span class="i">bill</span>.<span class="i">name</span> = <span class="s">&quot;Bill&quot;</span>;

<span class="i">bill</span>.<span class="i">sayName</span> = <span class="i">jane</span>.<span class="i">sayName</span>;
<span class="i">bill</span>.<span class="i">sayName</span>(); <span class="c">// ?</span>
</pre></div>
<p>Does that last line print &ldquo;Bill&rdquo; because that&rsquo;s the instance that we <em>called</em>
the method through, or &ldquo;Jane&rdquo; because it&rsquo;s the instance where we first grabbed
the method?</p>
<p>Equivalent code in Lua and JavaScript would print &ldquo;Bill&rdquo;. Those languages don&rsquo;t
really have a notion of &ldquo;methods&rdquo;. Everything is sort of functions-in-fields, so
it&rsquo;s not clear that <code>jane</code> &ldquo;owns&rdquo; <code>sayName</code> any more than <code>bill</code> does.</p>
<p>Lox, though, has real class syntax so we do know which callable things are
methods and which are functions. Thus, like Python, C#, and others, we will have
methods &ldquo;bind&rdquo; <code>this</code> to the original instance when the method is first grabbed.
Python calls <span name="bound">these</span> <strong>bound methods</strong>.</p>
<aside name="bound">
<p>I know, imaginative name, right?</p>
</aside>
<p>In practice, that&rsquo;s usually what you want. If you take a reference to a method
on some object so you can use it as a callback later, you want to remember the
instance it belonged to, even if that callback happens to be stored in a field
on some other object.</p>
<p>OK, that&rsquo;s a lot of semantics to load into your head. Forget about the edge
cases for a bit. We&rsquo;ll get back to those. For now, let&rsquo;s get basic method calls
working. We&rsquo;re already parsing the method declarations inside the class body, so
the next step is to resolve them.</p>
<div class="codehilite"><pre class="insert-before">    define(stmt.name);
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">

    <span class="k">for</span> (<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">method</span> : <span class="i">stmt</span>.<span class="i">methods</span>) {
      <span class="t">FunctionType</span> <span class="i">declaration</span> = <span class="t">FunctionType</span>.<span class="i">METHOD</span>;
      <span class="i">resolveFunction</span>(<span class="i">method</span>, <span class="i">declaration</span>);<span name="local"> </span>
    }

</pre><pre class="insert-after">    return null;
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<aside name="local">
<p>Storing the function type in a local variable is pointless right now, but we&rsquo;ll
expand this code before too long and it will make more sense.</p>
</aside>
<p>We iterate through the methods in the class body and call the
<code>resolveFunction()</code> method we wrote for handling function declarations already.
The only difference is that we pass in a new FunctionType enum value.</p>
<div class="codehilite"><pre class="insert-before">    NONE,
</pre><pre class="insert-before">    <span class="i">FUNCTION</span><span class="insert-comma">,</span>
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in enum <em>FunctionType</em><br>
add <em>&ldquo;,&rdquo;</em> to previous line</div>
<pre class="insert">    <span class="i">METHOD</span>
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in enum <em>FunctionType</em>, add <em>&ldquo;,&rdquo;</em> to previous line</div>

<p>That&rsquo;s going to be important when we resolve <code>this</code> expressions. For now, don&rsquo;t
worry about it. The interesting stuff is in the interpreter.</p>
<div class="codehilite"><pre class="insert-before">    environment.define(stmt.name.lexeme, null);
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitClassStmt</em>()<br>
replace 1 line</div>
<pre class="insert">

    <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">LoxFunction</span>&gt; <span class="i">methods</span> = <span class="k">new</span> <span class="t">HashMap</span>&lt;&gt;();
    <span class="k">for</span> (<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">method</span> : <span class="i">stmt</span>.<span class="i">methods</span>) {
      <span class="t">LoxFunction</span> <span class="i">function</span> = <span class="k">new</span> <span class="t">LoxFunction</span>(<span class="i">method</span>, <span class="i">environment</span>);
      <span class="i">methods</span>.<span class="i">put</span>(<span class="i">method</span>.<span class="i">name</span>.<span class="i">lexeme</span>, <span class="i">function</span>);
    }

    <span class="t">LoxClass</span> <span class="i">klass</span> = <span class="k">new</span> <span class="t">LoxClass</span>(<span class="i">stmt</span>.<span class="i">name</span>.<span class="i">lexeme</span>, <span class="i">methods</span>);
</pre><pre class="insert-after">    environment.assign(stmt.name, klass);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitClassStmt</em>(), replace 1 line</div>

<p>When we interpret a class declaration statement, we turn the syntactic
representation of the class<span class="em">&mdash;</span>its AST node<span class="em">&mdash;</span>into its runtime representation.
Now, we need to do that for the methods contained in the class as well. Each
method declaration blossoms into a LoxFunction object.</p>
<p>We take all of those and wrap them up into a map, keyed by the method names.
That gets stored in LoxClass.</p>
<div class="codehilite"><pre class="insert-before">  final String name;
</pre><div class="source-file"><em>lox/LoxClass.java</em><br>
in class <em>LoxClass</em><br>
replace 4 lines</div>
<pre class="insert">  <span class="k">private</span> <span class="k">final</span> <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">LoxFunction</span>&gt; <span class="i">methods</span>;

  <span class="t">LoxClass</span>(<span class="t">String</span> <span class="i">name</span>, <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">LoxFunction</span>&gt; <span class="i">methods</span>) {
    <span class="k">this</span>.<span class="i">name</span> = <span class="i">name</span>;
    <span class="k">this</span>.<span class="i">methods</span> = <span class="i">methods</span>;
  }
</pre><pre class="insert-after">

  @Override
  public String toString() {
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, in class <em>LoxClass</em>, replace 4 lines</div>

<p>Where an instance stores state, the class stores behavior. LoxInstance has its
map of fields, and LoxClass gets a map of methods. Even though methods are
owned by the class, they are still accessed through instances of that class.</p>
<div class="codehilite"><pre class="insert-before">  Object get(Token name) {
    if (fields.containsKey(name.lexeme)) {
      return fields.get(name.lexeme);
    }

</pre><div class="source-file"><em>lox/LoxInstance.java</em><br>
in <em>get</em>()</div>
<pre class="insert">    <span class="t">LoxFunction</span> <span class="i">method</span> = <span class="i">klass</span>.<span class="i">findMethod</span>(<span class="i">name</span>.<span class="i">lexeme</span>);
    <span class="k">if</span> (<span class="i">method</span> != <span class="k">null</span>) <span class="k">return</span> <span class="i">method</span>;

</pre><pre class="insert-after">    throw new RuntimeError(name,<span name="hidden"> </span>
        &quot;Undefined property '&quot; + name.lexeme + &quot;'.&quot;);
</pre></div>
<div class="source-file-narrow"><em>lox/LoxInstance.java</em>, in <em>get</em>()</div>

<p>When looking up a property on an instance, if we don&rsquo;t <span
name="shadow">find</span> a matching field, we look for a method with that name
on the instance&rsquo;s class. If found, we return that. This is where the distinction
between &ldquo;field&rdquo; and &ldquo;property&rdquo; becomes meaningful. When accessing a property,
you might get a field<span class="em">&mdash;</span>a bit of state stored on the instance<span class="em">&mdash;</span>or you could
hit a method defined on the instance&rsquo;s class.</p>
<p>The method is looked up using this:</p>
<aside name="shadow">
<p>Looking for a field first implies that fields shadow methods, a subtle but
important semantic point.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/LoxClass.java</em><br>
add after <em>LoxClass</em>()</div>
<pre>  <span class="t">LoxFunction</span> <span class="i">findMethod</span>(<span class="t">String</span> <span class="i">name</span>) {
    <span class="k">if</span> (<span class="i">methods</span>.<span class="i">containsKey</span>(<span class="i">name</span>)) {
      <span class="k">return</span> <span class="i">methods</span>.<span class="i">get</span>(<span class="i">name</span>);
    }

    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, add after <em>LoxClass</em>()</div>

<p>You can probably guess this method is going to get more interesting later. For
now, a simple map lookup on the class&rsquo;s method table is enough to get us
started. Give it a try:</p>
<p><span name="crunch"></span></p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Bacon</span> {
  <span class="i">eat</span>() {
    <span class="k">print</span> <span class="s">&quot;Crunch crunch crunch!&quot;</span>;
  }
}

<span class="t">Bacon</span>().<span class="i">eat</span>(); <span class="c">// Prints &quot;Crunch crunch crunch!&quot;.</span>
</pre></div>
<aside name="crunch">
<p>Apologies if you prefer chewy bacon over crunchy. Feel free to adjust the script
to your taste.</p>
</aside>
<h2><a href="#this" id="this"><small>12&#8202;.&#8202;6</small>This</a></h2>
<p>We can define both behavior and state on objects, but they aren&rsquo;t tied together
yet. Inside a method, we have no way to access the fields of the &ldquo;current&rdquo;
object<span class="em">&mdash;</span>the instance that the method was called on<span class="em">&mdash;</span>nor can we call other
methods on that same object.</p>
<p>To get at that instance, it needs a <span name="i">name</span>. Smalltalk,
Ruby, and Swift use &ldquo;self&rdquo;. Simula, C++, Java, and others use &ldquo;this&rdquo;. Python
uses &ldquo;self&rdquo; by convention, but you can technically call it whatever you like.</p>
<aside name="i">
<p>&ldquo;I&rdquo; would have been a great choice, but using &ldquo;i&rdquo; for loop variables predates
OOP and goes all the way back to Fortran. We are victims of the incidental
choices of our forebears.</p>
</aside>
<p>For Lox, since we generally hew to Java-ish style, we&rsquo;ll go with &ldquo;this&rdquo;. Inside
a method body, a <code>this</code> expression evaluates to the instance that the method was
called on. Or, more specifically, since methods are accessed and then invoked as
two steps, it will refer to the object that the method was <em>accessed</em> from.</p>
<p>That makes our job harder. Peep at:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Egotist</span> {
  <span class="i">speak</span>() {
    <span class="k">print</span> <span class="k">this</span>;
  }
}

<span class="k">var</span> <span class="i">method</span> = <span class="t">Egotist</span>().<span class="i">speak</span>;
<span class="i">method</span>();
</pre></div>
<p>On the second-to-last line, we grab a reference to the <code>speak()</code> method off an
instance of the class. That returns a function, and that function needs to
remember the instance it was pulled off of so that <em>later</em>, on the last line, it
can still find it when the function is called.</p>
<p>We need to take <code>this</code> at the point that the method is accessed and attach it to
the function somehow so that it stays around as long as we need it to. Hmm<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>a
way to store some extra data that hangs around a function, eh? That sounds an
awful lot like a <em>closure</em>, doesn&rsquo;t it?</p>
<p>If we defined <code>this</code> as a sort of hidden variable in an environment that
surrounds the function returned when looking up a method, then uses of <code>this</code> in
the body would be able to find it later. LoxFunction already has the ability to
hold on to a surrounding environment, so we have the machinery we need.</p>
<p>Let&rsquo;s walk through an example to see how it works:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Cake</span> {
  <span class="i">taste</span>() {
    <span class="k">var</span> <span class="i">adjective</span> = <span class="s">&quot;delicious&quot;</span>;
    <span class="k">print</span> <span class="s">&quot;The &quot;</span> + <span class="k">this</span>.<span class="i">flavor</span> + <span class="s">&quot; cake is &quot;</span> + <span class="i">adjective</span> + <span class="s">&quot;!&quot;</span>;
  }
}

<span class="k">var</span> <span class="i">cake</span> = <span class="t">Cake</span>();
<span class="i">cake</span>.<span class="i">flavor</span> = <span class="s">&quot;German chocolate&quot;</span>;
<span class="i">cake</span>.<span class="i">taste</span>(); <span class="c">// Prints &quot;The German chocolate cake is delicious!&quot;.</span>
</pre></div>
<p>When we first evaluate the class definition, we create a LoxFunction for
<code>taste()</code>. Its closure is the environment surrounding the class, in this case
the global one. So the LoxFunction we store in the class&rsquo;s method map looks
like so:</p><img src="image/classes/closure.png" alt="The initial closure for the method." />
<p>When we evaluate the <code>cake.taste</code> get expression, we create a new environment
that binds <code>this</code> to the object the method is accessed from (here, <code>cake</code>). Then
we make a <em>new</em> LoxFunction with the same code as the original one but using
that new environment as its closure.</p><img src="image/classes/bound-method.png" alt="The new closure that binds 'this'." />
<p>This is the LoxFunction that gets returned when evaluating the get expression
for the method name. When that function is later called by a <code>()</code> expression,
we create an environment for the method body as usual.</p><img src="image/classes/call.png" alt="Calling the bound method and creating a new environment for the method body." />
<p>The parent of the body environment is the environment we created earlier to bind
<code>this</code> to the current object. Thus any use of <code>this</code> inside the body
successfully resolves to that instance.</p>
<p>Reusing our environment code for implementing <code>this</code> also takes care of
interesting cases where methods and functions interact, like:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Thing</span> {
  <span class="i">getCallback</span>() {
    <span class="k">fun</span> <span class="i">localFunction</span>() {
      <span class="k">print</span> <span class="k">this</span>;
    }

    <span class="k">return</span> <span class="i">localFunction</span>;
  }
}

<span class="k">var</span> <span class="i">callback</span> = <span class="t">Thing</span>().<span class="i">getCallback</span>();
<span class="i">callback</span>();
</pre></div>
<p>In, say, JavaScript, it&rsquo;s common to return a callback from inside a method. That
callback may want to hang on to and retain access to the original object<span class="em">&mdash;</span>the
<code>this</code> value<span class="em">&mdash;</span>that the method was associated with. Our existing support for
closures and environment chains should do all this correctly.</p>
<p>Let&rsquo;s code it up. The first step is adding <span name="this-ast">new
syntax</span> for <code>this</code>.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Set      : Expr object, Token name, Expr value&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;This     : Token keyword&quot;</span>,
</pre><pre class="insert-after">      &quot;Unary    : Token operator, Expr right&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="this-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#this-expression">Appendix II</a>.</p>
</aside>
<p>Parsing is simple since it&rsquo;s a single token which our lexer already
recognizes as a reserved word.</p>
<div class="codehilite"><pre class="insert-before">      return new Expr.Literal(previous().literal);
    }
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>primary</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">match</span>(<span class="i">THIS</span>)) <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">This</span>(<span class="i">previous</span>());
</pre><pre class="insert-after">

    if (match(IDENTIFIER)) {
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>primary</em>()</div>

<p>You can start to see how <code>this</code> works like a variable when we get to the
resolver.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitSetExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitThisExpr</span>(<span class="t">Expr</span>.<span class="t">This</span> <span class="i">expr</span>) {
    <span class="i">resolveLocal</span>(<span class="i">expr</span>, <span class="i">expr</span>.<span class="i">keyword</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }

</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitSetExpr</em>()</div>

<p>We resolve it exactly like any other local variable using &ldquo;this&rdquo; as the name for
the &ldquo;variable&rdquo;. Of course, that&rsquo;s not going to work right now, because &ldquo;this&rdquo;
<em>isn&rsquo;t</em> declared in any scope. Let&rsquo;s fix that over in <code>visitClassStmt()</code>.</p>
<div class="codehilite"><pre class="insert-before">    define(stmt.name);

</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">    <span class="i">beginScope</span>();
    <span class="i">scopes</span>.<span class="i">peek</span>().<span class="i">put</span>(<span class="s">&quot;this&quot;</span>, <span class="k">true</span>);

</pre><pre class="insert-after">    for (Stmt.Function method : stmt.methods) {
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>Before we step in and start resolving the method bodies, we push a new scope and
define &ldquo;this&rdquo; in it as if it were a variable. Then, when we&rsquo;re done, we discard
that surrounding scope.</p>
<div class="codehilite"><pre class="insert-before">    }

</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">    <span class="i">endScope</span>();

</pre><pre class="insert-after">    return null;
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>Now, whenever a <code>this</code> expression is encountered (at least inside a method) it
will resolve to a &ldquo;local variable&rdquo; defined in an implicit scope just outside of
the block for the method body.</p>
<p>The resolver has a new <em>scope</em> for <code>this</code>, so the interpreter needs to create a
corresponding <em>environment</em> for it. Remember, we always have to keep the
resolver&rsquo;s scope chains and the interpreter&rsquo;s linked environments in sync with
each other. At runtime, we create the environment after we find the method on
the instance. We replace the previous line of code that simply returned the
method&rsquo;s LoxFunction with this:</p>
<div class="codehilite"><pre class="insert-before">    LoxFunction method = klass.findMethod(name.lexeme);
</pre><div class="source-file"><em>lox/LoxInstance.java</em><br>
in <em>get</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">method</span> != <span class="k">null</span>) <span class="k">return</span> <span class="i">method</span>.<span class="i">bind</span>(<span class="k">this</span>);
</pre><pre class="insert-after">

    throw new RuntimeError(name,<span name="hidden"> </span>
        &quot;Undefined property '&quot; + name.lexeme + &quot;'.&quot;);
</pre></div>
<div class="source-file-narrow"><em>lox/LoxInstance.java</em>, in <em>get</em>(), replace 1 line</div>

<p>Note the new call to <code>bind()</code>. That looks like so:</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxFunction.java</em><br>
add after <em>LoxFunction</em>()</div>
<pre>  <span class="t">LoxFunction</span> <span class="i">bind</span>(<span class="t">LoxInstance</span> <span class="i">instance</span>) {
    <span class="t">Environment</span> <span class="i">environment</span> = <span class="k">new</span> <span class="t">Environment</span>(<span class="i">closure</span>);
    <span class="i">environment</span>.<span class="i">define</span>(<span class="s">&quot;this&quot;</span>, <span class="i">instance</span>);
    <span class="k">return</span> <span class="k">new</span> <span class="t">LoxFunction</span>(<span class="i">declaration</span>, <span class="i">environment</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, add after <em>LoxFunction</em>()</div>

<p>There isn&rsquo;t much to it. We create a new environment nestled inside the method&rsquo;s
original closure. Sort of a closure-within-a-closure. When the method is called,
that will become the parent of the method body&rsquo;s environment.</p>
<p>We declare &ldquo;this&rdquo; as a variable in that environment and bind it to the given
instance, the instance that the method is being accessed from. <em>Et voilà</em>, the
returned LoxFunction now carries around its own little persistent world where
&ldquo;this&rdquo; is bound to the object.</p>
<p>The remaining task is interpreting those <code>this</code> expressions. Similar to the
resolver, it is the same as interpreting a variable expression.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitSetExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitThisExpr</span>(<span class="t">Expr</span>.<span class="t">This</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">lookUpVariable</span>(<span class="i">expr</span>.<span class="i">keyword</span>, <span class="i">expr</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitSetExpr</em>()</div>

<p>Go ahead and give it a try using that cake example from earlier. With fewer than
twenty lines of code, our interpreter handles <code>this</code> inside methods even in all
of the weird ways it can interact with nested classes, functions inside methods,
handles to methods, etc.</p>
<h3><a href="#invalid-uses-of-this" id="invalid-uses-of-this"><small>12&#8202;.&#8202;6&#8202;.&#8202;1</small>Invalid uses of this</a></h3>
<p>Wait a minute. What happens if you try to use <code>this</code> <em>outside</em> of a method? What
about:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="k">this</span>;
</pre></div>
<p>Or:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">notAMethod</span>() {
  <span class="k">print</span> <span class="k">this</span>;
}
</pre></div>
<p>There is no instance for <code>this</code> to point to if you&rsquo;re not in a method. We could
give it some default value like <code>nil</code> or make it a runtime error, but the user
has clearly made a mistake. The sooner they find and fix that mistake, the
happier they&rsquo;ll be.</p>
<p>Our resolution pass is a fine place to detect this error statically. It already
detects <code>return</code> statements outside of functions. We&rsquo;ll do something similar for
<code>this</code>. In the vein of our existing FunctionType enum, we define a new ClassType
one.</p>
<div class="codehilite"><pre class="insert-before">  }
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
add after enum <em>FunctionType</em></div>
<pre class="insert">

  <span class="k">private</span> <span class="k">enum</span> <span class="t">ClassType</span> {
    <span class="i">NONE</span>,
    <span class="i">CLASS</span>
  }

  <span class="k">private</span> <span class="t">ClassType</span> <span class="i">currentClass</span> = <span class="t">ClassType</span>.<span class="i">NONE</span>;

</pre><pre class="insert-after">  void resolve(List&lt;Stmt&gt; statements) {
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after enum <em>FunctionType</em></div>

<p>Yes, it could be a Boolean. When we get to inheritance, it will get a third
value, hence the enum right now. We also add a corresponding field,
<code>currentClass</code>. Its value tells us if we are currently inside a class
declaration while traversing the syntax tree. It starts out <code>NONE</code> which means
we aren&rsquo;t in one.</p>
<p>When we begin to resolve a class declaration, we change that.</p>
<div class="codehilite"><pre class="insert-before">  public Void visitClassStmt(Stmt.Class stmt) {
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">    <span class="t">ClassType</span> <span class="i">enclosingClass</span> = <span class="i">currentClass</span>;
    <span class="i">currentClass</span> = <span class="t">ClassType</span>.<span class="i">CLASS</span>;

</pre><pre class="insert-after">    declare(stmt.name);
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>As with <code>currentFunction</code>, we store the previous value of the field in a local
variable. This lets us piggyback onto the JVM to keep a stack of <code>currentClass</code>
values. That way we don&rsquo;t lose track of the previous value if one class nests
inside another.</p>
<p>Once the methods have been resolved, we &ldquo;pop&rdquo; that stack by restoring the old
value.</p>
<div class="codehilite"><pre class="insert-before">    endScope();

</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">    <span class="i">currentClass</span> = <span class="i">enclosingClass</span>;
</pre><pre class="insert-after">    return null;
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>When we resolve a <code>this</code> expression, the <code>currentClass</code> field gives us the bit
of data we need to report an error if the expression doesn&rsquo;t occur nestled
inside a method body.</p>
<div class="codehilite"><pre class="insert-before">  public Void visitThisExpr(Expr.This expr) {
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitThisExpr</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">currentClass</span> == <span class="t">ClassType</span>.<span class="i">NONE</span>) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">expr</span>.<span class="i">keyword</span>,
          <span class="s">&quot;Can&#39;t use &#39;this&#39; outside of a class.&quot;</span>);
      <span class="k">return</span> <span class="k">null</span>;
    }

</pre><pre class="insert-after">    resolveLocal(expr, expr.keyword);
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitThisExpr</em>()</div>

<p>That should help users use <code>this</code> correctly, and it saves us from having to
handle misuse at runtime in the interpreter.</p>
<h2><a href="#constructors-and-initializers" id="constructors-and-initializers"><small>12&#8202;.&#8202;7</small>Constructors and Initializers</a></h2>
<p>We can do almost everything with classes now, and as we near the end of the
chapter we find ourselves strangely focused on a beginning. Methods and fields
let us encapsulate state and behavior together so that an object always <em>stays</em>
in a valid configuration. But how do we ensure a brand new object <em>starts</em> in a
good state?</p>
<p>For that, we need constructors. I find them one of the trickiest parts of a
language to design, and if you peer closely at most other languages, you&rsquo;ll see
<span name="cracks">cracks</span> around object construction where the seams of
the design don&rsquo;t quite fit together perfectly. Maybe there&rsquo;s something
intrinsically messy about the moment of birth.</p>
<aside name="cracks">
<p>A few examples: In Java, even though final fields must be initialized, it is
still possible to read one <em>before</em> it has been. Exceptions<span class="em">&mdash;</span>a huge, complex
feature<span class="em">&mdash;</span>were added to C++ mainly as a way to emit errors from constructors.</p>
</aside>
<p>&ldquo;Constructing&rdquo; an object is actually a pair of operations:</p>
<ol>
<li>
<p>The runtime <span name="allocate"><em>allocates</em></span> the memory required for
a fresh instance. In most languages, this operation is at a fundamental
level beneath what user code is able to access.</p>
<aside name="allocate">
<p>C++&rsquo;s &ldquo;<a href="https://en.wikipedia.org/wiki/Placement_syntax">placement new</a>&rdquo; is a rare example where the bowels of allocation
are laid bare for the programmer to prod.</p>
</aside></li>
<li>
<p>Then, a user-provided chunk of code is called which <em>initializes</em> the
unformed object.</p>
</li>
</ol>
<p>The latter is what we tend to think of when we hear &ldquo;constructor&rdquo;, but the
language itself has usually done some groundwork for us before we get to that
point. In fact, our Lox interpreter already has that covered when it creates a
new LoxInstance object.</p>
<p>We&rsquo;ll do the remaining part<span class="em">&mdash;</span>user-defined initialization<span class="em">&mdash;</span>now. Languages
have a variety of notations for the chunk of code that sets up a new object for
a class. C++, Java, and C# use a method whose name matches the class name. Ruby
calls it <code>initialize()</code> and Python calls it <code>__init__()</code>. A plain <code>init()</code> in the same
spirit is nice and short, so we&rsquo;ll do that.</p>
<p>In LoxClass&rsquo;s implementation of LoxCallable, we add a few more lines.</p>
<div class="codehilite"><pre class="insert-before">                     List&lt;Object&gt; arguments) {
    LoxInstance instance = new LoxInstance(this);
</pre><div class="source-file"><em>lox/LoxClass.java</em><br>
in <em>call</em>()</div>
<pre class="insert">    <span class="t">LoxFunction</span> <span class="i">initializer</span> = <span class="i">findMethod</span>(<span class="s">&quot;init&quot;</span>);
    <span class="k">if</span> (<span class="i">initializer</span> != <span class="k">null</span>) {
      <span class="i">initializer</span>.<span class="i">bind</span>(<span class="i">instance</span>).<span class="i">call</span>(<span class="i">interpreter</span>, <span class="i">arguments</span>);
    }

</pre><pre class="insert-after">    return instance;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, in <em>call</em>()</div>

<p>When a class is called, after the LoxInstance is created, we look for an &ldquo;init&rdquo;
method. If we find one, we immediately bind and invoke it just like a normal
method call. The argument list is forwarded along.</p>
<p>That argument list means we also need to tweak how a class declares its arity.</p>
<div class="codehilite"><pre class="insert-before">  public int arity() {
</pre><div class="source-file"><em>lox/LoxClass.java</em><br>
in <em>arity</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">LoxFunction</span> <span class="i">initializer</span> = <span class="i">findMethod</span>(<span class="s">&quot;init&quot;</span>);
    <span class="k">if</span> (<span class="i">initializer</span> == <span class="k">null</span>) <span class="k">return</span> <span class="n">0</span>;
    <span class="k">return</span> <span class="i">initializer</span>.<span class="i">arity</span>();
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, in <em>arity</em>(), replace 1 line</div>

<p>If there is an initializer, that method&rsquo;s arity determines how many arguments
you must pass when you call the class itself. We don&rsquo;t <em>require</em> a class to
define an initializer, though, as a convenience. If you don&rsquo;t have an
initializer, the arity is still zero.</p>
<p>That&rsquo;s basically it. Since we bind the <code>init()</code> method before we call it, it has
access to <code>this</code> inside its body. That, along with the arguments passed to the
class, is all you need to be able to set up the new instance however you
desire.</p>
<h3><a href="#invoking-init-directly" id="invoking-init-directly"><small>12&#8202;.&#8202;7&#8202;.&#8202;1</small>Invoking init() directly</a></h3>
<p>As usual, exploring this new semantic territory rustles up a few weird
creatures. Consider:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Foo</span> {
  <span class="i">init</span>() {
    <span class="k">print</span> <span class="k">this</span>;
  }
}

<span class="k">var</span> <span class="i">foo</span> = <span class="t">Foo</span>();
<span class="k">print</span> <span class="i">foo</span>.<span class="i">init</span>();
</pre></div>
<p>Can you &ldquo;re-initialize&rdquo; an object by directly calling its <code>init()</code> method? If
you do, what does it return? A <span name="compromise">reasonable</span> answer
would be <code>nil</code> since that&rsquo;s what it appears the body returns.</p>
<p>However<span class="em">&mdash;</span>and I generally dislike compromising to satisfy the
implementation<span class="em">&mdash;</span>it will make clox&rsquo;s implementation of constructors much
easier if we say that <code>init()</code> methods always return <code>this</code>, even when
directly called. In order to keep jlox compatible with that, we add a little
special case code in LoxFunction.</p>
<aside name="compromise">
<p>Maybe &ldquo;dislike&rdquo; is too strong a claim. It&rsquo;s reasonable to have the constraints
and resources of your implementation affect the design of the language. There
are only so many hours in the day, and if a cut corner here or there lets you get
more features to users in less time, it may very well be a net win for their
happiness and productivity. The trick is figuring out <em>which</em> corners to cut
that won&rsquo;t cause your users and future self to curse your shortsightedness.</p>
</aside>
<div class="codehilite"><pre class="insert-before">      return returnValue.value;
    }
</pre><div class="source-file"><em>lox/LoxFunction.java</em><br>
in <em>call</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">isInitializer</span>) <span class="k">return</span> <span class="i">closure</span>.<span class="i">getAt</span>(<span class="n">0</span>, <span class="s">&quot;this&quot;</span>);
</pre><pre class="insert-after">    return null;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, in <em>call</em>()</div>

<p>If the function is an initializer, we override the actual return value and
forcibly return <code>this</code>. That relies on a new <code>isInitializer</code> field.</p>
<div class="codehilite"><pre class="insert-before">  private final Environment closure;

</pre><div class="source-file"><em>lox/LoxFunction.java</em><br>
in class <em>LoxFunction</em><br>
replace 1 line</div>
<pre class="insert">  <span class="k">private</span> <span class="k">final</span> <span class="t">boolean</span> <span class="i">isInitializer</span>;

  <span class="t">LoxFunction</span>(<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">declaration</span>, <span class="t">Environment</span> <span class="i">closure</span>,
              <span class="t">boolean</span> <span class="i">isInitializer</span>) {
    <span class="k">this</span>.<span class="i">isInitializer</span> = <span class="i">isInitializer</span>;
</pre><pre class="insert-after">    this.closure = closure;
    this.declaration = declaration;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, in class <em>LoxFunction</em>, replace 1 line</div>

<p>We can&rsquo;t simply see if the name of the LoxFunction is &ldquo;init&rdquo; because the user
could have defined a <em>function</em> with that name. In that case, there <em>is</em> no
<code>this</code> to return. To avoid <em>that</em> weird edge case, we&rsquo;ll directly store whether
the LoxFunction represents an initializer method. That means we need to go back
and fix the few places where we create LoxFunctions.</p>
<div class="codehilite"><pre class="insert-before">  public Void visitFunctionStmt(Stmt.Function stmt) {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitFunctionStmt</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">LoxFunction</span> <span class="i">function</span> = <span class="k">new</span> <span class="t">LoxFunction</span>(<span class="i">stmt</span>, <span class="i">environment</span>,
                                           <span class="k">false</span>);
</pre><pre class="insert-after">    environment.define(stmt.name.lexeme, function);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitFunctionStmt</em>(), replace 1 line</div>

<p>For actual function declarations, <code>isInitializer</code> is always false. For methods,
we check the name.</p>
<div class="codehilite"><pre class="insert-before">    for (Stmt.Function method : stmt.methods) {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitClassStmt</em>()<br>
replace 1 line</div>
<pre class="insert">      <span class="t">LoxFunction</span> <span class="i">function</span> = <span class="k">new</span> <span class="t">LoxFunction</span>(<span class="i">method</span>, <span class="i">environment</span>,
          <span class="i">method</span>.<span class="i">name</span>.<span class="i">lexeme</span>.<span class="i">equals</span>(<span class="s">&quot;init&quot;</span>));
</pre><pre class="insert-after">      methods.put(method.name.lexeme, function);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitClassStmt</em>(), replace 1 line</div>

<p>And then in <code>bind()</code> where we create the closure that binds <code>this</code> to a method,
we pass along the original method&rsquo;s value.</p>
<div class="codehilite"><pre class="insert-before">    environment.define(&quot;this&quot;, instance);
</pre><div class="source-file"><em>lox/LoxFunction.java</em><br>
in <em>bind</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">return</span> <span class="k">new</span> <span class="t">LoxFunction</span>(<span class="i">declaration</span>, <span class="i">environment</span>,
                           <span class="i">isInitializer</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, in <em>bind</em>(), replace 1 line</div>

<h3><a href="#returning-from-init" id="returning-from-init"><small>12&#8202;.&#8202;7&#8202;.&#8202;2</small>Returning from init()</a></h3>
<p>We aren&rsquo;t out of the woods yet. We&rsquo;ve been assuming that a user-written
initializer doesn&rsquo;t explicitly return a value because most constructors don&rsquo;t.
What should happen if a user tries:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Foo</span> {
  <span class="i">init</span>() {
    <span class="k">return</span> <span class="s">&quot;something else&quot;</span>;
  }
}
</pre></div>
<p>It&rsquo;s definitely not going to do what they want, so we may as well make it a
static error. Back in the resolver, we add another case to FunctionType.</p>
<div class="codehilite"><pre class="insert-before">    FUNCTION,
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in enum <em>FunctionType</em></div>
<pre class="insert">    <span class="i">INITIALIZER</span>,
</pre><pre class="insert-after">    METHOD
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in enum <em>FunctionType</em></div>

<p>We use the visited method&rsquo;s name to determine if we&rsquo;re resolving an initializer
or not.</p>
<div class="codehilite"><pre class="insert-before">      FunctionType declaration = FunctionType.METHOD;
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">      <span class="k">if</span> (<span class="i">method</span>.<span class="i">name</span>.<span class="i">lexeme</span>.<span class="i">equals</span>(<span class="s">&quot;init&quot;</span>)) {
        <span class="i">declaration</span> = <span class="t">FunctionType</span>.<span class="i">INITIALIZER</span>;
      }

</pre><pre class="insert-after">      resolveFunction(method, declaration);<span name="local"> </span>
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>When we later traverse into a <code>return</code> statement, we check that field and make
it an error to return a value from inside an <code>init()</code> method.</p>
<div class="codehilite"><pre class="insert-before">    if (stmt.value != null) {
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitReturnStmt</em>()</div>
<pre class="insert">      <span class="k">if</span> (<span class="i">currentFunction</span> == <span class="t">FunctionType</span>.<span class="i">INITIALIZER</span>) {
        <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">stmt</span>.<span class="i">keyword</span>,
            <span class="s">&quot;Can&#39;t return a value from an initializer.&quot;</span>);
      }

</pre><pre class="insert-after">      resolve(stmt.value);
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitReturnStmt</em>()</div>

<p>We&rsquo;re <em>still</em> not done. We statically disallow returning a <em>value</em> from an
initializer, but you can still use an empty early <code>return</code>.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Foo</span> {
  <span class="i">init</span>() {
    <span class="k">return</span>;
  }
}
</pre></div>
<p>That is actually kind of useful sometimes, so we don&rsquo;t want to disallow it
entirely. Rather, it should return <code>this</code> instead of <code>nil</code>. That&rsquo;s an easy fix
over in LoxFunction.</p>
<div class="codehilite"><pre class="insert-before">    } catch (Return returnValue) {
</pre><div class="source-file"><em>lox/LoxFunction.java</em><br>
in <em>call</em>()</div>
<pre class="insert">      <span class="k">if</span> (<span class="i">isInitializer</span>) <span class="k">return</span> <span class="i">closure</span>.<span class="i">getAt</span>(<span class="n">0</span>, <span class="s">&quot;this&quot;</span>);

</pre><pre class="insert-after">      return returnValue.value;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, in <em>call</em>()</div>

<p>If we&rsquo;re in an initializer and execute a <code>return</code> statement, instead of
returning the value (which will always be <code>nil</code>), we again return <code>this</code>.</p>
<p>Phew! That was a whole list of tasks but our reward is that our little
interpreter has grown an entire programming paradigm. Classes, methods, fields,
<code>this</code>, and constructors. Our baby language is looking awfully grown-up.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>We have methods on instances, but there is no way to define &ldquo;static&rdquo; methods
that can be called directly on the class object itself. Add support for
them. Use a <code>class</code> keyword preceding the method to indicate a static method
that hangs off the class object.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Math</span> {
  <span class="k">class</span> <span class="i">square</span>(<span class="i">n</span>) {
    <span class="k">return</span> <span class="i">n</span> * <span class="i">n</span>;
  }
}

<span class="k">print</span> <span class="t">Math</span>.<span class="i">square</span>(<span class="n">3</span>); <span class="c">// Prints &quot;9&quot;.</span>
</pre></div>
<p>You can solve this however you like, but the &ldquo;<a href="https://en.wikipedia.org/wiki/Metaclass">metaclasses</a>&rdquo; used by
Smalltalk and Ruby are a particularly elegant approach. <em>Hint: Make LoxClass
extend LoxInstance and go from there.</em></p>
</li>
<li>
<p>Most modern languages support &ldquo;getters&rdquo; and &ldquo;setters&rdquo;<span class="em">&mdash;</span>members on a class
that look like field reads and writes but that actually execute user-defined
code. Extend Lox to support getter methods. These are declared without a
parameter list. The body of the getter is executed when a property with that
name is accessed.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Circle</span> {
  <span class="i">init</span>(<span class="i">radius</span>) {
    <span class="k">this</span>.<span class="i">radius</span> = <span class="i">radius</span>;
  }

  <span class="i">area</span> {
    <span class="k">return</span> <span class="n">3.141592653</span> * <span class="k">this</span>.<span class="i">radius</span> * <span class="k">this</span>.<span class="i">radius</span>;
  }
}

<span class="k">var</span> <span class="i">circle</span> = <span class="t">Circle</span>(<span class="n">4</span>);
<span class="k">print</span> <span class="i">circle</span>.<span class="i">area</span>; <span class="c">// Prints roughly &quot;50.2655&quot;.</span>
</pre></div>
</li>
<li>
<p>Python and JavaScript allow you to freely access an object&rsquo;s fields from
outside of its own methods. Ruby and Smalltalk encapsulate instance state.
Only methods on the class can access the raw fields, and it is up to the
class to decide which state is exposed. Most statically typed languages
offer modifiers like <code>private</code> and <code>public</code> to control which parts of a
class are externally accessible on a per-member basis.</p>
<p>What are the trade-offs between these approaches and why might a language
prefer one or the other?</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Prototypes and Power</a></h2>
<p>In this chapter, we introduced two new runtime entities, LoxClass and
LoxInstance. The former is where behavior for objects lives, and the latter is
for state. What if you could define methods right on a single object, inside
LoxInstance? In that case, we wouldn&rsquo;t need LoxClass at all. LoxInstance would
be a complete package for defining the behavior and state of an object.</p>
<p>We&rsquo;d still want some way, without classes, to reuse behavior across multiple
instances. We could let a LoxInstance <a href="https://en.wikipedia.org/wiki/Prototype-based_programming#Delegation"><em>delegate</em></a> directly to another
LoxInstance to reuse its fields and methods, sort of like inheritance.</p>
<p>Users would model their program as a constellation of objects, some of which
delegate to each other to reflect commonality. Objects used as delegates
represent &ldquo;canonical&rdquo; or &ldquo;prototypical&rdquo; objects that others refine. The result
is a simpler runtime with only a single internal construct, LoxInstance.</p>
<p>That&rsquo;s where the name <strong><a href="https://en.wikipedia.org/wiki/Prototype-based_programming">prototypes</a></strong> comes from for this paradigm. It
was invented by David Ungar and Randall Smith in a language called <a href="http://www.selflanguage.org/">Self</a>.
They came up with it by starting with Smalltalk and following the above mental
exercise to see how much they could pare it down.</p>
<p>Prototypes were an academic curiosity for a long time, a fascinating one that
generated interesting research but didn&rsquo;t make a dent in the larger world of
programming. That is, until Brendan Eich crammed prototypes into JavaScript,
which then promptly took over the world. Many (many) <span
name="words">words</span> have been written about prototypes in JavaScript.
Whether that shows that prototypes are brilliant or confusing<span class="em">&mdash;</span>or both!<span class="em">&mdash;</span>is
an open question.</p>
<aside name="words">
<p>Including <a href="http://gameprogrammingpatterns.com/prototype.html">more than a handful</a> by yours truly.</p>
</aside>
<p>I won&rsquo;t get into whether or not I think prototypes are a good idea for a
language. I&rsquo;ve made languages that are <a href="http://finch.stuffwithstuff.com/">prototypal</a> and
<a href="http://wren.io/">class-based</a>, and my opinions of both are complex. What I want to discuss
is the role of <em>simplicity</em> in a language.</p>
<p>Prototypes are simpler than classes<span class="em">&mdash;</span>less code for the language implementer to
write, and fewer concepts for the user to learn and understand. Does that make
them better? We language nerds have a tendency to fetishize minimalism.
Personally, I think simplicity is only part of the equation. What we really want
to give the user is <em>power</em>, which I define as:</p>
<div class="codehilite"><pre>power = breadth × ease ÷ complexity
</pre></div>
<p>None of these are precise numeric measures. I&rsquo;m using math as an analogy here, not
actual quantification.</p>
<ul>
<li>
<p><strong>Breadth</strong> is the range of different things the language lets you express.
C has a lot of breadth<span class="em">&mdash;</span>it&rsquo;s been used for everything from operating
systems to user applications to games. Domain-specific languages like
AppleScript and Matlab have less breadth.</p>
</li>
<li>
<p><strong>Ease</strong> is how little effort it takes to make the language do what you
want. &ldquo;Usability&rdquo; might be another term, though it carries more baggage than
I want to bring in. &ldquo;Higher-level&rdquo; languages tend to have more ease than
&ldquo;lower-level&rdquo; ones. Most languages have a &ldquo;grain&rdquo; to them where some things
feel easier to express than others.</p>
</li>
<li>
<p><strong>Complexity</strong> is how big the language (including its runtime, core libraries,
tools, ecosystem, etc.) is. People talk about how many pages are in a
language&rsquo;s spec, or how many keywords it has. It&rsquo;s how much the user has to
load into their wetware before they can be productive in the system. It is
the antonym of simplicity.</p>
</li>
</ul>
<p>Reducing complexity <em>does</em> increase power. The smaller the denominator, the
larger the resulting value, so our intuition that simplicity is good is valid.
However, when reducing complexity, we must take care not to sacrifice breadth or
ease in the process, or the total power may go down. Java would be a strictly
<em>simpler</em> language if it removed strings, but it probably wouldn&rsquo;t handle text
manipulation tasks well, nor would it be as easy to get things done.</p>
<p>The art, then, is finding <em>accidental</em> complexity that can be omitted<span class="em">&mdash;</span>language features and interactions that don&rsquo;t carry their weight by increasing
the breadth or ease of using the language.</p>
<p>If users want to express their program in terms of categories of objects, then
baking classes into the language increases the ease of doing that, hopefully by
a large enough margin to pay for the added complexity. But if that isn&rsquo;t how
users are using your language, then by all means leave classes out.</p>
</div>

<footer>
<a href="inheritance.html" class="next">
  Next Chapter: &ldquo;Inheritance&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank" rel="noopener">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/closures.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Closures &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters" alt="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Closures<small>25</small></a></h3>

<ul>
    <li><a href="#closure-objects"><small>25.1</small> Closure Objects</a></li>
    <li><a href="#upvalues"><small>25.2</small> Upvalues</a></li>
    <li><a href="#upvalue-objects"><small>25.3</small> Upvalue Objects</a></li>
    <li><a href="#closed-upvalues"><small>25.4</small> Closed Upvalues</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Closing Over the Loop Variable</a></li>
</ul>


<div class="prev-next">
    <a href="calls-and-functions.html" title="Calls and Functions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="garbage-collection.html" title="Garbage Collection" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters" alt="Crafting Interpreters"></a>
<a href="calls-and-functions.html" title="Calls and Functions" class="prev">←</a>
<a href="garbage-collection.html" title="Garbage Collection" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters" alt="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Closures<small>25</small></a></h3>

<ul>
    <li><a href="#closure-objects"><small>25.1</small> Closure Objects</a></li>
    <li><a href="#upvalues"><small>25.2</small> Upvalues</a></li>
    <li><a href="#upvalue-objects"><small>25.3</small> Upvalue Objects</a></li>
    <li><a href="#closed-upvalues"><small>25.4</small> Closed Upvalues</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Closing Over the Loop Variable</a></li>
</ul>


<div class="prev-next">
    <a href="calls-and-functions.html" title="Calls and Functions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="garbage-collection.html" title="Garbage Collection" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">25</div>
  <h1>Closures</h1>

<blockquote>
<p>As the man said, for every complex problem there&rsquo;s a simple solution, and it&rsquo;s
wrong.</p>
<p><cite>Umberto Eco, <em>Foucault&rsquo;s Pendulum</em></cite></p>
</blockquote>
<p>Thanks to our diligent labor in <a href="calls-and-functions.html">the last chapter</a>, we have a virtual
machine with working functions. What it lacks is closures. Aside from global
variables, which are their own breed of animal, a function has no way to
reference a variable declared outside of its own body.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">x</span> = <span class="s">&quot;global&quot;</span>;
<span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="s">&quot;outer&quot;</span>;
  <span class="k">fun</span> <span class="i">inner</span>() {
    <span class="k">print</span> <span class="i">x</span>;
  }
  <span class="i">inner</span>();
}
<span class="i">outer</span>();
</pre></div>
<p>Run this example now and it prints &ldquo;global&rdquo;. It&rsquo;s supposed to print &ldquo;outer&rdquo;. To
fix this, we need to include the entire lexical scope of all surrounding
functions when resolving a variable.</p>
<p>This problem is harder in clox than it was in jlox because our bytecode VM
stores locals on a stack. We used a stack because I claimed locals have stack
semantics<span class="em">&mdash;</span>variables are discarded in the reverse order that they are created.
But with closures, that&rsquo;s only <em>mostly</em> true.</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">makeClosure</span>() {
  <span class="k">var</span> <span class="i">local</span> = <span class="s">&quot;local&quot;</span>;
  <span class="k">fun</span> <span class="i">closure</span>() {
    <span class="k">print</span> <span class="i">local</span>;
  }
  <span class="k">return</span> <span class="i">closure</span>;
}

<span class="k">var</span> <span class="i">closure</span> = <span class="i">makeClosure</span>();
<span class="i">closure</span>();
</pre></div>
<p>The outer function <code>makeClosure()</code> declares a variable, <code>local</code>. It also creates
an inner function, <code>closure()</code>, that captures that variable. Then <code>makeClosure()</code>
returns a reference to that function. Since the closure <span
name="flying">escapes</span> while holding on to the local variable, <code>local</code> must
outlive the function call where it was created.</p>
<aside name="flying"><img src="image/closures/flying.png" class="above" alt="A local variable flying away from the stack."/>
<p>Oh no, it&rsquo;s escaping!</p>
</aside>
<p>We could solve this problem by dynamically allocating memory for all local
variables. That&rsquo;s what jlox does by putting everything in those Environment
objects that float around in Java&rsquo;s heap. But we don&rsquo;t want to. Using a <span
name="stack">stack</span> is <em>really</em> fast. Most local variables are <em>not</em>
captured by closures and do have stack semantics. It would suck to make all of
those slower for the benefit of the rare local that is captured.</p>
<aside name="stack">
<p>There is a reason that C and Java use the stack for their local variables, after
all.</p>
</aside>
<p>This means a more complex approach than we used in our Java interpreter. Because
some locals have very different lifetimes, we will have two implementation
strategies. For locals that aren&rsquo;t used in closures, we&rsquo;ll keep them just as
they are on the stack. When a local is captured by a closure, we&rsquo;ll adopt
another solution that lifts it onto the heap where it can live as long as
needed.</p>
<p>Closures have been around since the early Lisp days when bytes of memory and CPU
cycles were more precious than emeralds. Over the intervening decades, hackers
devised all <span name="lambda">manner</span> of ways to compile closures to
optimized runtime representations. Some are more efficient but require a more
complex compilation process than we could easily retrofit into clox.</p>
<aside name="lambda">
<p>Search for &ldquo;closure conversion&rdquo; or &ldquo;lambda lifting&rdquo; to start exploring.</p>
</aside>
<p>The technique I explain here comes from the design of the Lua VM. It is fast,
parsimonious with memory, and implemented with relatively little code. Even more
impressive, it fits naturally into the single-pass compilers clox and Lua both
use. It is somewhat intricate, though. It might take a while before all the
pieces click together in your mind. We&rsquo;ll build them one step at a time, and
I&rsquo;ll try to introduce the concepts in stages.</p>
<h2><a href="#closure-objects" id="closure-objects"><small>25&#8202;.&#8202;1</small>Closure Objects</a></h2>
<p>Our VM represents functions at runtime using ObjFunction. These objects are
created by the front end during compilation. At runtime, all the VM does is load
the function object from a constant table and bind it to a name. There is no
operation to &ldquo;create&rdquo; a function at runtime. Much like string and number <span
name="literal">literals</span>, functions are constants instantiated purely at
compile time.</p>
<aside name="literal">
<p>In other words, a function declaration in Lox <em>is</em> a kind of literal<span class="em">&mdash;</span>a piece
of syntax that defines a constant value of a built-in type.</p>
</aside>
<p>That made sense because all of the data that composes a function is known at
compile time: the chunk of bytecode compiled from the function&rsquo;s body, and the
constants used in the body. Once we introduce closures, though, that
representation is no longer sufficient. Take a gander at:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">makeClosure</span>(<span class="i">value</span>) {
  <span class="k">fun</span> <span class="i">closure</span>() {
    <span class="k">print</span> <span class="i">value</span>;
  }
  <span class="k">return</span> <span class="i">closure</span>;
}

<span class="k">var</span> <span class="i">doughnut</span> = <span class="i">makeClosure</span>(<span class="s">&quot;doughnut&quot;</span>);
<span class="k">var</span> <span class="i">bagel</span> = <span class="i">makeClosure</span>(<span class="s">&quot;bagel&quot;</span>);
<span class="i">doughnut</span>();
<span class="i">bagel</span>();
</pre></div>
<p>The <code>makeClosure()</code> function defines and returns a function. We call it twice
and get two closures back. They are created by the same nested function
declaration, <code>closure</code>, but close over different values. When we call the two
closures, each prints a different string. That implies we need some runtime
representation for a closure that captures the local variables surrounding the
function as they exist when the function declaration is <em>executed</em>, not just
when it is compiled.</p>
<p>We&rsquo;ll work our way up to capturing variables, but a good first step is defining
that object representation. Our existing ObjFunction type represents the <span
name="raw">&ldquo;raw&rdquo;</span> compile-time state of a function declaration, since all
closures created from a single declaration share the same code and constants. At
runtime, when we execute a function declaration, we wrap the ObjFunction in a
new ObjClosure structure. The latter has a reference to the underlying bare
function along with runtime state for the variables the function closes over.</p>
<aside name="raw">
<p>The Lua implementation refers to the raw function object containing the bytecode
as a &ldquo;prototype&rdquo;, which is a great word to describe this, except that word also
gets overloaded to refer to <a href="https://en.wikipedia.org/wiki/Prototype-based_programming">prototypal inheritance</a>.</p>
</aside><img src="image/closures/obj-closure.png" alt="An ObjClosure with a reference to an ObjFunction."/>
<p>We&rsquo;ll wrap every function in an ObjClosure, even if the function doesn&rsquo;t
actually close over and capture any surrounding local variables. This is a
little wasteful, but it simplifies the VM because we can always assume that the
function we&rsquo;re calling is an ObjClosure. That new struct starts out like this:</p>
<div class="codehilite"><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjString</em></div>
<pre><span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">ObjFunction</span>* <span class="i">function</span>;
} <span class="t">ObjClosure</span>;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjString</em></div>

<p>Right now, it simply points to an ObjFunction and adds the necessary object
header stuff. Grinding through the usual ceremony for adding a new object type
to clox, we declare a C function to create a new closure.</p>
<div class="codehilite"><pre class="insert-before">} ObjClosure;

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjClosure</em></div>
<pre class="insert"><span class="t">ObjClosure</span>* <span class="i">newClosure</span>(<span class="t">ObjFunction</span>* <span class="i">function</span>);
</pre><pre class="insert-after">ObjFunction* newFunction();
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjClosure</em></div>

<p>Then we implement it here:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>allocateObject</em>()</div>
<pre><span class="t">ObjClosure</span>* <span class="i">newClosure</span>(<span class="t">ObjFunction</span>* <span class="i">function</span>) {
  <span class="t">ObjClosure</span>* <span class="i">closure</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjClosure</span>, <span class="a">OBJ_CLOSURE</span>);
  <span class="i">closure</span>-&gt;<span class="i">function</span> = <span class="i">function</span>;
  <span class="k">return</span> <span class="i">closure</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>allocateObject</em>()</div>

<p>It takes a pointer to the ObjFunction it wraps. It also initializes the type
field to a new type.</p>
<div class="codehilite"><pre class="insert-before">typedef enum {
</pre><div class="source-file"><em>object.h</em><br>
in enum <em>ObjType</em></div>
<pre class="insert">  <span class="a">OBJ_CLOSURE</span>,
</pre><pre class="insert-after">  OBJ_FUNCTION,
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in enum <em>ObjType</em></div>

<p>And when we&rsquo;re done with a closure, we release its memory.</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_CLOSURE</span>: {
      <span class="a">FREE</span>(<span class="t">ObjClosure</span>, <span class="i">object</span>);
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_FUNCTION: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>We free only the ObjClosure itself, not the ObjFunction. That&rsquo;s because the
closure doesn&rsquo;t <em>own</em> the function. There may be multiple closures that all
reference the same function, and none of them claims any special privilege over
it. We can&rsquo;t free the ObjFunction until <em>all</em> objects referencing it are gone<span class="em">&mdash;</span>including even the surrounding function whose constant table contains it.
Tracking that sounds tricky, and it is! That&rsquo;s why we&rsquo;ll write a garbage
collector soon to manage it for us.</p>
<p>We also have the usual <span name="macro">macros</span> for checking a value&rsquo;s
type.</p>
<aside name="macro">
<p>Perhaps I should have defined a macro to make it easier to generate these
macros. Maybe that would be a little too meta.</p>
</aside>
<div class="codehilite"><pre class="insert-before">#define OBJ_TYPE(value)        (AS_OBJ(value)-&gt;type)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define IS_CLOSURE(value)      isObjType(value, OBJ_CLOSURE)</span>
</pre><pre class="insert-after">#define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>And to cast a value:</p>
<div class="codehilite"><pre class="insert-before">#define IS_STRING(value)       isObjType(value, OBJ_STRING)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define AS_CLOSURE(value)      ((ObjClosure*)AS_OBJ(value))</span>
</pre><pre class="insert-after">#define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>Closures are first-class objects, so you can print them.</p>
<div class="codehilite"><pre class="insert-before">  switch (OBJ_TYPE(value)) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>printObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_CLOSURE</span>:
      <span class="i">printFunction</span>(<span class="a">AS_CLOSURE</span>(<span class="i">value</span>)-&gt;<span class="i">function</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_FUNCTION:
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printObject</em>()</div>

<p>They display exactly as ObjFunction does. From the user&rsquo;s perspective, the
difference between ObjFunction and ObjClosure is purely a hidden implementation
detail. With that out of the way, we have a working but empty representation for
closures.</p>
<h3><a href="#compiling-to-closure-objects" id="compiling-to-closure-objects"><small>25&#8202;.&#8202;1&#8202;.&#8202;1</small>Compiling to closure objects</a></h3>
<p>We have closure objects, but our VM never creates them. The next step is getting
the compiler to emit instructions to tell the runtime when to create a new
ObjClosure to wrap a given ObjFunction. This happens right at the end of a
function declaration.</p>
<div class="codehilite"><pre class="insert-before">  ObjFunction* function = endCompiler();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>function</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="i">emitBytes</span>(<span class="a">OP_CLOSURE</span>, <span class="i">makeConstant</span>(<span class="a">OBJ_VAL</span>(<span class="i">function</span>)));
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>function</em>(), replace 1 line</div>

<p>Before, the final bytecode for a function declaration was a single <code>OP_CONSTANT</code>
instruction to load the compiled function from the surrounding function&rsquo;s
constant table and push it onto the stack. Now we have a new instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_CALL,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_CLOSURE</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>Like <code>OP_CONSTANT</code>, it takes a single operand that represents a constant table
index for the function. But when we get over to the runtime implementation, we
do something more interesting.</p>
<p>First, let&rsquo;s be diligent VM hackers and slot in disassembler support for the
instruction.</p>
<div class="codehilite"><pre class="insert-before">    case OP_CALL:
      return byteInstruction(&quot;OP_CALL&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_CLOSURE</span>: {
      <span class="i">offset</span>++;
      <span class="t">uint8_t</span> <span class="i">constant</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span>++];
      <span class="i">printf</span>(<span class="s">&quot;%-16s %4d &quot;</span>, <span class="s">&quot;OP_CLOSURE&quot;</span>, <span class="i">constant</span>);
      <span class="i">printValue</span>(<span class="i">chunk</span>-&gt;<span class="i">constants</span>.<span class="i">values</span>[<span class="i">constant</span>]);
      <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
      <span class="k">return</span> <span class="i">offset</span>;
    }
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>There&rsquo;s more going on here than we usually have in the disassembler. By the end
of the chapter, you&rsquo;ll discover that <code>OP_CLOSURE</code> is quite an unusual
instruction. It&rsquo;s straightforward right now<span class="em">&mdash;</span>just a single byte operand<span class="em">&mdash;</span>but
we&rsquo;ll be adding to it. This code here anticipates that future.</p>
<h3><a href="#interpreting-function-declarations" id="interpreting-function-declarations"><small>25&#8202;.&#8202;1&#8202;.&#8202;2</small>Interpreting function declarations</a></h3>
<p>Most of the work we need to do is in the runtime. We have to handle the new
instruction, naturally. But we also need to touch every piece of code in the VM
that works with ObjFunction and change it to use ObjClosure instead<span class="em">&mdash;</span>function
calls, call frames, etc. We&rsquo;ll start with the instruction, though.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_CLOSURE</span>: {
        <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="a">AS_FUNCTION</span>(<span class="a">READ_CONSTANT</span>());
        <span class="t">ObjClosure</span>* <span class="i">closure</span> = <span class="i">newClosure</span>(<span class="i">function</span>);
        <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">closure</span>));
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Like the <code>OP_CONSTANT</code> instruction we used before, first we load the compiled
function from the constant table. The difference now is that we wrap that
function in a new ObjClosure and push the result onto the stack.</p>
<p>Once you have a closure, you&rsquo;ll eventually want to call it.</p>
<div class="codehilite"><pre class="insert-before">    switch (OBJ_TYPE(callee)) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>callValue</em>()<br>
replace 2 lines</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OBJ_CLOSURE</span>:
        <span class="k">return</span> <span class="i">call</span>(<span class="a">AS_CLOSURE</span>(<span class="i">callee</span>), <span class="i">argCount</span>);
</pre><pre class="insert-after">      case OBJ_NATIVE: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>callValue</em>(), replace 2 lines</div>

<p>We remove the code for calling objects whose type is <code>OBJ_FUNCTION</code>. Since we
wrap all functions in ObjClosures, the runtime will never try to invoke a bare
ObjFunction anymore. Those objects live only in constant tables and get
immediately <span name="naked">wrapped</span> in closures before anything else
sees them.</p>
<aside name="naked">
<p>We don&rsquo;t want any naked functions wandering around the VM! What would the
neighbors say?</p>
</aside>
<p>We replace the old code with very similar code for calling a closure instead.
The only difference is the type of object we pass to <code>call()</code>. The real changes
are over in that function. First, we update its signature.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
function <em>call</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">bool</span> <span class="i">call</span>(<span class="t">ObjClosure</span>* <span class="i">closure</span>, <span class="t">int</span> <span class="i">argCount</span>) {
</pre><pre class="insert-after">  if (argCount != function-&gt;arity) {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, function <em>call</em>(), replace 1 line</div>

<p>Then, in the body, we need to fix everything that referenced the function to
handle the fact that we&rsquo;ve introduced a layer of indirection. We start with the
arity checking:</p>
<div class="codehilite"><pre class="insert-before">static bool call(ObjClosure* closure, int argCount) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>call</em>()<br>
replace 3 lines</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">argCount</span> != <span class="i">closure</span>-&gt;<span class="i">function</span>-&gt;<span class="i">arity</span>) {
    <span class="i">runtimeError</span>(<span class="s">&quot;Expected %d arguments but got %d.&quot;</span>,
        <span class="i">closure</span>-&gt;<span class="i">function</span>-&gt;<span class="i">arity</span>, <span class="i">argCount</span>);
</pre><pre class="insert-after">    return false;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>call</em>(), replace 3 lines</div>

<p>The only change is that we unwrap the closure to get to the underlying function.
The next thing <code>call()</code> does is create a new CallFrame. We change that code to
store the closure in the CallFrame and get the bytecode pointer from the
closure&rsquo;s function.</p>
<div class="codehilite"><pre class="insert-before">  CallFrame* frame = &amp;vm.frames[vm.frameCount++];
</pre><div class="source-file"><em>vm.c</em><br>
in <em>call</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="i">frame</span>-&gt;<span class="i">closure</span> = <span class="i">closure</span>;
  <span class="i">frame</span>-&gt;<span class="i">ip</span> = <span class="i">closure</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">code</span>;
</pre><pre class="insert-after">  frame-&gt;slots = vm.stackTop - argCount - 1;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>call</em>(), replace 2 lines</div>

<p>This necessitates changing the declaration of CallFrame too.</p>
<div class="codehilite"><pre class="insert-before">typedef struct {
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>CallFrame</em><br>
replace 1 line</div>
<pre class="insert">  <span class="t">ObjClosure</span>* <span class="i">closure</span>;
</pre><pre class="insert-after">  uint8_t* ip;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>CallFrame</em>, replace 1 line</div>

<p>That change triggers a few other cascading changes. Every place in the VM that
accessed CallFrame&rsquo;s function needs to use a closure instead. First, the macro
for reading a constant from the current function&rsquo;s constant table:</p>
<div class="codehilite"><pre class="insert-before">    (uint16_t)((frame-&gt;ip[-2] &lt;&lt; 8) | frame-&gt;ip[-1]))

</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 2 lines</div>
<pre class="insert"><span class="a">#define READ_CONSTANT() \</span>
<span class="a">    (frame-&gt;closure-&gt;function-&gt;chunk.constants.values[READ_BYTE()])</span>
</pre><pre class="insert-after">

#define READ_STRING() AS_STRING(READ_CONSTANT())
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 2 lines</div>

<p>When <code>DEBUG_TRACE_EXECUTION</code> is enabled, the tracing code needs to get to the
chunk from the closure.</p>
<div class="codehilite"><pre class="insert-before">    printf(&quot;\n&quot;);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 2 lines</div>
<pre class="insert">    <span class="i">disassembleInstruction</span>(&amp;<span class="i">frame</span>-&gt;<span class="i">closure</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>,
        (<span class="t">int</span>)(<span class="i">frame</span>-&gt;<span class="i">ip</span> - <span class="i">frame</span>-&gt;<span class="i">closure</span>-&gt;<span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">code</span>));
</pre><pre class="insert-after">#endif
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 2 lines</div>

<p>Likewise when reporting a runtime error:</p>
<div class="codehilite"><pre class="insert-before">    CallFrame* frame = &amp;vm.frames[i];
</pre><div class="source-file"><em>vm.c</em><br>
in <em>runtimeError</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="i">frame</span>-&gt;<span class="i">closure</span>-&gt;<span class="i">function</span>;
</pre><pre class="insert-after">    size_t instruction = frame-&gt;ip - function-&gt;chunk.code - 1;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>runtimeError</em>(), replace 1 line</div>

<p>Almost there. The last piece is the blob of code that sets up the very first
CallFrame to begin executing the top-level code for a Lox script.</p>
<div class="codehilite"><pre class="insert-before">  push(OBJ_VAL(function));
</pre><div class="source-file"><em>vm.c</em><br>
in <em>interpret</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">ObjClosure</span>* <span class="i">closure</span> = <span class="i">newClosure</span>(<span class="i">function</span>);
  <span class="i">pop</span>();
  <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">closure</span>));
  <span class="i">call</span>(<span class="i">closure</span>, <span class="n">0</span>);
</pre><pre class="insert-after">

  return run();
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>interpret</em>(), replace 1 line</div>

<p><span name="pop">The</span> compiler still returns a raw ObjFunction when
compiling a script. That&rsquo;s fine, but it means we need to wrap it in an
ObjClosure here, before the VM can execute it.</p>
<aside name="pop">
<p>The code looks a little silly because we still push the original ObjFunction
onto the stack. Then we pop it after creating the closure, only to then push the
closure. Why put the ObjFunction on there at all? As usual, when you see weird
stack stuff going on, it&rsquo;s to keep the <a href="garbage-collection.html">forthcoming garbage collector</a> aware
of some heap-allocated objects.</p>
</aside>
<p>We are back to a working interpreter. The <em>user</em> can&rsquo;t tell any difference, but
the compiler now generates code telling the VM to create a closure for each
function declaration. Every time the VM executes a function declaration, it
wraps the ObjFunction in a new ObjClosure. The rest of the VM now handles those
ObjClosures floating around. That&rsquo;s the boring stuff out of the way. Now we&rsquo;re
ready to make these closures actually <em>do</em> something.</p>
<h2><a href="#upvalues" id="upvalues"><small>25&#8202;.&#8202;2</small>Upvalues</a></h2>
<p>Our existing instructions for reading and writing local variables are limited to
a single function&rsquo;s stack window. Locals from a surrounding function are outside
of the inner function&rsquo;s window. We&rsquo;re going to need some new instructions.</p>
<p>The easiest approach might be an instruction that takes a relative stack slot
offset that can reach <em>before</em> the current function&rsquo;s window. That would work if
closed-over variables were always on the stack. But as we saw earlier, these
variables sometimes outlive the function where they are declared. That means
they won&rsquo;t always be on the stack.</p>
<p>The next easiest approach, then, would be to take any local variable that gets
closed over and have it always live on the heap. When the local variable
declaration in the surrounding function is executed, the VM would allocate
memory for it dynamically. That way it could live as long as needed.</p>
<p>This would be a fine approach if clox didn&rsquo;t have a single-pass compiler. But
that restriction we chose in our implementation makes things harder. Take a look
at this example:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="n">1</span>;    <span class="c">// (1)</span>
  <span class="i">x</span> = <span class="n">2</span>;        <span class="c">// (2)</span>
  <span class="k">fun</span> <span class="i">inner</span>() { <span class="c">// (3)</span>
    <span class="k">print</span> <span class="i">x</span>;
  }
  <span class="i">inner</span>();
}
</pre></div>
<p>Here, the compiler compiles the declaration of <code>x</code> at <code>(1)</code> and emits code for
the assignment at <code>(2)</code>. It does that before reaching the declaration of
<code>inner()</code> at <code>(3)</code> and discovering that <code>x</code> is in fact closed over. We don&rsquo;t
have an easy way to go back and fix that already-emitted code to treat <code>x</code>
specially. Instead, we want a solution that allows a closed-over variable to
live on the stack exactly like a normal local variable <em>until the point that it
is closed over</em>.</p>
<p>Fortunately, thanks to the Lua dev team, we have a solution. We use a level of
indirection that they call an <strong>upvalue</strong>. An upvalue refers to a local variable
in an enclosing function. Every closure maintains an array of upvalues, one for
each surrounding local variable that the closure uses.</p>
<p>The upvalue points back into the stack to where the variable it captured lives.
When the closure needs to access a closed-over variable, it goes through the
corresponding upvalue to reach it. When a function declaration is first executed
and we create a closure for it, the VM creates the array of upvalues and wires
them up to &ldquo;capture&rdquo; the surrounding local variables that the closure needs.</p>
<p>For example, if we throw this program at clox,</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span> = <span class="n">3</span>;
  <span class="k">fun</span> <span class="i">f</span>() {
    <span class="k">print</span> <span class="i">a</span>;
  }
}
</pre></div>
<p>the compiler and runtime will conspire together to build up a set of objects in
memory like this:</p><img src="image/closures/open-upvalue.png" alt="The object graph of the stack, ObjClosure, ObjFunction, and upvalue array."/>
<p>That might look overwhelming, but fear not. We&rsquo;ll work our way through it. The
important part is that upvalues serve as the layer of indirection needed to
continue to find a captured local variable even after it moves off the stack.
But before we get to all that, let&rsquo;s focus on compiling captured variables.</p>
<h3><a href="#compiling-upvalues" id="compiling-upvalues"><small>25&#8202;.&#8202;2&#8202;.&#8202;1</small>Compiling upvalues</a></h3>
<p>As usual, we want to do as much work as possible during compilation to keep
execution simple and fast. Since local variables are lexically scoped in Lox, we
have enough knowledge at compile time to resolve which surrounding local
variables a function accesses and where those locals are declared. That, in
turn, means we know <em>how many</em> upvalues a closure needs, <em>which</em> variables they
capture, and <em>which stack slots</em> contain those variables in the declaring
function&rsquo;s stack window.</p>
<p>Currently, when the compiler resolves an identifier, it walks the block scopes
for the current function from innermost to outermost. If we don&rsquo;t find the
variable in that function, we assume the variable must be a global. We don&rsquo;t
consider the local scopes of enclosing functions<span class="em">&mdash;</span>they get skipped right over.
The first change, then, is inserting a resolution step for those outer local
scopes.</p>
<div class="codehilite"><pre class="insert-before">  if (arg != -1) {
    getOp = OP_GET_LOCAL;
    setOp = OP_SET_LOCAL;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>namedVariable</em>()</div>
<pre class="insert">  } <span class="k">else</span> <span class="k">if</span> ((<span class="i">arg</span> = <span class="i">resolveUpvalue</span>(<span class="i">current</span>, &amp;<span class="i">name</span>)) != -<span class="n">1</span>) {
    <span class="i">getOp</span> = <span class="a">OP_GET_UPVALUE</span>;
    <span class="i">setOp</span> = <span class="a">OP_SET_UPVALUE</span>;
</pre><pre class="insert-after">  } else {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>namedVariable</em>()</div>

<p>This new <code>resolveUpvalue()</code> function looks for a local variable declared in any
of the surrounding functions. If it finds one, it returns an &ldquo;upvalue index&rdquo; for
that variable. (We&rsquo;ll get into what that means later.) Otherwise, it returns -1
to indicate the variable wasn&rsquo;t found. If it was found, we use these two new
instructions for reading or writing to the variable through its upvalue:</p>
<div class="codehilite"><pre class="insert-before">  OP_SET_GLOBAL,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_GET_UPVALUE</span>,
  <span class="a">OP_SET_UPVALUE</span>,
</pre><pre class="insert-after">  OP_EQUAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>We&rsquo;re implementing this sort of top-down, so I&rsquo;ll show you how these work at
runtime soon. The part to focus on now is how the compiler actually resolves the
identifier.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>resolveLocal</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">resolveUpvalue</span>(<span class="t">Compiler</span>* <span class="i">compiler</span>, <span class="t">Token</span>* <span class="i">name</span>) {
  <span class="k">if</span> (<span class="i">compiler</span>-&gt;<span class="i">enclosing</span> == <span class="a">NULL</span>) <span class="k">return</span> -<span class="n">1</span>;

  <span class="t">int</span> <span class="i">local</span> = <span class="i">resolveLocal</span>(<span class="i">compiler</span>-&gt;<span class="i">enclosing</span>, <span class="i">name</span>);
  <span class="k">if</span> (<span class="i">local</span> != -<span class="n">1</span>) {
    <span class="k">return</span> <span class="i">addUpvalue</span>(<span class="i">compiler</span>, (<span class="t">uint8_t</span>)<span class="i">local</span>, <span class="k">true</span>);
  }

  <span class="k">return</span> -<span class="n">1</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>resolveLocal</em>()</div>

<p>We call this after failing to resolve a local variable in the current function&rsquo;s
scope, so we know the variable isn&rsquo;t in the current compiler. Recall that
Compiler stores a pointer to the Compiler for the enclosing function, and these
pointers form a linked chain that goes all the way to the root Compiler for the
top-level code. Thus, if the enclosing Compiler is <code>NULL</code>, we know we&rsquo;ve reached
the outermost function without finding a local variable. The variable must be
<span name="undefined">global</span>, so we return -1.</p>
<aside name="undefined">
<p>It might end up being an entirely undefined variable and not even global. But in
Lox, we don&rsquo;t detect that error until runtime, so from the compiler&rsquo;s
perspective, it&rsquo;s &ldquo;hopefully global&rdquo;.</p>
</aside>
<p>Otherwise, we try to resolve the identifier as a <em>local</em> variable in the
<em>enclosing</em> compiler. In other words, we look for it right outside the current
function. For example:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="n">1</span>;
  <span class="k">fun</span> <span class="i">inner</span>() {
    <span class="k">print</span> <span class="i">x</span>; <span class="c">// (1)</span>
  }
  <span class="i">inner</span>();
}
</pre></div>
<p>When compiling the identifier expression at <code>(1)</code>, <code>resolveUpvalue()</code> looks for
a local variable <code>x</code> declared in <code>outer()</code>. If found<span class="em">&mdash;</span>like it is in this
example<span class="em">&mdash;</span>then we&rsquo;ve successfully resolved the variable. We create an upvalue
so that the inner function can access the variable through that. The upvalue is
created here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>resolveLocal</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">addUpvalue</span>(<span class="t">Compiler</span>* <span class="i">compiler</span>, <span class="t">uint8_t</span> <span class="i">index</span>,
                      <span class="t">bool</span> <span class="i">isLocal</span>) {
  <span class="t">int</span> <span class="i">upvalueCount</span> = <span class="i">compiler</span>-&gt;<span class="i">function</span>-&gt;<span class="i">upvalueCount</span>;
  <span class="i">compiler</span>-&gt;<span class="i">upvalues</span>[<span class="i">upvalueCount</span>].<span class="i">isLocal</span> = <span class="i">isLocal</span>;
  <span class="i">compiler</span>-&gt;<span class="i">upvalues</span>[<span class="i">upvalueCount</span>].<span class="i">index</span> = <span class="i">index</span>;
  <span class="k">return</span> <span class="i">compiler</span>-&gt;<span class="i">function</span>-&gt;<span class="i">upvalueCount</span>++;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>resolveLocal</em>()</div>

<p>The compiler keeps an array of upvalue structures to track the closed-over
identifiers that it has resolved in the body of each function. Remember how the
compiler&rsquo;s Local array mirrors the stack slot indexes where locals live at
runtime? This new upvalue array works the same way. The indexes in the
compiler&rsquo;s array match the indexes where upvalues will live in the ObjClosure at
runtime.</p>
<p>This function adds a new upvalue to that array. It also keeps track of the
number of upvalues the function uses. It stores that count directly in the
ObjFunction itself because we&rsquo;ll also <span name="bridge">need</span> that
number for use at runtime.</p>
<aside name="bridge">
<p>Like constants and function arity, the upvalue count is another one of those
little pieces of data that form the bridge between the compiler and runtime.</p>
</aside>
<p>The <code>index</code> field tracks the closed-over local variable&rsquo;s slot index. That way
the compiler knows <em>which</em> variable in the enclosing function needs to be
captured. We&rsquo;ll circle back to what that <code>isLocal</code> field is for before too long.
Finally, <code>addUpvalue()</code> returns the index of the created upvalue in the
function&rsquo;s upvalue list. That index becomes the operand to the <code>OP_GET_UPVALUE</code>
and <code>OP_SET_UPVALUE</code> instructions.</p>
<p>That&rsquo;s the basic idea for resolving upvalues, but the function isn&rsquo;t fully
baked. A closure may reference the same variable in a surrounding function
multiple times. In that case, we don&rsquo;t want to waste time and memory creating a
separate upvalue for each identifier expression. To fix that, before we add a
new upvalue, we first check to see if the function already has an upvalue that
closes over that variable.</p>
<div class="codehilite"><pre class="insert-before">  int upvalueCount = compiler-&gt;function-&gt;upvalueCount;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>addUpvalue</em>()</div>
<pre class="insert">

  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">upvalueCount</span>; <span class="i">i</span>++) {
    <span class="t">Upvalue</span>* <span class="i">upvalue</span> = &amp;<span class="i">compiler</span>-&gt;<span class="i">upvalues</span>[<span class="i">i</span>];
    <span class="k">if</span> (<span class="i">upvalue</span>-&gt;<span class="i">index</span> == <span class="i">index</span> &amp;&amp; <span class="i">upvalue</span>-&gt;<span class="i">isLocal</span> == <span class="i">isLocal</span>) {
      <span class="k">return</span> <span class="i">i</span>;
    }
  }

</pre><pre class="insert-after">  compiler-&gt;upvalues[upvalueCount].isLocal = isLocal;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>addUpvalue</em>()</div>

<p>If we find an upvalue in the array whose slot index and <code>isLocal</code> flag match
the one we&rsquo;re adding, we just return that <em>upvalue</em> index and reuse it.
Otherwise, we fall through and add the new upvalue.</p>
<p>These two functions access and modify a bunch of new state, so let&rsquo;s define
that. First, we add the upvalue count to ObjFunction.</p>
<div class="codehilite"><pre class="insert-before">  int arity;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>ObjFunction</em></div>
<pre class="insert">  <span class="t">int</span> <span class="i">upvalueCount</span>;
</pre><pre class="insert-after">  Chunk chunk;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>ObjFunction</em></div>

<p>We&rsquo;re conscientious C programmers, so we zero-initialize that when an
ObjFunction is first allocated.</p>
<div class="codehilite"><pre class="insert-before">  function-&gt;arity = 0;
</pre><div class="source-file"><em>object.c</em><br>
in <em>newFunction</em>()</div>
<pre class="insert">  <span class="i">function</span>-&gt;<span class="i">upvalueCount</span> = <span class="n">0</span>;
</pre><pre class="insert-after">  function-&gt;name = NULL;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>newFunction</em>()</div>

<p>In the compiler, we add a field for the upvalue array.</p>
<div class="codehilite"><pre class="insert-before">  int localCount;
</pre><div class="source-file"><em>compiler.c</em><br>
in struct <em>Compiler</em></div>
<pre class="insert">  <span class="t">Upvalue</span> <span class="i">upvalues</span>[<span class="a">UINT8_COUNT</span>];
</pre><pre class="insert-after">  int scopeDepth;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in struct <em>Compiler</em></div>

<p>For simplicity, I gave it a fixed size. The <code>OP_GET_UPVALUE</code> and
<code>OP_SET_UPVALUE</code> instructions encode an upvalue index using a single byte
operand, so there&rsquo;s a restriction on how many upvalues a function can have<span class="em">&mdash;</span>how many unique variables it can close over. Given that, we can afford a static
array that large. We also need to make sure the compiler doesn&rsquo;t overflow that
limit.</p>
<div class="codehilite"><pre class="insert-before">    if (upvalue-&gt;index == index &amp;&amp; upvalue-&gt;isLocal == isLocal) {
      return i;
    }
  }

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>addUpvalue</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">upvalueCount</span> == <span class="a">UINT8_COUNT</span>) {
    <span class="i">error</span>(<span class="s">&quot;Too many closure variables in function.&quot;</span>);
    <span class="k">return</span> <span class="n">0</span>;
  }

</pre><pre class="insert-after">  compiler-&gt;upvalues[upvalueCount].isLocal = isLocal;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>addUpvalue</em>()</div>

<p>Finally, the Upvalue struct type itself.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after struct <em>Local</em></div>
<pre><span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">uint8_t</span> <span class="i">index</span>;
  <span class="t">bool</span> <span class="i">isLocal</span>;
} <span class="t">Upvalue</span>;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after struct <em>Local</em></div>

<p>The <code>index</code> field stores which local slot the upvalue is capturing. The
<code>isLocal</code> field deserves its own section, which we&rsquo;ll get to next.</p>
<h3><a href="#flattening-upvalues" id="flattening-upvalues"><small>25&#8202;.&#8202;2&#8202;.&#8202;2</small>Flattening upvalues</a></h3>
<p>In the example I showed before, the closure is accessing a variable declared in
the immediately enclosing function. Lox also supports accessing local variables
declared in <em>any</em> enclosing scope, as in:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="n">1</span>;
  <span class="k">fun</span> <span class="i">middle</span>() {
    <span class="k">fun</span> <span class="i">inner</span>() {
      <span class="k">print</span> <span class="i">x</span>;
    }
  }
}
</pre></div>
<p>Here, we&rsquo;re accessing <code>x</code> in <code>inner()</code>. That variable is defined not in
<code>middle()</code>, but all the way out in <code>outer()</code>. We need to handle cases like this
too. You <em>might</em> think that this isn&rsquo;t much harder since the variable will
simply be somewhere farther down on the stack. But consider this <span
name="devious">devious</span> example:</p>
<aside name="devious">
<p>If you work on programming languages long enough, you will develop a
finely honed skill at creating bizarre programs like this that are technically
valid but likely to trip up an implementation written by someone with a less
perverse imagination than you.</p>
</aside>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="s">&quot;value&quot;</span>;
  <span class="k">fun</span> <span class="i">middle</span>() {
    <span class="k">fun</span> <span class="i">inner</span>() {
      <span class="k">print</span> <span class="i">x</span>;
    }

    <span class="k">print</span> <span class="s">&quot;create inner closure&quot;</span>;
    <span class="k">return</span> <span class="i">inner</span>;
  }

  <span class="k">print</span> <span class="s">&quot;return from outer&quot;</span>;
  <span class="k">return</span> <span class="i">middle</span>;
}

<span class="k">var</span> <span class="i">mid</span> = <span class="i">outer</span>();
<span class="k">var</span> <span class="i">in</span> = <span class="i">mid</span>();
<span class="i">in</span>();
</pre></div>
<p>When you run this, it should print:</p>
<div class="codehilite"><pre>return from outer
create inner closure
value
</pre></div>
<p>I know, it&rsquo;s convoluted. The important part is that <code>outer()</code><span class="em">&mdash;</span>where <code>x</code> is
declared<span class="em">&mdash;</span>returns and pops all of its variables off the stack before the
<em>declaration</em> of <code>inner()</code> executes. So, at the point in time that we create the
closure for <code>inner()</code>, <code>x</code> is already off the stack.</p>
<p>Here, I traced out the execution flow for you:</p><img src="image/closures/execution-flow.png" alt="Tracing through the previous example program."/>
<p>See how <code>x</code> is popped &#9312; before it is captured &#9313; and then later
accessed &#9314;? We really have two problems:</p>
<ol>
<li>
<p>We need to resolve local variables that are declared in surrounding
functions beyond the immediately enclosing one.</p>
</li>
<li>
<p>We need to be able to capture variables that have already left the stack.</p>
</li>
</ol>
<p>Fortunately, we&rsquo;re in the middle of adding upvalues to the VM, and upvalues are
explicitly designed for tracking variables that have escaped the stack. So, in a
clever bit of self-reference, we can use upvalues to allow upvalues to capture
variables declared outside of the immediately surrounding function.</p>
<p>The solution is to allow a closure to capture either a local variable or <em>an
existing upvalue</em> in the immediately enclosing function. If a deeply nested
function references a local variable declared several hops away, we&rsquo;ll thread it
through all of the intermediate functions by having each function capture an
upvalue for the next function to grab.</p><img src="image/closures/linked-upvalues.png" alt="An upvalue in inner() points to an upvalue in middle(), which points to a local variable in outer()."/>
<p>In the above example, <code>middle()</code> captures the local variable <code>x</code> in the
immediately enclosing function <code>outer()</code> and stores it in its own upvalue. It
does this even though <code>middle()</code> itself doesn&rsquo;t reference <code>x</code>. Then, when the
declaration of <code>inner()</code> executes, its closure grabs the <em>upvalue</em> from the
ObjClosure for <code>middle()</code> that captured <code>x</code>. A function captures<span class="em">&mdash;</span>either a
local or upvalue<span class="em">&mdash;</span><em>only</em> from the immediately surrounding function, which is
guaranteed to still be around at the point that the inner function declaration
executes.</p>
<p>In order to implement this, <code>resolveUpvalue()</code> becomes recursive.</p>
<div class="codehilite"><pre class="insert-before">  if (local != -1) {
    return addUpvalue(compiler, (uint8_t)local, true);
  }

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>resolveUpvalue</em>()</div>
<pre class="insert">  <span class="t">int</span> <span class="i">upvalue</span> = <span class="i">resolveUpvalue</span>(<span class="i">compiler</span>-&gt;<span class="i">enclosing</span>, <span class="i">name</span>);
  <span class="k">if</span> (<span class="i">upvalue</span> != -<span class="n">1</span>) {
    <span class="k">return</span> <span class="i">addUpvalue</span>(<span class="i">compiler</span>, (<span class="t">uint8_t</span>)<span class="i">upvalue</span>, <span class="k">false</span>);
  }

</pre><pre class="insert-after">  return -1;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>resolveUpvalue</em>()</div>

<p>It&rsquo;s only another three lines of code, but I found this function really
challenging to get right the first time. This in spite of the fact that I wasn&rsquo;t
inventing anything new, just porting the concept over from Lua. Most recursive
functions either do all their work before the recursive call (a <strong>pre-order
traversal</strong>, or &ldquo;on the way down&rdquo;), or they do all the work after the recursive
call (a <strong>post-order traversal</strong>, or &ldquo;on the way back up&rdquo;). This function does
both. The recursive call is right in the middle.</p>
<p>We&rsquo;ll walk through it slowly. First, we look for a matching local variable in
the enclosing function. If we find one, we capture that local and return. That&rsquo;s
the <span name="base">base</span> case.</p>
<aside name="base">
<p>The other base case, of course, is if there is no enclosing function. In that
case, the variable can&rsquo;t be resolved lexically and is treated as global.</p>
</aside>
<p>Otherwise, we look for a local variable beyond the immediately enclosing
function. We do that by recursively calling <code>resolveUpvalue()</code> on the
<em>enclosing</em> compiler, not the current one. This series of <code>resolveUpvalue()</code>
calls works its way along the chain of nested compilers until it hits one of
the base cases<span class="em">&mdash;</span>either it finds an actual local variable to capture or it
runs out of compilers.</p>
<p>When a local variable is found, the most deeply <span name="outer">nested</span>
call to <code>resolveUpvalue()</code> captures it and returns the upvalue index. That
returns to the next call for the inner function declaration. That call captures
the <em>upvalue</em> from the surrounding function, and so on. As each nested call to
<code>resolveUpvalue()</code> returns, we drill back down into the innermost function
declaration where the identifier we are resolving appears. At each step along
the way, we add an upvalue to the intervening function and pass the resulting
upvalue index down to the next call.</p>
<aside name="outer">
<p>Each recursive call to <code>resolveUpvalue()</code> walks <em>out</em> one level of function
nesting. So an inner <em>recursive call</em> refers to an <em>outer</em> nested declaration.
The innermost recursive call to <code>resolveUpvalue()</code> that finds the local variable
will be for the <em>outermost</em> function, just inside the enclosing function where
that variable is actually declared.</p>
</aside>
<p>It might help to walk through the original example when resolving <code>x</code>:</p><img src="image/closures/recursion.png" alt="Tracing through a recursive call to resolveUpvalue()."/>
<p>Note that the new call to <code>addUpvalue()</code> passes <code>false</code> for the <code>isLocal</code>
parameter. Now you can see that this flag controls whether the closure captures a
local variable or an upvalue from the surrounding function.</p>
<p>By the time the compiler reaches the end of a function declaration, every
variable reference has been resolved as either a local, an upvalue, or a global.
Each upvalue may in turn capture a local variable from the surrounding function,
or an upvalue in the case of transitive closures. We finally have enough data to
emit bytecode which creates a closure at runtime that captures all of the
correct variables.</p>
<div class="codehilite"><pre class="insert-before">  emitBytes(OP_CLOSURE, makeConstant(OBJ_VAL(function)));
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>function</em>()</div>
<pre class="insert">

  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">function</span>-&gt;<span class="i">upvalueCount</span>; <span class="i">i</span>++) {
    <span class="i">emitByte</span>(<span class="i">compiler</span>.<span class="i">upvalues</span>[<span class="i">i</span>].<span class="i">isLocal</span> ? <span class="n">1</span> : <span class="n">0</span>);
    <span class="i">emitByte</span>(<span class="i">compiler</span>.<span class="i">upvalues</span>[<span class="i">i</span>].<span class="i">index</span>);
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>function</em>()</div>

<p>The <code>OP_CLOSURE</code> instruction is unique in that it has a variably sized encoding.
For each upvalue the closure captures, there are two single-byte operands. Each
pair of operands specifies what that upvalue captures. If the first byte is one,
it captures a local variable in the enclosing function. If zero, it captures one
of the enclosing function&rsquo;s upvalues. The next byte is the local slot or upvalue index to
capture.</p>
<p>This odd encoding means we need some bespoke support in the disassembly code
for <code>OP_CLOSURE</code>.</p>
<div class="codehilite"><pre class="insert-before">      printf(&quot;\n&quot;);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">

      <span class="t">ObjFunction</span>* <span class="i">function</span> = <span class="a">AS_FUNCTION</span>(
          <span class="i">chunk</span>-&gt;<span class="i">constants</span>.<span class="i">values</span>[<span class="i">constant</span>]);
      <span class="k">for</span> (<span class="t">int</span> <span class="i">j</span> = <span class="n">0</span>; <span class="i">j</span> &lt; <span class="i">function</span>-&gt;<span class="i">upvalueCount</span>; <span class="i">j</span>++) {
        <span class="t">int</span> <span class="i">isLocal</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span>++];
        <span class="t">int</span> <span class="i">index</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span>++];
        <span class="i">printf</span>(<span class="s">&quot;%04d      |                     %s %d</span><span class="e">\n</span><span class="s">&quot;</span>,
               <span class="i">offset</span> - <span class="n">2</span>, <span class="i">isLocal</span> ? <span class="s">&quot;local&quot;</span> : <span class="s">&quot;upvalue&quot;</span>, <span class="i">index</span>);
      }

</pre><pre class="insert-after">      return offset;
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>For example, take this script:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
  <span class="k">var</span> <span class="i">b</span> = <span class="n">2</span>;
  <span class="k">fun</span> <span class="i">middle</span>() {
    <span class="k">var</span> <span class="i">c</span> = <span class="n">3</span>;
    <span class="k">var</span> <span class="i">d</span> = <span class="n">4</span>;
    <span class="k">fun</span> <span class="i">inner</span>() {
      <span class="k">print</span> <span class="i">a</span> + <span class="i">c</span> + <span class="i">b</span> + <span class="i">d</span>;
    }
  }
}
</pre></div>
<p>If we disassemble the instruction that creates the closure for <code>inner()</code>, it
prints this:</p>
<div class="codehilite"><pre>0004    9 OP_CLOSURE          2 &lt;fn inner&gt;
0006      |                     upvalue 0
0008      |                     local 1
0010      |                     upvalue 1
0012      |                     local 2
</pre></div>
<p>We have two other, simpler instructions to add disassembler support for.</p>
<div class="codehilite"><pre class="insert-before">    case OP_SET_GLOBAL:
      return constantInstruction(&quot;OP_SET_GLOBAL&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_GET_UPVALUE</span>:
      <span class="k">return</span> <span class="i">byteInstruction</span>(<span class="s">&quot;OP_GET_UPVALUE&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_SET_UPVALUE</span>:
      <span class="k">return</span> <span class="i">byteInstruction</span>(<span class="s">&quot;OP_SET_UPVALUE&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_EQUAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>These both have a single-byte operand, so there&rsquo;s nothing exciting going on. We
do need to add an include so the debug module can get to <code>AS_FUNCTION()</code>.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;debug.h&quot;
</pre><div class="source-file"><em>debug.c</em></div>
<pre class="insert"><span class="a">#include &quot;object.h&quot;</span>
</pre><pre class="insert-after">#include &quot;value.h&quot;
</pre></div>
<div class="source-file-narrow"><em>debug.c</em></div>

<p>With that, our compiler is where we want it. For each function declaration, it
outputs an <code>OP_CLOSURE</code> instruction followed by a series of operand byte pairs
for each upvalue it needs to capture at runtime. It&rsquo;s time to hop over to that
side of the VM and get things running.</p>
<h2><a href="#upvalue-objects" id="upvalue-objects"><small>25&#8202;.&#8202;3</small>Upvalue Objects</a></h2>
<p>Each <code>OP_CLOSURE</code> instruction is now followed by the series of bytes that
specify the upvalues the ObjClosure should own. Before we process those
operands, we need a runtime representation for upvalues.</p>
<div class="codehilite"><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjString</em></div>
<pre><span class="k">typedef</span> <span class="k">struct</span> <span class="t">ObjUpvalue</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">Value</span>* <span class="i">location</span>;
} <span class="t">ObjUpvalue</span>;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjString</em></div>

<p>We know upvalues must manage closed-over variables that no longer live on the
stack, which implies some amount of dynamic allocation. The easiest way to do
that in our VM is by building on the object system we already have. That way,
when we implement a garbage collector in <a href="garbage-collection.html">the next chapter</a>, the GC can
manage memory for upvalues too.</p>
<p>Thus, our runtime upvalue structure is an ObjUpvalue with the typical Obj header
field. Following that is a <code>location</code> field that points to the closed-over
variable. Note that this is a <em>pointer</em> to a Value, not a Value itself. It&rsquo;s a
reference to a <em>variable</em>, not a <em>value</em>. This is important because it means
that when we assign to the variable the upvalue captures, we&rsquo;re assigning to the
actual variable, not a copy. For example:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="s">&quot;before&quot;</span>;
  <span class="k">fun</span> <span class="i">inner</span>() {
    <span class="i">x</span> = <span class="s">&quot;assigned&quot;</span>;
  }
  <span class="i">inner</span>();
  <span class="k">print</span> <span class="i">x</span>;
}
<span class="i">outer</span>();
</pre></div>
<p>This program should print &ldquo;assigned&rdquo; even though the closure assigns to <code>x</code> and
the surrounding function accesses it.</p>
<p>Because upvalues are objects, we&rsquo;ve got all the usual object machinery, starting
with a constructor-like function:</p>
<div class="codehilite"><pre class="insert-before">ObjString* copyString(const char* chars, int length);
</pre><div class="source-file"><em>object.h</em><br>
add after <em>copyString</em>()</div>
<pre class="insert"><span class="t">ObjUpvalue</span>* <span class="i">newUpvalue</span>(<span class="t">Value</span>* <span class="i">slot</span>);
</pre><pre class="insert-after">void printObject(Value value);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after <em>copyString</em>()</div>

<p>It takes the address of the slot where the closed-over variable lives. Here is
the implementation:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>copyString</em>()</div>
<pre><span class="t">ObjUpvalue</span>* <span class="i">newUpvalue</span>(<span class="t">Value</span>* <span class="i">slot</span>) {
  <span class="t">ObjUpvalue</span>* <span class="i">upvalue</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjUpvalue</span>, <span class="a">OBJ_UPVALUE</span>);
  <span class="i">upvalue</span>-&gt;<span class="i">location</span> = <span class="i">slot</span>;
  <span class="k">return</span> <span class="i">upvalue</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>copyString</em>()</div>

<p>We simply initialize the object and store the pointer. That requires a new
object type.</p>
<div class="codehilite"><pre class="insert-before">  OBJ_STRING,
</pre><div class="source-file"><em>object.h</em><br>
in enum <em>ObjType</em></div>
<pre class="insert">  <span class="a">OBJ_UPVALUE</span>
</pre><pre class="insert-after">} ObjType;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in enum <em>ObjType</em></div>

<p>And on the back side, a destructor-like function:</p>
<div class="codehilite"><pre class="insert-before">      FREE(ObjString, object);
      break;
    }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_UPVALUE</span>:
      <span class="a">FREE</span>(<span class="t">ObjUpvalue</span>, <span class="i">object</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>Multiple closures can close over the same variable, so ObjUpvalue does not own
the variable it references. Thus, the only thing to free is the ObjUpvalue
itself.</p>
<p>And, finally, to print:</p>
<div class="codehilite"><pre class="insert-before">    case OBJ_STRING:
      printf(&quot;%s&quot;, AS_CSTRING(value));
      break;
</pre><div class="source-file"><em>object.c</em><br>
in <em>printObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_UPVALUE</span>:
      <span class="i">printf</span>(<span class="s">&quot;upvalue&quot;</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printObject</em>()</div>

<p>Printing isn&rsquo;t useful to end users. Upvalues are objects only so that we can
take advantage of the VM&rsquo;s memory management. They aren&rsquo;t first-class values
that a Lox user can directly access in a program. So this code will never
actually execute<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>but it keeps the compiler from yelling at us about an
unhandled switch case, so here we are.</p>
<h3><a href="#upvalues-in-closures" id="upvalues-in-closures"><small>25&#8202;.&#8202;3&#8202;.&#8202;1</small>Upvalues in closures</a></h3>
<p>When I first introduced upvalues, I said each closure has an array of them.
We&rsquo;ve finally worked our way back to implementing that.</p>
<div class="codehilite"><pre class="insert-before">  ObjFunction* function;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>ObjClosure</em></div>
<pre class="insert">  <span class="t">ObjUpvalue</span>** <span class="i">upvalues</span>;
  <span class="t">int</span> <span class="i">upvalueCount</span>;
</pre><pre class="insert-after">} ObjClosure;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>ObjClosure</em></div>

<p><span name="count">Different</span> closures may have different numbers of
upvalues, so we need a dynamic array. The upvalues themselves are dynamically
allocated too, so we end up with a double pointer<span class="em">&mdash;</span>a pointer to a dynamically
allocated array of pointers to upvalues. We also store the number of elements in
the array.</p>
<aside name="count">
<p>Storing the upvalue count in the closure is redundant because the ObjFunction
that the ObjClosure references also keeps that count. As usual, this weird code
is to appease the GC. The collector may need to know an ObjClosure&rsquo;s upvalue
array size after the closure&rsquo;s corresponding ObjFunction has already been freed.</p>
</aside>
<p>When we create an ObjClosure, we allocate an upvalue array of the proper size,
which we determined at compile time and stored in the ObjFunction.</p>
<div class="codehilite"><pre class="insert-before">ObjClosure* newClosure(ObjFunction* function) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>newClosure</em>()</div>
<pre class="insert">  <span class="t">ObjUpvalue</span>** <span class="i">upvalues</span> = <span class="a">ALLOCATE</span>(<span class="t">ObjUpvalue</span>*,
                                   <span class="i">function</span>-&gt;<span class="i">upvalueCount</span>);
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">function</span>-&gt;<span class="i">upvalueCount</span>; <span class="i">i</span>++) {
    <span class="i">upvalues</span>[<span class="i">i</span>] = <span class="a">NULL</span>;
  }

</pre><pre class="insert-after">  ObjClosure* closure = ALLOCATE_OBJ(ObjClosure, OBJ_CLOSURE);
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>newClosure</em>()</div>

<p>Before creating the closure object itself, we allocate the array of upvalues and
initialize them all to <code>NULL</code>. This weird ceremony around memory is a careful
dance to please the (forthcoming) garbage collection deities. It ensures the
memory manager never sees uninitialized memory.</p>
<p>Then we store the array in the new closure, as well as copy the count over from
the ObjFunction.</p>
<div class="codehilite"><pre class="insert-before">  closure-&gt;function = function;
</pre><div class="source-file"><em>object.c</em><br>
in <em>newClosure</em>()</div>
<pre class="insert">  <span class="i">closure</span>-&gt;<span class="i">upvalues</span> = <span class="i">upvalues</span>;
  <span class="i">closure</span>-&gt;<span class="i">upvalueCount</span> = <span class="i">function</span>-&gt;<span class="i">upvalueCount</span>;
</pre><pre class="insert-after">  return closure;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>newClosure</em>()</div>

<p>When we free an ObjClosure, we also free the upvalue array.</p>
<div class="codehilite"><pre class="insert-before">    case OBJ_CLOSURE: {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">      <span class="t">ObjClosure</span>* <span class="i">closure</span> = (<span class="t">ObjClosure</span>*)<span class="i">object</span>;
      <span class="a">FREE_ARRAY</span>(<span class="t">ObjUpvalue</span>*, <span class="i">closure</span>-&gt;<span class="i">upvalues</span>,
                 <span class="i">closure</span>-&gt;<span class="i">upvalueCount</span>);
</pre><pre class="insert-after">      FREE(ObjClosure, object);
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>ObjClosure does not own the ObjUpvalue objects themselves, but it does own <em>the
array</em> containing pointers to those upvalues.</p>
<p>We fill the upvalue array over in the interpreter when it creates a closure.
This is where we walk through all of the operands after <code>OP_CLOSURE</code> to see what
kind of upvalue each slot captures.</p>
<div class="codehilite"><pre class="insert-before">        push(OBJ_VAL(closure));
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">        <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">closure</span>-&gt;<span class="i">upvalueCount</span>; <span class="i">i</span>++) {
          <span class="t">uint8_t</span> <span class="i">isLocal</span> = <span class="a">READ_BYTE</span>();
          <span class="t">uint8_t</span> <span class="i">index</span> = <span class="a">READ_BYTE</span>();
          <span class="k">if</span> (<span class="i">isLocal</span>) {
            <span class="i">closure</span>-&gt;<span class="i">upvalues</span>[<span class="i">i</span>] =
                <span class="i">captureUpvalue</span>(<span class="i">frame</span>-&gt;<span class="i">slots</span> + <span class="i">index</span>);
          } <span class="k">else</span> {
            <span class="i">closure</span>-&gt;<span class="i">upvalues</span>[<span class="i">i</span>] = <span class="i">frame</span>-&gt;<span class="i">closure</span>-&gt;<span class="i">upvalues</span>[<span class="i">index</span>];
          }
        }
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>This code is the magic moment when a closure comes to life. We iterate over each
upvalue the closure expects. For each one, we read a pair of operand bytes. If
the upvalue closes over a local variable in the enclosing function, we let
<code>captureUpvalue()</code> do the work.</p>
<p>Otherwise, we capture an upvalue from the surrounding function. An <code>OP_CLOSURE</code>
instruction is emitted at the end of a function declaration. At the moment that
we are executing that declaration, the <em>current</em> function is the surrounding
one. That means the current function&rsquo;s closure is stored in the CallFrame at the
top of the call stack. So, to grab an upvalue from the enclosing function, we can
read it right from the <code>frame</code> local variable, which caches a reference to that
CallFrame.</p>
<p>Closing over a local variable is more interesting. Most of the work happens in a
separate function, but first we calculate the argument to pass to it. We need to
grab a pointer to the captured local&rsquo;s slot in the surrounding function&rsquo;s stack
window. That window begins at <code>frame-&gt;slots</code>, which points to slot zero. Adding
<code>index</code> offsets that to the local slot we want to capture. We pass that pointer
here:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>callValue</em>()</div>
<pre><span class="k">static</span> <span class="t">ObjUpvalue</span>* <span class="i">captureUpvalue</span>(<span class="t">Value</span>* <span class="i">local</span>) {
  <span class="t">ObjUpvalue</span>* <span class="i">createdUpvalue</span> = <span class="i">newUpvalue</span>(<span class="i">local</span>);
  <span class="k">return</span> <span class="i">createdUpvalue</span>;
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>callValue</em>()</div>

<p>This seems a little silly. All it does is create a new ObjUpvalue that captures
the given stack slot and returns it. Did we need a separate function for this?
Well, no, not <em>yet</em>. But you know we are going to end up sticking more code in
here.</p>
<p>First, let&rsquo;s wrap up what we&rsquo;re working on. Back in the interpreter code for
handling <code>OP_CLOSURE</code>, we eventually finish iterating through the upvalue
array, initializing each entry along the way. When that completes, we have a new closure with
an array full of upvalues pointing to variables.</p>
<p>With that in hand, we can implement the instructions that work with those
upvalues.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_GET_UPVALUE</span>: {
        <span class="t">uint8_t</span> <span class="i">slot</span> = <span class="a">READ_BYTE</span>();
        <span class="i">push</span>(*<span class="i">frame</span>-&gt;<span class="i">closure</span>-&gt;<span class="i">upvalues</span>[<span class="i">slot</span>]-&gt;<span class="i">location</span>);
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>The operand is the index into the current function&rsquo;s upvalue array. So we simply
look up the corresponding upvalue and dereference its location pointer to read
the value in that slot. Setting a variable is similar.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_SET_UPVALUE</span>: {
        <span class="t">uint8_t</span> <span class="i">slot</span> = <span class="a">READ_BYTE</span>();
        *<span class="i">frame</span>-&gt;<span class="i">closure</span>-&gt;<span class="i">upvalues</span>[<span class="i">slot</span>]-&gt;<span class="i">location</span> = <span class="i">peek</span>(<span class="n">0</span>);
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We <span name="assign">take</span> the value on top of the stack and store it
into the slot pointed to by the chosen upvalue. Just as with the instructions
for local variables, it&rsquo;s important that these instructions are fast. User
programs are constantly reading and writing variables, so if that&rsquo;s slow,
everything is slow. And, as usual, the way we make them fast is by keeping them
simple. These two new instructions are pretty good: no control flow, no complex
arithmetic, just a couple of pointer indirections and a <code>push()</code>.</p>
<aside name="assign">
<p>The set instruction doesn&rsquo;t <em>pop</em> the value from the stack because, remember,
assignment is an expression in Lox. So the result of the assignment<span class="em">&mdash;</span>the
assigned value<span class="em">&mdash;</span>needs to remain on the stack for the surrounding expression.</p>
</aside>
<p>This is a milestone. As long as all of the variables remain on the stack, we
have working closures. Try this:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="s">&quot;outside&quot;</span>;
  <span class="k">fun</span> <span class="i">inner</span>() {
    <span class="k">print</span> <span class="i">x</span>;
  }
  <span class="i">inner</span>();
}
<span class="i">outer</span>();
</pre></div>
<p>Run this, and it correctly prints &ldquo;outside&rdquo;.</p>
<h2><a href="#closed-upvalues" id="closed-upvalues"><small>25&#8202;.&#8202;4</small>Closed Upvalues</a></h2>
<p>Of course, a key feature of closures is that they hold on to the variable as
long as needed, even after the function that declares the variable has returned.
Here&rsquo;s another example that <em>should</em> work:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outer</span>() {
  <span class="k">var</span> <span class="i">x</span> = <span class="s">&quot;outside&quot;</span>;
  <span class="k">fun</span> <span class="i">inner</span>() {
    <span class="k">print</span> <span class="i">x</span>;
  }

  <span class="k">return</span> <span class="i">inner</span>;
}

<span class="k">var</span> <span class="i">closure</span> = <span class="i">outer</span>();
<span class="i">closure</span>();
</pre></div>
<p>But if you run it right now<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>who knows what it does? At runtime, it will end
up reading from a stack slot that no longer contains the closed-over variable.
Like I&rsquo;ve mentioned a few times, the crux of the issue is that variables in
closures don&rsquo;t have stack semantics. That means we&rsquo;ve got to hoist them off the
stack when the function where they were declared returns. This final section of
the chapter does that.</p>
<h3><a href="#values-and-variables" id="values-and-variables"><small>25&#8202;.&#8202;4&#8202;.&#8202;1</small>Values and variables</a></h3>
<p>Before we get to writing code, I want to dig into an important semantic point.
Does a closure close over a <em>value</em> or a <em>variable?</em> This isn&rsquo;t purely an <span
name="academic">academic</span> question. I&rsquo;m not just splitting hairs.
Consider:</p>
<aside name="academic">
<p>If Lox didn&rsquo;t allow assignment, it <em>would</em> be an academic question.</p>
</aside>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">globalSet</span>;
<span class="k">var</span> <span class="i">globalGet</span>;

<span class="k">fun</span> <span class="i">main</span>() {
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;initial&quot;</span>;

  <span class="k">fun</span> <span class="i">set</span>() { <span class="i">a</span> = <span class="s">&quot;updated&quot;</span>; }
  <span class="k">fun</span> <span class="i">get</span>() { <span class="k">print</span> <span class="i">a</span>; }

  <span class="i">globalSet</span> = <span class="i">set</span>;
  <span class="i">globalGet</span> = <span class="i">get</span>;
}

<span class="i">main</span>();
<span class="i">globalSet</span>();
<span class="i">globalGet</span>();
</pre></div>
<p>The outer <code>main()</code> function creates two closures and stores them in <span
name="global">global</span> variables so that they outlive the execution of
<code>main()</code> itself. Both of those closures capture the same variable. The first
closure assigns a new value to it and the second closure reads the variable.</p>
<aside name="global">
<p>The fact that I&rsquo;m using a couple of global variables isn&rsquo;t significant. I needed
some way to return two values from a function, and without any kind of
collection type in Lox, my options were limited.</p>
</aside>
<p>What does the call to <code>globalGet()</code> print? If closures capture <em>values</em> then
each closure gets its own copy of <code>a</code> with the value that <code>a</code> had at the point
in time that the closure&rsquo;s function declaration executed. The call to
<code>globalSet()</code> will modify <code>set()</code>&rsquo;s copy of <code>a</code>, but <code>get()</code>&rsquo;s copy will be
unaffected. Thus, the call to <code>globalGet()</code> will print &ldquo;initial&rdquo;.</p>
<p>If closures close over variables, then <code>get()</code> and <code>set()</code> will both capture<span class="em">&mdash;</span>reference<span class="em">&mdash;</span>the <em>same mutable variable</em>. When <code>set()</code> changes <code>a</code>, it changes
the same <code>a</code> that <code>get()</code> reads from. There is only one <code>a</code>. That, in turn,
implies the call to <code>globalGet()</code> will print &ldquo;updated&rdquo;.</p>
<p>Which is it? The answer for Lox and most other languages I know with closures is
the latter. Closures capture variables. You can think of them as capturing <em>the
place the value lives</em>. This is important to keep in mind as we deal with
closed-over variables that are no longer on the stack. When a variable moves to
the heap, we need to ensure that all closures capturing that variable retain a
reference to its <em>one</em> new location. That way, when the variable is mutated, all
closures see the change.</p>
<h3><a href="#closing-upvalues" id="closing-upvalues"><small>25&#8202;.&#8202;4&#8202;.&#8202;2</small>Closing upvalues</a></h3>
<p>We know that local variables always start out on the stack. This is faster, and
lets our single-pass compiler emit code before it discovers the variable has
been captured. We also know that closed-over variables need to move to the heap
if the closure outlives the function where the captured variable is declared.</p>
<p>Following Lua, we&rsquo;ll use <strong>open upvalue</strong> to refer to an upvalue that points to
a local variable still on the stack. When a variable moves to the heap, we are
<em>closing</em> the upvalue and the result is, naturally, a <strong>closed upvalue</strong>. The
two questions we need to answer are:</p>
<ol>
<li>
<p>Where on the heap does the closed-over variable go?</p>
</li>
<li>
<p>When do we close the upvalue?</p>
</li>
</ol>
<p>The answer to the first question is easy. We already have a convenient object on
the heap that represents a reference to a variable<span class="em">&mdash;</span>ObjUpvalue itself. The
closed-over variable will move into a new field right inside the ObjUpvalue
struct. That way we don&rsquo;t need to do any additional heap allocation to close an
upvalue.</p>
<p>The second question is straightforward too. As long as the variable is on the
stack, there may be code that refers to it there, and that code must work
correctly. So the logical time to hoist the variable to the heap is as late as
possible. If we move the local variable right when it goes out of scope, we are
certain that no code after that point will try to access it from the stack.
<span name="after">After</span> the variable is out of scope, the compiler will
have reported an error if any code tried to use it.</p>
<aside name="after">
<p>By &ldquo;after&rdquo; here, I mean in the lexical or textual sense<span class="em">&mdash;</span>code past the <code>}</code>
for the block containing the declaration of the closed-over variable.</p>
</aside>
<p>The compiler already emits an <code>OP_POP</code> instruction when a local variable goes
out of scope. If a variable is captured by a closure, we will instead emit a
different instruction to hoist that variable out of the stack and into its
corresponding upvalue. To do that, the compiler needs to know which <span
name="param">locals</span> are closed over.</p>
<aside name="param">
<p>The compiler doesn&rsquo;t pop parameters and locals declared immediately inside the
body of a function. We&rsquo;ll handle those too, in the runtime.</p>
</aside>
<p>The compiler already maintains an array of Upvalue structs for each local
variable in the function to track exactly that state. That array is good for
answering &ldquo;Which variables does this closure use?&rdquo; But it&rsquo;s poorly suited for
answering, &ldquo;Does <em>any</em> function capture this local variable?&rdquo; In particular,
once the Compiler for some closure has finished, the Compiler for the enclosing
function whose variable has been captured no longer has access to any of the
upvalue state.</p>
<p>In other words, the compiler maintains pointers from upvalues to the locals they
capture, but not in the other direction. So we first need to add some extra
tracking inside the existing Local struct so that we can tell if a given local
is captured by a closure.</p>
<div class="codehilite"><pre class="insert-before">  int depth;
</pre><div class="source-file"><em>compiler.c</em><br>
in struct <em>Local</em></div>
<pre class="insert">  <span class="t">bool</span> <span class="i">isCaptured</span>;
</pre><pre class="insert-after">} Local;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in struct <em>Local</em></div>

<p>This field is <code>true</code> if the local is captured by any later nested function
declaration. Initially, no locals are captured.</p>
<div class="codehilite"><pre class="insert-before">  local-&gt;depth = -1;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>addLocal</em>()</div>
<pre class="insert">  <span class="i">local</span>-&gt;<span class="i">isCaptured</span> = <span class="k">false</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>addLocal</em>()</div>

<p><span name="zero">Likewise</span>, the special &ldquo;slot zero local&rdquo; that the
compiler implicitly declares is not captured.</p>
<aside name="zero">
<p>Later in the book, it <em>will</em> become possible for a user to capture this
variable. Just building some anticipation here.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  local-&gt;depth = 0;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>initCompiler</em>()</div>
<pre class="insert">  <span class="i">local</span>-&gt;<span class="i">isCaptured</span> = <span class="k">false</span>;
</pre><pre class="insert-after">  local-&gt;name.start = &quot;&quot;;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>initCompiler</em>()</div>

<p>When resolving an identifier, if we end up creating an upvalue for a local
variable, we mark it as captured.</p>
<div class="codehilite"><pre class="insert-before">  if (local != -1) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>resolveUpvalue</em>()</div>
<pre class="insert">    <span class="i">compiler</span>-&gt;<span class="i">enclosing</span>-&gt;<span class="i">locals</span>[<span class="i">local</span>].<span class="i">isCaptured</span> = <span class="k">true</span>;
</pre><pre class="insert-after">    return addUpvalue(compiler, (uint8_t)local, true);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>resolveUpvalue</em>()</div>

<p>Now, at the end of a block scope when the compiler emits code to free the stack
slots for the locals, we can tell which ones need to get hoisted onto the heap.
We&rsquo;ll use a new instruction for that.</p>
<div class="codehilite"><pre class="insert-before">  while (current-&gt;localCount &gt; 0 &amp;&amp;
         current-&gt;locals[current-&gt;localCount - 1].depth &gt;
            current-&gt;scopeDepth) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>endScope</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">locals</span>[<span class="i">current</span>-&gt;<span class="i">localCount</span> - <span class="n">1</span>].<span class="i">isCaptured</span>) {
      <span class="i">emitByte</span>(<span class="a">OP_CLOSE_UPVALUE</span>);
    } <span class="k">else</span> {
      <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
    }
</pre><pre class="insert-after">    current-&gt;localCount--;
  }
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>endScope</em>(), replace 1 line</div>

<p>The instruction requires no operand. We know that the variable will always be
right on top of the stack at the point that this instruction executes. We
declare the instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_CLOSURE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_CLOSE_UPVALUE</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>And add trivial disassembler support for it:</p>
<div class="codehilite"><pre class="insert-before">    }
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_CLOSE_UPVALUE</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_CLOSE_UPVALUE&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>Excellent. Now the generated bytecode tells the runtime exactly when each
captured local variable must move to the heap. Better, it does so only for the
locals that <em>are</em> used by a closure and need this special treatment. This aligns
with our general performance goal that we want users to pay only for
functionality that they use. Variables that aren&rsquo;t used by closures live and die
entirely on the stack just as they did before.</p>
<h3><a href="#tracking-open-upvalues" id="tracking-open-upvalues"><small>25&#8202;.&#8202;4&#8202;.&#8202;3</small>Tracking open upvalues</a></h3>
<p>Let&rsquo;s move over to the runtime side. Before we can interpret <code>OP_CLOSE_UPVALUE</code>
instructions, we have an issue to resolve. Earlier, when I talked about whether
closures capture variables or values, I said it was important that if multiple
closures access the same variable, they end up with a reference to the
exact same storage location in memory. That way if one closure writes to the
variable, the other closure sees the change.</p>
<p>Right now, if two closures capture the same <span name="indirect">local</span>
variable, the VM creates a separate Upvalue for each one. The necessary sharing
is missing. When we move the variable off the stack, if we move it into only one
of the upvalues, the other upvalue will have an orphaned value.</p>
<aside name="indirect">
<p>The VM <em>does</em> share upvalues if one closure captures an <em>upvalue</em> from a
surrounding function. The nested case works correctly. But if two <em>sibling</em>
closures capture the same local variable, they each create a separate
ObjUpvalue.</p>
</aside>
<p>To fix that, whenever the VM needs an upvalue that captures a particular local
variable slot, we will first search for an existing upvalue pointing to that
slot. If found, we reuse that. The challenge is that all of the previously
created upvalues are squirreled away inside the upvalue arrays of the various
closures. Those closures could be anywhere in the VM&rsquo;s memory.</p>
<p>The first step is to give the VM its own list of all open upvalues that point to
variables still on the stack. Searching a list each time the VM needs an upvalue
sounds like it might be slow, but in practice, it&rsquo;s not bad. The number of
variables on the stack that actually get closed over tends to be small. And
function declarations that <span name="create">create</span> closures are rarely
on performance critical execution paths in the user&rsquo;s program.</p>
<aside name="create">
<p>Closures are frequently <em>invoked</em> inside hot loops. Think about the closures
passed to typical higher-order functions on collections like <a href="https://en.wikipedia.org/wiki/Map_(higher-order_function)"><code>map()</code></a> and
<a href="https://en.wikipedia.org/wiki/Filter_(higher-order_function)"><code>filter()</code></a>. That should be fast. But the function declaration that
<em>creates</em> the closure happens only once and is usually outside of the loop.</p>
</aside>
<p>Even better, we can order the list of open upvalues by the stack slot index they
point to. The common case is that a slot has <em>not</em> already been captured<span class="em">&mdash;</span>sharing variables between closures is uncommon<span class="em">&mdash;</span>and closures tend to capture
locals near the top of the stack. If we store the open upvalues in stack
slot order, as soon as we step past the slot where the local we&rsquo;re capturing
lives, we know it won&rsquo;t be found. When that local is near the top of the stack,
we can exit the loop pretty early.</p>
<p>Maintaining a sorted list requires inserting elements in the middle efficiently.
That suggests using a linked list instead of a dynamic array. Since we defined
the ObjUpvalue struct ourselves, the easiest implementation is an intrusive list
that puts the next pointer right inside the ObjUpvalue struct itself.</p>
<div class="codehilite"><pre class="insert-before">  Value* location;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>ObjUpvalue</em></div>
<pre class="insert">  <span class="k">struct</span> <span class="t">ObjUpvalue</span>* <span class="i">next</span>;
</pre><pre class="insert-after">} ObjUpvalue;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>ObjUpvalue</em></div>

<p>When we allocate an upvalue, it is not attached to any list yet so the link is
<code>NULL</code>.</p>
<div class="codehilite"><pre class="insert-before">  upvalue-&gt;location = slot;
</pre><div class="source-file"><em>object.c</em><br>
in <em>newUpvalue</em>()</div>
<pre class="insert">  <span class="i">upvalue</span>-&gt;<span class="i">next</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">  return upvalue;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>newUpvalue</em>()</div>

<p>The VM owns the list, so the head pointer goes right inside the main VM struct.</p>
<div class="codehilite"><pre class="insert-before">  Table strings;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">ObjUpvalue</span>* <span class="i">openUpvalues</span>;
</pre><pre class="insert-after">  Obj* objects;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>The list starts out empty.</p>
<div class="codehilite"><pre class="insert-before">  vm.frameCount = 0;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>resetStack</em>()</div>
<pre class="insert">  <span class="i">vm</span>.<span class="i">openUpvalues</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>resetStack</em>()</div>

<p>Starting with the first upvalue pointed to by the VM, each open upvalue points
to the next open upvalue that references a local variable farther down the
stack. This script, for example,</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
  <span class="k">fun</span> <span class="i">f</span>() {
    <span class="k">print</span> <span class="i">a</span>;
  }
  <span class="k">var</span> <span class="i">b</span> = <span class="n">2</span>;
  <span class="k">fun</span> <span class="i">g</span>() {
    <span class="k">print</span> <span class="i">b</span>;
  }
  <span class="k">var</span> <span class="i">c</span> = <span class="n">3</span>;
  <span class="k">fun</span> <span class="i">h</span>() {
    <span class="k">print</span> <span class="i">c</span>;
  }
}
</pre></div>
<p>should produce a series of linked upvalues like so:</p><img src="image/closures/linked-list.png" alt="Three upvalues in a linked list."/>
<p>Whenever we close over a local variable, before creating a new upvalue, we look
for an existing one in the list.</p>
<div class="codehilite"><pre class="insert-before">static ObjUpvalue* captureUpvalue(Value* local) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>captureUpvalue</em>()</div>
<pre class="insert">  <span class="t">ObjUpvalue</span>* <span class="i">prevUpvalue</span> = <span class="a">NULL</span>;
  <span class="t">ObjUpvalue</span>* <span class="i">upvalue</span> = <span class="i">vm</span>.<span class="i">openUpvalues</span>;
  <span class="k">while</span> (<span class="i">upvalue</span> != <span class="a">NULL</span> &amp;&amp; <span class="i">upvalue</span>-&gt;<span class="i">location</span> &gt; <span class="i">local</span>) {
    <span class="i">prevUpvalue</span> = <span class="i">upvalue</span>;
    <span class="i">upvalue</span> = <span class="i">upvalue</span>-&gt;<span class="i">next</span>;
  }

  <span class="k">if</span> (<span class="i">upvalue</span> != <span class="a">NULL</span> &amp;&amp; <span class="i">upvalue</span>-&gt;<span class="i">location</span> == <span class="i">local</span>) {
    <span class="k">return</span> <span class="i">upvalue</span>;
  }

</pre><pre class="insert-after">  ObjUpvalue* createdUpvalue = newUpvalue(local);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>captureUpvalue</em>()</div>

<p>We start at the <span name="head">head</span> of the list, which is the upvalue
closest to the top of the stack. We walk through the list, using a little
pointer comparison to iterate past every upvalue pointing to slots above the one
we&rsquo;re looking for. While we do that, we keep track of the preceding upvalue on
the list. We&rsquo;ll need to update that node&rsquo;s <code>next</code> pointer if we end up inserting
a node after it.</p>
<aside name="head">
<p>It&rsquo;s a singly linked list. It&rsquo;s not like we have any other choice than to start
at the head and go forward from there.</p>
</aside>
<p>There are three reasons we can exit the loop:</p>
<ol>
<li>
<p><strong>The local slot we stopped at <em>is</em> the slot we&rsquo;re looking for.</strong> We found
an existing upvalue capturing the variable, so we reuse that upvalue.</p>
</li>
<li>
<p><strong>We ran out of upvalues to search.</strong> When <code>upvalue</code> is <code>NULL</code>, it means
every open upvalue in the list points to locals above the slot we&rsquo;re looking
for, or (more likely) the upvalue list is empty. Either way, we didn&rsquo;t find
an upvalue for our slot.</p>
</li>
<li>
<p><strong>We found an upvalue whose local slot is <em>below</em> the one we&rsquo;re looking
for.</strong> Since the list is sorted, that means we&rsquo;ve gone past the slot we are
closing over, and thus there must not be an existing upvalue for it.</p>
</li>
</ol>
<p>In the first case, we&rsquo;re done and we&rsquo;ve returned. Otherwise, we create a new
upvalue for our local slot and insert it into the list at the right location.</p>
<div class="codehilite"><pre class="insert-before">  ObjUpvalue* createdUpvalue = newUpvalue(local);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>captureUpvalue</em>()</div>
<pre class="insert">  <span class="i">createdUpvalue</span>-&gt;<span class="i">next</span> = <span class="i">upvalue</span>;

  <span class="k">if</span> (<span class="i">prevUpvalue</span> == <span class="a">NULL</span>) {
    <span class="i">vm</span>.<span class="i">openUpvalues</span> = <span class="i">createdUpvalue</span>;
  } <span class="k">else</span> {
    <span class="i">prevUpvalue</span>-&gt;<span class="i">next</span> = <span class="i">createdUpvalue</span>;
  }

</pre><pre class="insert-after">  return createdUpvalue;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>captureUpvalue</em>()</div>

<p>The current incarnation of this function already creates the upvalue, so we only
need to add code to insert the upvalue into the list. We exited the list
traversal by either going past the end of the list, or by stopping on the first
upvalue whose stack slot is below the one we&rsquo;re looking for. In either case,
that means we need to insert the new upvalue <em>before</em> the object pointed at by
<code>upvalue</code> (which may be <code>NULL</code> if we hit the end of the list).</p>
<p>As you may have learned in Data Structures 101, to insert a node into a linked
list, you set the <code>next</code> pointer of the previous node to point to your new one.
We have been conveniently keeping track of that preceding node as we walked the
list. We also need to handle the <span name="double">special</span> case where
we are inserting a new upvalue at the head of the list, in which case the &ldquo;next&rdquo;
pointer is the VM&rsquo;s head pointer.</p>
<aside name="double">
<p>There is a shorter implementation that handles updating either the head pointer
or the previous upvalue&rsquo;s <code>next</code> pointer uniformly by using a pointer to a
pointer, but that kind of code confuses almost everyone who hasn&rsquo;t reached some
Zen master level of pointer expertise. I went with the basic <code>if</code> statement
approach.</p>
</aside>
<p>With this updated function, the VM now ensures that there is only ever a single
ObjUpvalue for any given local slot. If two closures capture the same variable,
they will get the same upvalue. We&rsquo;re ready to move those upvalues off the
stack now.</p>
<h3><a href="#closing-upvalues-at-runtime" id="closing-upvalues-at-runtime"><small>25&#8202;.&#8202;4&#8202;.&#8202;4</small>Closing upvalues at runtime</a></h3>
<p>The compiler helpfully emits an <code>OP_CLOSE_UPVALUE</code> instruction to tell the VM
exactly when a local variable should be hoisted onto the heap. Executing that
instruction is the interpreter&rsquo;s responsibility.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_CLOSE_UPVALUE</span>:
        <span class="i">closeUpvalues</span>(<span class="i">vm</span>.<span class="i">stackTop</span> - <span class="n">1</span>);
        <span class="i">pop</span>();
        <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>When we reach the instruction, the variable we are hoisting is right on top of
the stack. We call a helper function, passing the address of that stack slot.
That function is responsible for closing the upvalue and moving the local from
the stack to the heap. After that, the VM is free to discard the stack slot,
which it does by calling <code>pop()</code>.</p>
<p>The fun stuff happens here:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>captureUpvalue</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">closeUpvalues</span>(<span class="t">Value</span>* <span class="i">last</span>) {
  <span class="k">while</span> (<span class="i">vm</span>.<span class="i">openUpvalues</span> != <span class="a">NULL</span> &amp;&amp;
         <span class="i">vm</span>.<span class="i">openUpvalues</span>-&gt;<span class="i">location</span> &gt;= <span class="i">last</span>) {
    <span class="t">ObjUpvalue</span>* <span class="i">upvalue</span> = <span class="i">vm</span>.<span class="i">openUpvalues</span>;
    <span class="i">upvalue</span>-&gt;<span class="i">closed</span> = *<span class="i">upvalue</span>-&gt;<span class="i">location</span>;
    <span class="i">upvalue</span>-&gt;<span class="i">location</span> = &amp;<span class="i">upvalue</span>-&gt;<span class="i">closed</span>;
    <span class="i">vm</span>.<span class="i">openUpvalues</span> = <span class="i">upvalue</span>-&gt;<span class="i">next</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>captureUpvalue</em>()</div>

<p>This function takes a pointer to a stack slot. It closes every open upvalue it
can find that points to that slot or any slot above it on the stack. Right now,
we pass a pointer only to the top slot on the stack, so the &ldquo;or above it&rdquo; part
doesn&rsquo;t come into play, but it will soon.</p>
<p>To do this, we walk the VM&rsquo;s list of open upvalues, again from top to bottom. If
an upvalue&rsquo;s location points into the range of slots we&rsquo;re closing, we close the
upvalue. Otherwise, once we reach an upvalue outside of the range, we know the
rest will be too, so we stop iterating.</p>
<p>The way an upvalue gets closed is pretty <span name="cool">cool</span>. First,
we copy the variable&rsquo;s value into the <code>closed</code> field in the ObjUpvalue. That&rsquo;s
where closed-over variables live on the heap. The <code>OP_GET_UPVALUE</code> and
<code>OP_SET_UPVALUE</code> instructions need to look for the variable there after it&rsquo;s
been moved. We could add some conditional logic in the interpreter code for
those instructions to check some flag for whether the upvalue is open or closed.</p>
<p>But there is already a level of indirection in play<span class="em">&mdash;</span>those instructions
dereference the <code>location</code> pointer to get to the variable&rsquo;s value. When the
variable moves from the stack to the <code>closed</code> field, we simply update that
<code>location</code> to the address of the ObjUpvalue&rsquo;s <em>own</em> <code>closed</code> field.</p>
<aside name="cool">
<p>I&rsquo;m not praising myself here. This is all the Lua dev team&rsquo;s innovation.</p>
</aside><img src="image/closures/closing.png" alt="Moving a value from the stack to the upvalue's 'closed' field and then pointing the 'location' field to it."/>
<p>We don&rsquo;t need to change how <code>OP_GET_UPVALUE</code> and <code>OP_SET_UPVALUE</code> are
interpreted at all. That keeps them simple, which in turn keeps them fast. We do
need to add the new field to ObjUpvalue, though.</p>
<div class="codehilite"><pre class="insert-before">  Value* location;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>ObjUpvalue</em></div>
<pre class="insert">  <span class="t">Value</span> <span class="i">closed</span>;
</pre><pre class="insert-after">  struct ObjUpvalue* next;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>ObjUpvalue</em></div>

<p>And we should zero it out when we create an ObjUpvalue so there&rsquo;s no
uninitialized memory floating around.</p>
<div class="codehilite"><pre class="insert-before">  ObjUpvalue* upvalue = ALLOCATE_OBJ(ObjUpvalue, OBJ_UPVALUE);
</pre><div class="source-file"><em>object.c</em><br>
in <em>newUpvalue</em>()</div>
<pre class="insert">  <span class="i">upvalue</span>-&gt;<span class="i">closed</span> = <span class="a">NIL_VAL</span>;
</pre><pre class="insert-after">  upvalue-&gt;location = slot;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>newUpvalue</em>()</div>

<p>Whenever the compiler reaches the end of a block, it discards all local
variables in that block and emits an <code>OP_CLOSE_UPVALUE</code> for each local variable
that was closed over. The compiler <span name="close">does</span> <em>not</em> emit any
instructions at the end of the outermost block scope that defines a function
body. That scope contains the function&rsquo;s parameters and any locals declared
immediately inside the function. Those need to get closed too.</p>
<aside name="close">
<p>There&rsquo;s nothing <em>preventing</em> us from closing the outermost function scope in the
compiler and emitting <code>OP_POP</code> and <code>OP_CLOSE_UPVALUE</code> instructions. Doing so is
just unnecessary because the runtime discards all of the stack slots used by the
function implicitly when it pops the call frame.</p>
</aside>
<p>This is the reason <code>closeUpvalues()</code> accepts a pointer to a stack slot. When a
function returns, we call that same helper and pass in the first stack slot
owned by the function.</p>
<div class="codehilite"><pre class="insert-before">        Value result = pop();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">        <span class="i">closeUpvalues</span>(<span class="i">frame</span>-&gt;<span class="i">slots</span>);
</pre><pre class="insert-after">        vm.frameCount--;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>By passing the first slot in the function&rsquo;s stack window, we close every
remaining open upvalue owned by the returning function. And with that, we now
have a fully functioning closure implementation. Closed-over variables live as
long as they are needed by the functions that capture them.</p>
<p>This was a lot of work! In jlox, closures fell out naturally from our
environment representation. In clox, we had to add a lot of code<span class="em">&mdash;</span>new bytecode
instructions, more data structures in the compiler, and new runtime objects. The
VM very much treats variables in closures as different from other variables.</p>
<p>There is a rationale for that. In terms of implementation complexity, jlox gave
us closures &ldquo;for free&rdquo;. But in terms of <em>performance</em>, jlox&rsquo;s closures are
anything but. By allocating <em>all</em> environments on the heap, jlox pays a
significant performance price for <em>all</em> local variables, even the majority which
are never captured by closures.</p>
<p>With clox, we have a more complex system, but that allows us to tailor the
implementation to fit the two use patterns we observe for local variables. For
most variables, which do have stack semantics, we allocate them entirely on the
stack, which is simple and fast. Then, for the few local variables where that
doesn&rsquo;t work, we have a second slower path we can opt in to as needed.</p>
<p>Fortunately, users don&rsquo;t perceive the complexity. From their perspective, local
variables in Lox are simple and uniform. The <em>language itself</em> is as simple as
jlox&rsquo;s implementation. But under the hood, clox is watching what the user does
and optimizing for their specific uses. As your language implementations grow in
sophistication, you&rsquo;ll find yourself doing this more. A large fraction of
&ldquo;optimization&rdquo; is about adding special case code that detects certain uses and
provides a custom-built, faster path for code that fits that pattern.</p>
<p>We have lexical scoping fully working in clox now, which is a major milestone.
And, now that we have functions and variables with complex lifetimes, we also
have a <em>lot</em> of objects floating around in clox&rsquo;s heap, with a web of pointers
stringing them together. The <a href="garbage-collection.html">next step</a> is figuring out how to manage that
memory so that we can free some of those objects when they&rsquo;re no longer needed.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Wrapping every ObjFunction in an ObjClosure introduces a level of
indirection that has a performance cost. That cost isn&rsquo;t necessary for
functions that do not close over any variables, but it does let the runtime
treat all calls uniformly.</p>
<p>Change clox to only wrap functions in ObjClosures that need upvalues. How
does the code complexity and performance compare to always wrapping
functions? Take care to benchmark programs that do and do not use closures.
How should you weight the importance of each benchmark? If one gets slower
and one faster, how do you decide what trade-off to make to choose an
implementation strategy?</p>
</li>
<li>
<p>Read the design note below. I&rsquo;ll wait. Now, how do you think Lox <em>should</em>
behave? Change the implementation to create a new variable for each loop
iteration.</p>
</li>
<li>
<p>A <a href="http://wiki.c2.com/?ClosuresAndObjectsAreEquivalent">famous koan</a> teaches us that &ldquo;objects are a poor man&rsquo;s closure&rdquo;
(and vice versa). Our VM doesn&rsquo;t support objects yet, but now that we have
closures we can approximate them. Using closures, write a Lox program that
models two-dimensional vector &ldquo;objects&rdquo;. It should:</p>
<ul>
<li>
<p>Define a &ldquo;constructor&rdquo; function to create a new vector with the given
<em>x</em> and <em>y</em> coordinates.</p>
</li>
<li>
<p>Provide &ldquo;methods&rdquo; to access the <em>x</em> and <em>y</em> coordinates of values
returned from that constructor.</p>
</li>
<li>
<p>Define an addition &ldquo;method&rdquo; that adds two vectors and produces a third.</p>
</li>
</ul>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Closing Over the Loop Variable</a></h2>
<p>Closures capture variables. When two closures capture the same variable, they
share a reference to the same underlying storage location. This fact is visible
when new values are assigned to the variable. Obviously, if two closures capture
<em>different</em> variables, there is no sharing.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">globalOne</span>;
<span class="k">var</span> <span class="i">globalTwo</span>;

<span class="k">fun</span> <span class="i">main</span>() {
  {
    <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;one&quot;</span>;
    <span class="k">fun</span> <span class="i">one</span>() {
      <span class="k">print</span> <span class="i">a</span>;
    }
    <span class="i">globalOne</span> = <span class="i">one</span>;
  }

  {
    <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;two&quot;</span>;
    <span class="k">fun</span> <span class="i">two</span>() {
      <span class="k">print</span> <span class="i">a</span>;
    }
    <span class="i">globalTwo</span> = <span class="i">two</span>;
  }
}

<span class="i">main</span>();
<span class="i">globalOne</span>();
<span class="i">globalTwo</span>();
</pre></div>
<p>This prints &ldquo;one&rdquo; then &ldquo;two&rdquo;. In this example, it&rsquo;s pretty clear that the two
<code>a</code> variables are different. But it&rsquo;s not always so obvious. Consider:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">globalOne</span>;
<span class="k">var</span> <span class="i">globalTwo</span>;

<span class="k">fun</span> <span class="i">main</span>() {
  <span class="k">for</span> (<span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>; <span class="i">a</span> &lt;= <span class="n">2</span>; <span class="i">a</span> = <span class="i">a</span> + <span class="n">1</span>) {
    <span class="k">fun</span> <span class="i">closure</span>() {
      <span class="k">print</span> <span class="i">a</span>;
    }
    <span class="k">if</span> (<span class="i">globalOne</span> == <span class="k">nil</span>) {
      <span class="i">globalOne</span> = <span class="i">closure</span>;
    } <span class="k">else</span> {
      <span class="i">globalTwo</span> = <span class="i">closure</span>;
    }
  }
}

<span class="i">main</span>();
<span class="i">globalOne</span>();
<span class="i">globalTwo</span>();
</pre></div>
<p>The code is convoluted because Lox has no collection types. The important part
is that the <code>main()</code> function does two iterations of a <code>for</code> loop. Each time
through the loop, it creates a closure that captures the loop variable. It
stores the first closure in <code>globalOne</code> and the second in <code>globalTwo</code>.</p>
<p>There are definitely two different closures. Do they close over two different
variables? Is there only one <code>a</code> for the entire duration of the loop, or does
each iteration get its own distinct <code>a</code> variable?</p>
<p>The script here is strange and contrived, but this does show up in real code
in languages that aren&rsquo;t as minimal as Lox. Here&rsquo;s a JavaScript example:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">closures</span> = [];
<span class="k">for</span> (<span class="k">var</span> <span class="i">i</span> = <span class="n">1</span>; <span class="i">i</span> &lt;= <span class="n">2</span>; <span class="i">i</span>++) {
  <span class="i">closures</span>.<span class="i">push</span>(<span class="k">function</span> () { <span class="i">console</span>.<span class="i">log</span>(<span class="i">i</span>); });
}

<span class="i">closures</span>[<span class="n">0</span>]();
<span class="i">closures</span>[<span class="n">1</span>]();
</pre></div>
<p>Does this print &ldquo;1&rdquo; then &ldquo;2&rdquo;, or does it print <span name="three">&ldquo;3&rdquo;</span>
twice? You may be surprised to hear that it prints &ldquo;3&rdquo; twice. In this JavaScript
program, there is only a single <code>i</code> variable whose lifetime includes all
iterations of the loop, including the final exit.</p>
<aside name="three">
<p>You&rsquo;re wondering how <em>three</em> enters the picture? After the second iteration,
<code>i++</code> is executed, which increments <code>i</code> to three. That&rsquo;s what causes <code>i &lt;= 2</code> to
evaluate to false and end the loop. If <code>i</code> never reached three, the loop would
run forever.</p>
</aside>
<p>If you&rsquo;re familiar with JavaScript, you probably know that variables declared
using <code>var</code> are implicitly <em>hoisted</em> to the surrounding function or top-level
scope. It&rsquo;s as if you really wrote this:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">closures</span> = [];
<span class="k">var</span> <span class="i">i</span>;
<span class="k">for</span> (<span class="i">i</span> = <span class="n">1</span>; <span class="i">i</span> &lt;= <span class="n">2</span>; <span class="i">i</span>++) {
  <span class="i">closures</span>.<span class="i">push</span>(<span class="k">function</span> () { <span class="i">console</span>.<span class="i">log</span>(<span class="i">i</span>); });
}

<span class="i">closures</span>[<span class="n">0</span>]();
<span class="i">closures</span>[<span class="n">1</span>]();
</pre></div>
<p>At that point, it&rsquo;s clearer that there is only a single <code>i</code>. Now consider if
you change the program to use the newer <code>let</code> keyword:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">closures</span> = [];
<span class="k">for</span> (<span class="k">let</span> <span class="i">i</span> = <span class="n">1</span>; <span class="i">i</span> &lt;= <span class="n">2</span>; <span class="i">i</span>++) {
  <span class="i">closures</span>.<span class="i">push</span>(<span class="k">function</span> () { <span class="i">console</span>.<span class="i">log</span>(<span class="i">i</span>); });
}

<span class="i">closures</span>[<span class="n">0</span>]();
<span class="i">closures</span>[<span class="n">1</span>]();
</pre></div>
<p>Does this new program behave the same? Nope. In this case, it prints &ldquo;1&rdquo; then
&ldquo;2&rdquo;. Each closure gets its own <code>i</code>. That&rsquo;s sort of strange when you think about
it. The increment clause is <code>i++</code>. That looks very much like it is assigning to
and mutating an existing variable, not creating a new one.</p>
<p>Let&rsquo;s try some other languages. Here&rsquo;s Python:</p>
<div class="codehilite"><pre><span class="i">closures</span> = []
<span class="k">for</span> <span class="i">i</span> <span class="k">in</span> <span class="k">range</span>(<span class="n">1</span>, <span class="n">3</span>):
  <span class="i">closures</span>.<span class="i">append</span>(<span class="k">lambda</span>: <span class="k">print</span>(<span class="i">i</span>))

<span class="i">closures</span>[<span class="n">0</span>]()
<span class="i">closures</span>[<span class="n">1</span>]()
</pre></div>
<p>Python doesn&rsquo;t really have block scope. Variables are implicitly declared and
are automatically scoped to the surrounding function. Kind of like hoisting in
JS, now that I think about it. So both closures capture the same variable.
Unlike C, though, we don&rsquo;t exit the loop by incrementing <code>i</code> <em>past</em> the last
value, so this prints &ldquo;2&rdquo; twice.</p>
<p>What about Ruby? Ruby has two typical ways to iterate numerically. Here&rsquo;s the
classic imperative style:</p>
<div class="codehilite"><pre><span class="i">closures</span> = []
<span class="k">for</span> <span class="i">i</span> <span class="k">in</span> <span class="n">1</span>..<span class="n">2</span> <span class="k">do</span>
  <span class="i">closures</span> &lt;&lt; <span class="k">lambda</span> { <span class="i">puts</span> <span class="i">i</span> }
<span class="k">end</span>

<span class="i">closures</span>[<span class="n">0</span>].<span class="i">call</span>
<span class="i">closures</span>[<span class="n">1</span>].<span class="i">call</span>
</pre></div>
<p>This, like Python, prints &ldquo;2&rdquo; twice. But the more idiomatic Ruby style is using
a higher-order <code>each()</code> method on range objects:</p>
<div class="codehilite"><pre><span class="i">closures</span> = []
(<span class="n">1</span>..<span class="n">2</span>).<span class="i">each</span> <span class="k">do</span> |<span class="i">i</span>|
  <span class="i">closures</span> &lt;&lt; <span class="k">lambda</span> { <span class="i">puts</span> <span class="i">i</span> }
<span class="k">end</span>

<span class="i">closures</span>[<span class="n">0</span>].<span class="i">call</span>
<span class="i">closures</span>[<span class="n">1</span>].<span class="i">call</span>
</pre></div>
<p>If you&rsquo;re not familiar with Ruby, the <code>do |i| ... end</code> part is basically a
closure that gets created and passed to the <code>each()</code> method. The <code>|i|</code> is the
parameter signature for the closure. The <code>each()</code> method invokes that closure
twice, passing in 1 for <code>i</code> the first time and 2 the second time.</p>
<p>In this case, the &ldquo;loop variable&rdquo; is really a function parameter. And, since
each iteration of the loop is a separate invocation of the function, those are
definitely separate variables for each call. So this prints &ldquo;1&rdquo; then &ldquo;2&rdquo;.</p>
<p>If a language has a higher-level iterator-based looping structure like <code>foreach</code>
in C#, Java&rsquo;s &ldquo;enhanced for&rdquo;, <code>for-of</code> in JavaScript, <code>for-in</code> in Dart, etc.,
then I think it&rsquo;s natural to the reader to have each iteration create a new
variable. The code <em>looks</em> like a new variable because the loop header looks
like a variable declaration. And there&rsquo;s no increment expression that looks like
it&rsquo;s mutating that variable to advance to the next step.</p>
<p>If you dig around StackOverflow and other places, you find evidence that this is
what users expect, because they are very surprised when they <em>don&rsquo;t</em> get it. In
particular, C# originally did <em>not</em> create a new loop variable for each
iteration of a <code>foreach</code> loop. This was such a frequent source of user confusion
that they took the very rare step of shipping a breaking change to the language.
In C# 5, each iteration creates a fresh variable.</p>
<p>Old C-style <code>for</code> loops are harder. The increment clause really does look like
mutation. That implies there is a single variable that&rsquo;s getting updated each
step. But it&rsquo;s almost never <em>useful</em> for each iteration to share a loop
variable. The only time you can even detect this is when closures capture it.
And it&rsquo;s rarely helpful to have a closure that references a variable whose value
is whatever value caused you to exit the loop.</p>
<p>The pragmatically useful answer is probably to do what JavaScript does with
<code>let</code> in <code>for</code> loops. Make it look like mutation but actually create a new
variable each time, because that&rsquo;s what users want. It is kind of weird when you
think about it, though.</p>
</div>

<footer>
<a href="garbage-collection.html" class="next">
  Next Chapter: &ldquo;Garbage Collection&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/compiling-expressions.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Compiling Expressions &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Compiling Expressions<small>17</small></a></h3>

<ul>
    <li><a href="#single-pass-compilation"><small>17.1</small> Single-Pass Compilation</a></li>
    <li><a href="#parsing-tokens"><small>17.2</small> Parsing Tokens</a></li>
    <li><a href="#emitting-bytecode"><small>17.3</small> Emitting Bytecode</a></li>
    <li><a href="#parsing-prefix-expressions"><small>17.4</small> Parsing Prefix Expressions</a></li>
    <li><a href="#parsing-infix-expressions"><small>17.5</small> Parsing Infix Expressions</a></li>
    <li><a href="#a-pratt-parser"><small>17.6</small> A Pratt Parser</a></li>
    <li><a href="#dumping-chunks"><small>17.7</small> Dumping Chunks</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>It&#x27;s Just Parsing</a></li>
</ul>


<div class="prev-next">
    <a href="scanning-on-demand.html" title="Scanning on Demand" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="types-of-values.html" title="Types of Values" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="scanning-on-demand.html" title="Scanning on Demand" class="prev">←</a>
<a href="types-of-values.html" title="Types of Values" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Compiling Expressions<small>17</small></a></h3>

<ul>
    <li><a href="#single-pass-compilation"><small>17.1</small> Single-Pass Compilation</a></li>
    <li><a href="#parsing-tokens"><small>17.2</small> Parsing Tokens</a></li>
    <li><a href="#emitting-bytecode"><small>17.3</small> Emitting Bytecode</a></li>
    <li><a href="#parsing-prefix-expressions"><small>17.4</small> Parsing Prefix Expressions</a></li>
    <li><a href="#parsing-infix-expressions"><small>17.5</small> Parsing Infix Expressions</a></li>
    <li><a href="#a-pratt-parser"><small>17.6</small> A Pratt Parser</a></li>
    <li><a href="#dumping-chunks"><small>17.7</small> Dumping Chunks</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>It&#x27;s Just Parsing</a></li>
</ul>


<div class="prev-next">
    <a href="scanning-on-demand.html" title="Scanning on Demand" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="types-of-values.html" title="Types of Values" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">17</div>
  <h1>Compiling Expressions</h1>

<blockquote>
<p>In the middle of the journey of our life I found myself within a dark woods
where the straight way was lost.</p>
<p><cite>Dante Alighieri, <em>Inferno</em></cite></p>
</blockquote>
<p>This chapter is exciting for not one, not two, but <em>three</em> reasons. First, it
provides the final segment of our VM&rsquo;s execution pipeline. Once in place, we can
plumb the user&rsquo;s source code from scanning all the way through to executing it.</p><img src="image/compiling-expressions/pipeline.png" alt="Lowering the 'compiler' section of pipe between 'scanner' and 'VM'." />
<p>Second, we get to write an actual, honest-to-God <em>compiler</em>. It parses source
code and outputs a low-level series of binary instructions. Sure, it&rsquo;s <span
name="wirth">bytecode</span> and not some chip&rsquo;s native instruction set, but
it&rsquo;s way closer to the metal than jlox was. We&rsquo;re about to be real language
hackers.</p>
<aside name="wirth">
<p>Bytecode was good enough for Niklaus Wirth, and no one questions his street
cred.</p>
</aside>
<p><span name="pratt">Third</span> and finally, I get to show you one of my
absolute favorite algorithms: Vaughan Pratt&rsquo;s &ldquo;top-down operator precedence
parsing&rdquo;. It&rsquo;s the most elegant way I know to parse expressions. It gracefully
handles prefix operators, postfix, infix, <em>mixfix</em>, any kind of <em>-fix</em> you got.
It deals with precedence and associativity without breaking a sweat. I love it.</p>
<aside name="pratt">
<p>Pratt parsers are a sort of oral tradition in industry. No compiler or language
book I&rsquo;ve read teaches them. Academia is very focused on generated parsers, and
Pratt&rsquo;s technique is for handwritten ones, so it gets overlooked.</p>
<p>But in production compilers, where hand-rolled parsers are common, you&rsquo;d be
surprised how many people know it. Ask where they learned it, and it&rsquo;s always,
&ldquo;Oh, I worked on this compiler years ago and my coworker said they took it from
this old front end<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>&rdquo;</p>
</aside>
<p>As usual, before we get to the fun stuff, we&rsquo;ve got some preliminaries to work
through. You have to eat your vegetables before you get dessert. First, let&rsquo;s
ditch that temporary scaffolding we wrote for testing the scanner and replace it
with something more useful.</p>
<div class="codehilite"><pre class="insert-before">InterpretResult interpret(const char* source) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>interpret</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="t">Chunk</span> <span class="i">chunk</span>;
  <span class="i">initChunk</span>(&amp;<span class="i">chunk</span>);

  <span class="k">if</span> (!<span class="i">compile</span>(<span class="i">source</span>, &amp;<span class="i">chunk</span>)) {
    <span class="i">freeChunk</span>(&amp;<span class="i">chunk</span>);
    <span class="k">return</span> <span class="a">INTERPRET_COMPILE_ERROR</span>;
  }

  <span class="i">vm</span>.<span class="i">chunk</span> = &amp;<span class="i">chunk</span>;
  <span class="i">vm</span>.<span class="i">ip</span> = <span class="i">vm</span>.<span class="i">chunk</span>-&gt;<span class="i">code</span>;

  <span class="t">InterpretResult</span> <span class="i">result</span> = <span class="i">run</span>();

  <span class="i">freeChunk</span>(&amp;<span class="i">chunk</span>);
  <span class="k">return</span> <span class="i">result</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>interpret</em>(), replace 2 lines</div>

<p>We create a new empty chunk and pass it over to the compiler. The compiler will
take the user&rsquo;s program and fill up the chunk with bytecode. At least, that&rsquo;s
what it will do if the program doesn&rsquo;t have any compile errors. If it does
encounter an error, <code>compile()</code> returns <code>false</code> and we discard the unusable
chunk.</p>
<p>Otherwise, we send the completed chunk over to the VM to be executed. When the
VM finishes, we free the chunk and we&rsquo;re done. As you can see, the signature of
<code>compile()</code> is different now.</p>
<div class="codehilite"><pre class="insert-before">#define clox_compiler_h

</pre><div class="source-file"><em>compiler.h</em><br>
replace 1 line</div>
<pre class="insert"><span class="a">#include &quot;vm.h&quot;</span>

<span class="t">bool</span> <span class="i">compile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>, <span class="t">Chunk</span>* <span class="i">chunk</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>compiler.h</em>, replace 1 line</div>

<p>We pass in the chunk where the compiler will write the code, and then
<code>compile()</code> returns whether or not compilation succeeded. We make the same
change to the signature in the implementation.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;scanner.h&quot;

</pre><div class="source-file"><em>compiler.c</em><br>
function <em>compile</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="t">bool</span> <span class="i">compile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>, <span class="t">Chunk</span>* <span class="i">chunk</span>) {
</pre><pre class="insert-after">  initScanner(source);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>compile</em>(), replace 1 line</div>

<p>That call to <code>initScanner()</code> is the only line that survives this chapter. Rip
out the temporary code we wrote to test the scanner and replace it with these
three lines:</p>
<div class="codehilite"><pre class="insert-before">  initScanner(source);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()<br>
replace 13 lines</div>
<pre class="insert">  <span class="i">advance</span>();
  <span class="i">expression</span>();
  <span class="i">consume</span>(<span class="a">TOKEN_EOF</span>, <span class="s">&quot;Expect end of expression.&quot;</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>(), replace 13 lines</div>

<p>The call to <code>advance()</code> &ldquo;primes the pump&rdquo; on the scanner. We&rsquo;ll see what it does
soon. Then we parse a single expression. We aren&rsquo;t going to do statements yet,
so that&rsquo;s the only subset of the grammar we support. We&rsquo;ll revisit this when we
<a href="global-variables.html">add statements in a few chapters</a>. After we compile the expression, we
should be at the end of the source code, so we check for the sentinel EOF token.</p>
<p>We&rsquo;re going to spend the rest of the chapter making this function work,
especially that little <code>expression()</code> call. Normally, we&rsquo;d dive right into that
function definition and work our way through the implementation from top to
bottom.</p>
<p>This chapter is <span name="blog">different</span>. Pratt&rsquo;s parsing technique is
remarkably simple once you have it all loaded in your head, but it&rsquo;s a little
tricky to break into bite-sized pieces. It&rsquo;s recursive, of course, which is part
of the problem. But it also relies on a big table of data. As we build up the
algorithm, that table grows additional columns.</p>
<aside name="blog">
<p>If this chapter isn&rsquo;t clicking with you and you&rsquo;d like another take on the
concepts, I wrote an article that teaches the same algorithm but using Java and
an object-oriented style: <a href="http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/">&ldquo;Pratt Parsing: Expression Parsing Made Easy&rdquo;</a>.</p>
</aside>
<p>I don&rsquo;t want to revisit 40-something lines of code each time we extend the
table. So we&rsquo;re going to work our way into the core of the parser from the
outside and cover all of the surrounding bits before we get to the juicy center.
This will require a little more patience and mental scratch space than most
chapters, but it&rsquo;s the best I could do.</p>
<h2><a href="#single-pass-compilation" id="single-pass-compilation"><small>17&#8202;.&#8202;1</small>Single-Pass Compilation</a></h2>
<p>A compiler has roughly two jobs. It parses the user&rsquo;s source code to understand
what it means. Then it takes that knowledge and outputs low-level instructions
that produce the same semantics. Many languages split those two roles into two
separate <span name="passes">passes</span> in the implementation. A parser
produces an AST<span class="em">&mdash;</span>just like jlox does<span class="em">&mdash;</span>and then a code generator traverses
the AST and outputs target code.</p>
<aside name="passes">
<p>In fact, most sophisticated optimizing compilers have a heck of a lot more than
two passes. Determining not just <em>what</em> optimization passes to have, but how to
order them to squeeze the most performance out of the compiler<span class="em">&mdash;</span>since the
optimizations often interact in complex ways<span class="em">&mdash;</span>is somewhere between an &ldquo;open
area of research&rdquo; and a &ldquo;dark art&rdquo;.</p>
</aside>
<p>In clox, we&rsquo;re taking an old-school approach and merging these two passes into
one. Back in the day, language hackers did this because computers literally
didn&rsquo;t have enough memory to store an entire source file&rsquo;s AST. We&rsquo;re doing it
because it keeps our compiler simpler, which is a real asset when programming in
C.</p>
<p>Single-pass compilers like the one we&rsquo;re going to build don&rsquo;t work well for all
languages. Since the compiler has only a peephole view into the user&rsquo;s program
while generating code, the language must be designed such that you don&rsquo;t need
much surrounding context to understand a piece of syntax. Fortunately, tiny,
dynamically typed Lox is <span name="lox">well-suited</span> to that.</p>
<aside name="lox">
<p>Not that this should come as much of a surprise. I did design the language
specifically for this book after all.</p><img src="image/compiling-expressions/keyhole.png" alt="Peering through a keyhole at 'var x;'" />
</aside>
<p>What this means in practical terms is that our &ldquo;compiler&rdquo; C module has
functionality you&rsquo;ll recognize from jlox for parsing<span class="em">&mdash;</span>consuming tokens,
matching expected token types, etc. And it also has functions for code gen<span class="em">&mdash;</span>emitting bytecode and adding constants to the destination chunk. (And it means
I&rsquo;ll use &ldquo;parsing&rdquo; and &ldquo;compiling&rdquo; interchangeably throughout this and later
chapters.)</p>
<p>We&rsquo;ll build the parsing and code generation halves first. Then we&rsquo;ll stitch them
together with the code in the middle that uses Pratt&rsquo;s technique to parse Lox&rsquo;s
particular grammar and output the right bytecode.</p>
<h2><a href="#parsing-tokens" id="parsing-tokens"><small>17&#8202;.&#8202;2</small>Parsing Tokens</a></h2>
<p>First up, the front half of the compiler. This function&rsquo;s name should sound
familiar.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;scanner.h&quot;
</pre><div class="source-file"><em>compiler.c</em></div>
<pre class="insert">

<span class="k">static</span> <span class="t">void</span> <span class="i">advance</span>() {
  <span class="i">parser</span>.<span class="i">previous</span> = <span class="i">parser</span>.<span class="i">current</span>;

  <span class="k">for</span> (;;) {
    <span class="i">parser</span>.<span class="i">current</span> = <span class="i">scanToken</span>();
    <span class="k">if</span> (<span class="i">parser</span>.<span class="i">current</span>.<span class="i">type</span> != <span class="a">TOKEN_ERROR</span>) <span class="k">break</span>;

    <span class="i">errorAtCurrent</span>(<span class="i">parser</span>.<span class="i">current</span>.<span class="i">start</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em></div>

<p>Just like in jlox, it steps forward through the token stream. It asks the
scanner for the next token and stores it for later use. Before doing that, it
takes the old <code>current</code> token and stashes that in a <code>previous</code> field. That will
come in handy later so that we can get at the lexeme after we match a token.</p>
<p>The code to read the next token is wrapped in a loop. Remember, clox&rsquo;s scanner
doesn&rsquo;t report lexical errors. Instead, it creates special <em>error tokens</em> and
leaves it up to the parser to report them. We do that here.</p>
<p>We keep looping, reading tokens and reporting the errors, until we hit a
non-error one or reach the end. That way, the rest of the parser sees only valid
tokens. The current and previous tokens are stored in this struct:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;scanner.h&quot;
</pre><div class="source-file"><em>compiler.c</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Token</span> <span class="i">current</span>;
  <span class="t">Token</span> <span class="i">previous</span>;
} <span class="t">Parser</span>;

<span class="t">Parser</span> <span class="i">parser</span>;
</pre><pre class="insert-after">

static void advance() {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em></div>

<p>Like we did in other modules, we have a single global variable of this struct
type so we don&rsquo;t need to pass the state around from function to function in the
compiler.</p>
<h3><a href="#handling-syntax-errors" id="handling-syntax-errors"><small>17&#8202;.&#8202;2&#8202;.&#8202;1</small>Handling syntax errors</a></h3>
<p>If the scanner hands us an error token, we need to actually tell the user. That
happens using this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after variable <em>parser</em></div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">errorAtCurrent</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">message</span>) {
  <span class="i">errorAt</span>(&amp;<span class="i">parser</span>.<span class="i">current</span>, <span class="i">message</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after variable <em>parser</em></div>

<p>We pull the location out of the current token in order to tell the user where
the error occurred and forward it to <code>errorAt()</code>. More often, we&rsquo;ll report an
error at the location of the token we just consumed, so we give the shorter name
to this other function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after variable <em>parser</em></div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">error</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">message</span>) {
  <span class="i">errorAt</span>(&amp;<span class="i">parser</span>.<span class="i">previous</span>, <span class="i">message</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after variable <em>parser</em></div>

<p>The actual work happens here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after variable <em>parser</em></div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">errorAt</span>(<span class="t">Token</span>* <span class="i">token</span>, <span class="k">const</span> <span class="t">char</span>* <span class="i">message</span>) {
  <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;[line %d] Error&quot;</span>, <span class="i">token</span>-&gt;<span class="i">line</span>);

  <span class="k">if</span> (<span class="i">token</span>-&gt;<span class="i">type</span> == <span class="a">TOKEN_EOF</span>) {
    <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot; at end&quot;</span>);
  } <span class="k">else</span> <span class="k">if</span> (<span class="i">token</span>-&gt;<span class="i">type</span> == <span class="a">TOKEN_ERROR</span>) {
    <span class="c">// Nothing.</span>
  } <span class="k">else</span> {
    <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot; at &#39;%.*s&#39;&quot;</span>, <span class="i">token</span>-&gt;<span class="i">length</span>, <span class="i">token</span>-&gt;<span class="i">start</span>);
  }

  <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;: %s</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">message</span>);
  <span class="i">parser</span>.<span class="i">hadError</span> = <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after variable <em>parser</em></div>

<p>First, we print where the error occurred. We try to show the lexeme if it&rsquo;s
human-readable. Then we print the error message itself. After that, we set this
<code>hadError</code> flag. That records whether any errors occurred during compilation.
This field also lives in the parser struct.</p>
<div class="codehilite"><pre class="insert-before">  Token previous;
</pre><div class="source-file"><em>compiler.c</em><br>
in struct <em>Parser</em></div>
<pre class="insert">  <span class="t">bool</span> <span class="i">hadError</span>;
</pre><pre class="insert-after">} Parser;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in struct <em>Parser</em></div>

<p>Earlier I said that <code>compile()</code> should return <code>false</code> if an error occurred. Now
we can make it do that.</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_EOF, &quot;Expect end of expression.&quot;);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()</div>
<pre class="insert">  <span class="k">return</span> !<span class="i">parser</span>.<span class="i">hadError</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>()</div>

<p>I&rsquo;ve got another flag to introduce for error handling. We want to avoid error
cascades. If the user has a mistake in their code and the parser gets confused
about where it is in the grammar, we don&rsquo;t want it to spew out a whole pile of
meaningless knock-on errors after the first one.</p>
<p>We fixed that in jlox using panic mode error recovery. In the Java interpreter,
we threw an exception to unwind out of all of the parser code to a point where
we could skip tokens and resynchronize. We don&rsquo;t have <span
name="setjmp">exceptions</span> in C. Instead, we&rsquo;ll do a little smoke and
mirrors. We add a flag to track whether we&rsquo;re currently in panic mode.</p>
<aside name="setjmp">
<p>There is <code>setjmp()</code> and <code>longjmp()</code>, but I&rsquo;d rather not go there. Those make it
too easy to leak memory, forget to maintain invariants, or otherwise have a Very
Bad Day.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  bool hadError;
</pre><div class="source-file"><em>compiler.c</em><br>
in struct <em>Parser</em></div>
<pre class="insert">  <span class="t">bool</span> <span class="i">panicMode</span>;
</pre><pre class="insert-after">} Parser;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in struct <em>Parser</em></div>

<p>When an error occurs, we set it.</p>
<div class="codehilite"><pre class="insert-before">static void errorAt(Token* token, const char* message) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>errorAt</em>()</div>
<pre class="insert">  <span class="i">parser</span>.<span class="i">panicMode</span> = <span class="k">true</span>;
</pre><pre class="insert-after">  fprintf(stderr, &quot;[line %d] Error&quot;, token-&gt;line);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>errorAt</em>()</div>

<p>After that, we go ahead and keep compiling as normal as if the error never
occurred. The bytecode will never get executed, so it&rsquo;s harmless to keep on
trucking. The trick is that while the panic mode flag is set, we simply suppress
any other errors that get detected.</p>
<div class="codehilite"><pre class="insert-before">static void errorAt(Token* token, const char* message) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>errorAt</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">parser</span>.<span class="i">panicMode</span>) <span class="k">return</span>;
</pre><pre class="insert-after">  parser.panicMode = true;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>errorAt</em>()</div>

<p>There&rsquo;s a good chance the parser will go off in the weeds, but the user won&rsquo;t
know because the errors all get swallowed. Panic mode ends when the parser
reaches a synchronization point. For Lox, we chose statement boundaries, so when
we later add those to our compiler, we&rsquo;ll clear the flag there.</p>
<p>These new fields need to be initialized.</p>
<div class="codehilite"><pre class="insert-before">  initScanner(source);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()</div>
<pre class="insert">

  <span class="i">parser</span>.<span class="i">hadError</span> = <span class="k">false</span>;
  <span class="i">parser</span>.<span class="i">panicMode</span> = <span class="k">false</span>;

</pre><pre class="insert-after">  advance();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>()</div>

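<p>We also need one more standard header. Error reporting uses <code>fprintf()</code>
from the already-included <code>stdio.h</code>; <code>stdlib.h</code> provides <code>strtod()</code>, which
we use below to convert number literals.</p>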
<div class="codehilite"><pre class="insert-before">#include &lt;stdio.h&gt;
</pre><div class="source-file"><em>compiler.c</em></div>
<pre class="insert"><span class="a">#include &lt;stdlib.h&gt;</span>
</pre><pre class="insert-after">

#include &quot;common.h&quot;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em></div>

<p>There&rsquo;s one last parsing function, another old friend from jlox.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>advance</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">consume</span>(<span class="t">TokenType</span> <span class="i">type</span>, <span class="k">const</span> <span class="t">char</span>* <span class="i">message</span>) {
  <span class="k">if</span> (<span class="i">parser</span>.<span class="i">current</span>.<span class="i">type</span> == <span class="i">type</span>) {
    <span class="i">advance</span>();
    <span class="k">return</span>;
  }

  <span class="i">errorAtCurrent</span>(<span class="i">message</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>advance</em>()</div>

<p>It&rsquo;s similar to <code>advance()</code> in that it reads the next token. But it also
validates that the token has an expected type. If not, it reports an error. This
function is the foundation of most syntax errors in the compiler.</p>
<p>OK, that&rsquo;s enough on the front end for now.</p>
<h2><a href="#emitting-bytecode" id="emitting-bytecode"><small>17&#8202;.&#8202;3</small>Emitting Bytecode</a></h2>
<p>After we parse and understand a piece of the user&rsquo;s program, the next step is to
translate that to a series of bytecode instructions. It starts with the easiest
possible step: appending a single byte to the chunk.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>consume</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">emitByte</span>(<span class="t">uint8_t</span> <span class="i">byte</span>) {
  <span class="i">writeChunk</span>(<span class="i">currentChunk</span>(), <span class="i">byte</span>, <span class="i">parser</span>.<span class="i">previous</span>.<span class="i">line</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>consume</em>()</div>

<p>It&rsquo;s hard to believe great things will flow through such a simple function. It
writes the given byte, which may be an opcode or an operand to an instruction.
It sends in the previous token&rsquo;s line information so that runtime errors are
associated with that line.</p>
<p>The chunk that we&rsquo;re writing gets passed into <code>compile()</code>, but it needs to make
its way to <code>emitByte()</code>. To do that, we rely on this intermediary function:</p>
<div class="codehilite"><pre class="insert-before">Parser parser;
</pre><div class="source-file"><em>compiler.c</em><br>
add after variable <em>parser</em></div>
<pre class="insert"><span class="t">Chunk</span>* <span class="i">compilingChunk</span>;

<span class="k">static</span> <span class="t">Chunk</span>* <span class="i">currentChunk</span>() {
  <span class="k">return</span> <span class="i">compilingChunk</span>;
}

</pre><pre class="insert-after">static void errorAt(Token* token, const char* message) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after variable <em>parser</em></div>

<p>Right now, the chunk pointer is stored in a module-level variable like we store
other global state. Later, when we start compiling user-defined functions, the
notion of &ldquo;current chunk&rdquo; gets more complicated. To avoid having to go back and
change a lot of code, I encapsulate that logic in the <code>currentChunk()</code> function.</p>
<p>We initialize this new module variable before we write any bytecode:</p>
<div class="codehilite"><pre class="insert-before">bool compile(const char* source, Chunk* chunk) {
  initScanner(source);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()</div>
<pre class="insert">  <span class="i">compilingChunk</span> = <span class="i">chunk</span>;
</pre><pre class="insert-after">

  parser.hadError = false;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>()</div>

<p>Then, at the very end, when we&rsquo;re done compiling the chunk, we wrap things up.</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_EOF, &quot;Expect end of expression.&quot;);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()</div>
<pre class="insert">  <span class="i">endCompiler</span>();
</pre><pre class="insert-after">  return !parser.hadError;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>()</div>

<p>That calls this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitByte</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">endCompiler</span>() {
  <span class="i">emitReturn</span>();
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitByte</em>()</div>

<p>In this chapter, our VM deals only with expressions. When you run clox, it will
parse, compile, and execute a single expression, then print the result. To print
that value, we are temporarily using the <code>OP_RETURN</code> instruction. So we have the
compiler add one of those to the end of the chunk.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitByte</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">emitReturn</span>() {
  <span class="i">emitByte</span>(<span class="a">OP_RETURN</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitByte</em>()</div>

<p>While we&rsquo;re here in the back end we may as well make our lives easier.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitByte</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">emitBytes</span>(<span class="t">uint8_t</span> <span class="i">byte1</span>, <span class="t">uint8_t</span> <span class="i">byte2</span>) {
  <span class="i">emitByte</span>(<span class="i">byte1</span>);
  <span class="i">emitByte</span>(<span class="i">byte2</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitByte</em>()</div>

<p>Over time, we&rsquo;ll have enough cases where we need to write an opcode followed by
a one-byte operand that it&rsquo;s worth defining this convenience function.</p>
<h2><a href="#parsing-prefix-expressions" id="parsing-prefix-expressions"><small>17&#8202;.&#8202;4</small>Parsing Prefix Expressions</a></h2>
<p>We&rsquo;ve assembled our parsing and code generation utility functions. The missing
piece is the code in the middle that connects those together.</p><img src="image/compiling-expressions/mystery.png" alt="Parsing functions on the left, bytecode emitting functions on the right. What goes in the middle?" />
<p>The only step in <code>compile()</code> that we have left to implement is this function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>endCompiler</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">expression</span>() {
  <span class="c">// What goes here?</span>
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>endCompiler</em>()</div>

<p>We aren&rsquo;t ready to implement every kind of expression in Lox yet. Heck, we don&rsquo;t
even have Booleans. For this chapter, we&rsquo;re only going to worry about four:</p>
<ul>
<li>Number literals: <code>123</code></li>
<li>Parentheses for grouping: <code>(123)</code></li>
<li>Unary negation: <code>-123</code></li>
<li>The Four Horsemen of the Arithmetic: <code>+</code>, <code>-</code>, <code>*</code>, <code>/</code></li>
</ul>
<p>As we work through the functions to compile each of those kinds of expressions,
we&rsquo;ll also assemble the requirements for the table-driven parser that calls
them.</p>
<h3><a href="#parsers-for-tokens" id="parsers-for-tokens"><small>17&#8202;.&#8202;4&#8202;.&#8202;1</small>Parsers for tokens</a></h3>
<p>For now, let&rsquo;s focus on the Lox expressions that are each only a single token.
In this chapter, that&rsquo;s just number literals, but there will be more later. Here&rsquo;s
how we can compile them:</p>
<p>We map each token type to a different kind of expression. We define a function
for each expression that outputs the appropriate bytecode. Then we build an
array of function pointers. The indexes in the array correspond to the
<code>TokenType</code> enum values, and the function at each index is the code to compile
an expression of that token type.</p>
<p>To compile number literals, we store a pointer to the following function at the
<code>TOKEN_NUMBER</code> index in the array.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>endCompiler</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">number</span>() {
  <span class="t">double</span> <span class="i">value</span> = <span class="i">strtod</span>(<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">start</span>, <span class="a">NULL</span>);
  <span class="i">emitConstant</span>(<span class="i">value</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>endCompiler</em>()</div>

<p>We assume the token for the number literal has already been consumed and is
stored in <code>previous</code>. We take that lexeme and use the C standard library to
convert it to a double value. Then we generate the code to load that value using
this function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitReturn</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">emitConstant</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="i">emitBytes</span>(<span class="a">OP_CONSTANT</span>, <span class="i">makeConstant</span>(<span class="i">value</span>));
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitReturn</em>()</div>

<p>First, we add the value to the constant table, then we emit an <code>OP_CONSTANT</code>
instruction that pushes it onto the stack at runtime. To insert an entry in the
constant table, we rely on:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitReturn</em>()</div>
<pre><span class="k">static</span> <span class="t">uint8_t</span> <span class="i">makeConstant</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="t">int</span> <span class="i">constant</span> = <span class="i">addConstant</span>(<span class="i">currentChunk</span>(), <span class="i">value</span>);
  <span class="k">if</span> (<span class="i">constant</span> &gt; <span class="a">UINT8_MAX</span>) {
    <span class="i">error</span>(<span class="s">&quot;Too many constants in one chunk.&quot;</span>);
    <span class="k">return</span> <span class="n">0</span>;
  }

  <span class="k">return</span> (<span class="t">uint8_t</span>)<span class="i">constant</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitReturn</em>()</div>

<p>Most of the work happens in <code>addConstant()</code>, which we defined back in an
<a href="chunks-of-bytecode.html">earlier chapter</a>. That adds the given value to the end of the chunk&rsquo;s
constant table and returns its index. The new function&rsquo;s job is mostly to make
sure we don&rsquo;t have too many constants. Since the <code>OP_CONSTANT</code> instruction uses
a single byte for the index operand, we can store and load only up to <span
name="256">256</span> constants in a chunk.</p>
<aside name="256">
<p>Yes, that limit is pretty low. If this were a full-sized language
implementation, we&rsquo;d want to add another instruction like <code>OP_CONSTANT_16</code> that
stores the index as a two-byte operand so we could handle more constants when
needed.</p>
<p>The code to support that isn&rsquo;t particularly illuminating, so I omitted it from
clox, but you&rsquo;ll want your VMs to scale to larger programs.</p>
</aside>
<p>That&rsquo;s basically all it takes. Provided there is some suitable code that
consumes a <code>TOKEN_NUMBER</code> token, looks up <code>number()</code> in the function pointer
array, and then calls it, we can now compile number literals to bytecode.</p>
<h3><a href="#parentheses-for-grouping" id="parentheses-for-grouping"><small>17&#8202;.&#8202;4&#8202;.&#8202;2</small>Parentheses for grouping</a></h3>
<p>Our as-yet-imaginary array of parsing function pointers would be great if every
expression was only a single token long. Alas, most are longer. However, many
expressions <em>start</em> with a particular token. We call these <em>prefix</em> expressions.
For example, when we&rsquo;re parsing an expression and the current token is <code>(</code>, we
know we must be looking at a parenthesized grouping expression.</p>
<p>It turns out our function pointer array handles those too. The parsing function
for an expression type can consume any additional tokens that it wants to, just
like in a regular recursive descent parser. Here&rsquo;s how parentheses work:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>endCompiler</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">grouping</span>() {
  <span class="i">expression</span>();
  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after expression.&quot;</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>endCompiler</em>()</div>

<p>Again, we assume the initial <code>(</code> has already been consumed. We <span
name="recursive">recursively</span> call back into <code>expression()</code> to compile the
expression between the parentheses, then parse the closing <code>)</code> at the end.</p>
<aside name="recursive">
<p>A Pratt parser isn&rsquo;t a recursive <em>descent</em> parser, but it&rsquo;s still recursive.
That&rsquo;s to be expected since the grammar itself is recursive.</p>
</aside>
<p>As far as the back end is concerned, there&rsquo;s literally nothing to a grouping
expression. Its sole function is syntactic<span class="em">&mdash;</span>it lets you insert a
lower-precedence expression where a higher precedence is expected. Thus, it has
no runtime semantics on its own and therefore doesn&rsquo;t emit any bytecode. The
inner call to <code>expression()</code> takes care of generating bytecode for the
expression inside the parentheses.</p>
<h3><a href="#unary-negation" id="unary-negation"><small>17&#8202;.&#8202;4&#8202;.&#8202;3</small>Unary negation</a></h3>
<p>Unary minus is also a prefix expression, so it works with our model too.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>number</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">unary</span>() {
  <span class="t">TokenType</span> <span class="i">operatorType</span> = <span class="i">parser</span>.<span class="i">previous</span>.<span class="i">type</span>;

  <span class="c">// Compile the operand.</span>
  <span class="i">expression</span>();

  <span class="c">// Emit the operator instruction.</span>
  <span class="k">switch</span> (<span class="i">operatorType</span>) {
    <span class="k">case</span> <span class="a">TOKEN_MINUS</span>: <span class="i">emitByte</span>(<span class="a">OP_NEGATE</span>); <span class="k">break</span>;
    <span class="k">default</span>: <span class="k">return</span>; <span class="c">// Unreachable.</span>
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>number</em>()</div>

<p>The leading <code>-</code> token has been consumed and is sitting in <code>parser.previous</code>. We
grab the token type from that to note which unary operator we&rsquo;re dealing with.
It&rsquo;s unnecessary right now, but this will make more sense when we use this same
function to compile the <code>!</code> operator in <a href="types-of-values.html">the next chapter</a>.</p>
<p>As in <code>grouping()</code>, we recursively call <code>expression()</code> to compile the operand.
After that, we emit the bytecode to perform the negation. It might seem a little
weird to write the negate instruction <em>after</em> its operand&rsquo;s bytecode since the
<code>-</code> appears on the left, but think about it in terms of order of execution:</p>
<ol>
<li>
<p>We evaluate the operand first, which leaves its value on the stack.</p>
</li>
<li>
<p>Then we pop that value, negate it, and push the result.</p>
</li>
</ol>
<p>So the <code>OP_NEGATE</code> instruction should be emitted <span name="line">last</span>.
This is part of the compiler&rsquo;s job<span class="em">&mdash;</span>parsing the program in the order it
appears in the source code and rearranging it into the order that execution
happens.</p>
<aside name="line">
<p>Emitting the <code>OP_NEGATE</code> instruction after the operand does mean that the
most recently consumed token when the bytecode is written is <em>not</em> the <code>-</code>
token. That mostly doesn&rsquo;t matter, except that we use that token for the line
number to associate with that instruction.</p>
<p>This means if you have a multi-line negation expression, like:</p>
<div class="codehilite"><pre><span class="k">print</span> -
  <span class="k">true</span>;
</pre></div>
<p>Then the runtime error will be reported on the wrong line. Here, it would show
the error on line 2, even though the <code>-</code> is on line 1. A more robust approach
would be to store the token&rsquo;s line before compiling the operand and then pass
that into <code>emitByte()</code>, but I wanted to keep things simple for the book.</p>
</aside>
<p>There is one problem with this code, though. The <code>expression()</code> function it
calls will parse any expression for the operand, regardless of precedence. Once
we add binary operators and other syntax, that will do the wrong thing.
Consider:</p>
<div class="codehilite"><pre>-<span class="i">a</span>.<span class="i">b</span> + <span class="i">c</span>;
</pre></div>
<p>Here, the operand to <code>-</code> should be just the <code>a.b</code> expression, not the entire
<code>a.b + c</code>. But if <code>unary()</code> calls <code>expression()</code>, the latter will happily chew
through all of the remaining code including the <code>+</code>. It will erroneously treat
the <code>-</code> as lower precedence than the <code>+</code>.</p>
<p>When parsing the operand to unary <code>-</code>, we need to compile only expressions at a
certain precedence level or higher. In jlox&rsquo;s recursive descent parser we
accomplished that by calling into the parsing method for the lowest-precedence
expression we wanted to allow (in this case, <code>call()</code>). Each method for parsing
a specific expression also parsed any expressions of higher precedence too, so
that included the rest of the precedence table.</p>
<p>The parsing functions like <code>number()</code> and <code>unary()</code> here in clox are different.
Each only parses exactly one type of expression. They don&rsquo;t cascade to include
higher-precedence expression types too. We need a different solution, and it
looks like this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>unary</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">parsePrecedence</span>(<span class="t">Precedence</span> <span class="i">precedence</span>) {
  <span class="c">// What goes here?</span>
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>unary</em>()</div>

<p>This function<span class="em">&mdash;</span>once we implement it<span class="em">&mdash;</span>starts at the current token and parses
any expression at the given precedence level or higher. We have some other setup
to get through before we can write the body of this function, but you can
probably guess that it will use that table of parsing function pointers I&rsquo;ve
been talking about. For now, don&rsquo;t worry too much about how it works. In order
to take the &ldquo;precedence&rdquo; as a parameter, we define it numerically.</p>
<div class="codehilite"><pre class="insert-before">} Parser;
</pre><div class="source-file"><em>compiler.c</em><br>
add after struct <em>Parser</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">enum</span> {
  <span class="a">PREC_NONE</span>,
  <span class="a">PREC_ASSIGNMENT</span>,  <span class="c">// =</span>
  <span class="a">PREC_OR</span>,          <span class="c">// or</span>
  <span class="a">PREC_AND</span>,         <span class="c">// and</span>
  <span class="a">PREC_EQUALITY</span>,    <span class="c">// == !=</span>
  <span class="a">PREC_COMPARISON</span>,  <span class="c">// &lt; &gt; &lt;= &gt;=</span>
  <span class="a">PREC_TERM</span>,        <span class="c">// + -</span>
  <span class="a">PREC_FACTOR</span>,      <span class="c">// * /</span>
  <span class="a">PREC_UNARY</span>,       <span class="c">// ! -</span>
  <span class="a">PREC_CALL</span>,        <span class="c">// . ()</span>
  <span class="a">PREC_PRIMARY</span>
} <span class="t">Precedence</span>;
</pre><pre class="insert-after">

Parser parser;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after struct <em>Parser</em></div>

<p>These are all of Lox&rsquo;s precedence levels in order from lowest to highest. Since
C implicitly gives successively larger numbers for enums, this means that
<code>PREC_CALL</code> is numerically larger than <code>PREC_UNARY</code>. For example, say the
compiler is sitting on a chunk of code like:</p>
<div class="codehilite"><pre>-<span class="i">a</span>.<span class="i">b</span> + <span class="i">c</span>
</pre></div>
<p>If we call <code>parsePrecedence(PREC_ASSIGNMENT)</code>, then it will parse the entire
expression because <code>+</code> has higher precedence than assignment. If instead we
call <code>parsePrecedence(PREC_UNARY)</code>, it will compile the <code>-a.b</code> and stop there.
It doesn&rsquo;t keep going through the <code>+</code> because the addition has lower precedence
than unary operators.</p>
<p>With this function in hand, it&rsquo;s a snap to fill in the missing body for
<code>expression()</code>.</p>
<div class="codehilite"><pre class="insert-before">static void expression() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>expression</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="i">parsePrecedence</span>(<span class="a">PREC_ASSIGNMENT</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>expression</em>(), replace 1 line</div>

<p>We simply parse the lowest precedence level, which subsumes all of the
higher-precedence expressions too. Now, to compile the operand for a unary
expression, we call this new function and limit it to the appropriate level:</p>
<div class="codehilite"><pre class="insert-before">  // Compile the operand.
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>unary</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="i">parsePrecedence</span>(<span class="a">PREC_UNARY</span>);
</pre><pre class="insert-after">

  // Emit the operator instruction.
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>unary</em>(), replace 1 line</div>

<p>We use the unary operator&rsquo;s own <code>PREC_UNARY</code> precedence to permit <span
name="useful">nested</span> unary expressions like <code>!!doubleNegative</code>. Since
unary operators have pretty high precedence, that correctly excludes things like
binary operators. Speaking of which<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<aside name="useful">
<p>Not that nesting unary expressions is particularly useful in Lox. But other
languages let you do it, so we do too.</p>
</aside>
<h2><a href="#parsing-infix-expressions" id="parsing-infix-expressions"><small>17&#8202;.&#8202;5</small>Parsing Infix Expressions</a></h2>
<p>Binary operators are different from the previous expressions because they are
<em>infix</em>. With the other expressions, we know what we are parsing from the very
first token. With infix expressions, we don&rsquo;t know we&rsquo;re in the middle of a
binary operator until <em>after</em> we&rsquo;ve parsed its left operand and then stumbled
onto the operator token in the middle.</p>
<p>Here&rsquo;s an example:</p>
<div class="codehilite"><pre><span class="n">1</span> + <span class="n">2</span>
</pre></div>
<p>Let&rsquo;s walk through trying to compile it with what we know so far:</p>
<ol>
<li>
<p>We call <code>expression()</code>. That in turn calls
<code>parsePrecedence(PREC_ASSIGNMENT)</code>.</p>
</li>
<li>
<p>That function (once we implement it) sees the leading number token and
recognizes it is parsing a number literal. It hands off control to
<code>number()</code>.</p>
</li>
<li>
<p><code>number()</code> creates a constant, emits an <code>OP_CONSTANT</code>, and returns back to
<code>parsePrecedence()</code>.</p>
</li>
</ol>
<p>Now what? The call to <code>parsePrecedence()</code> should consume the entire addition
expression, so it needs to keep going somehow. Fortunately, the parser is right
where we need it to be. Now that we&rsquo;ve compiled the leading number expression,
the next token is <code>+</code>. That&rsquo;s the exact token that <code>parsePrecedence()</code> needs to
detect that we&rsquo;re in the middle of an infix expression and to realize that the
expression we already compiled is actually an operand to that.</p>
<p>So this hypothetical array of function pointers doesn&rsquo;t just list functions to
parse expressions that start with a given token. Instead, it&rsquo;s a <em>table</em> of
function pointers. One column associates prefix parser functions with token
types. The second column associates infix parser functions with token types.</p>
<p>The function we will use as the infix parser for <code>TOKEN_PLUS</code>, <code>TOKEN_MINUS</code>,
<code>TOKEN_STAR</code>, and <code>TOKEN_SLASH</code> is this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>endCompiler</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">binary</span>() {
  <span class="t">TokenType</span> <span class="i">operatorType</span> = <span class="i">parser</span>.<span class="i">previous</span>.<span class="i">type</span>;
  <span class="t">ParseRule</span>* <span class="i">rule</span> = <span class="i">getRule</span>(<span class="i">operatorType</span>);
  <span class="i">parsePrecedence</span>((<span class="t">Precedence</span>)(<span class="i">rule</span>-&gt;<span class="i">precedence</span> + <span class="n">1</span>));

  <span class="k">switch</span> (<span class="i">operatorType</span>) {
    <span class="k">case</span> <span class="a">TOKEN_PLUS</span>:          <span class="i">emitByte</span>(<span class="a">OP_ADD</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_MINUS</span>:         <span class="i">emitByte</span>(<span class="a">OP_SUBTRACT</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_STAR</span>:          <span class="i">emitByte</span>(<span class="a">OP_MULTIPLY</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_SLASH</span>:         <span class="i">emitByte</span>(<span class="a">OP_DIVIDE</span>); <span class="k">break</span>;
    <span class="k">default</span>: <span class="k">return</span>; <span class="c">// Unreachable.</span>
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>endCompiler</em>()</div>

<p>When a prefix parser function is called, the leading token has already been
consumed. An infix parser function is even more <em>in medias res</em><span class="em">&mdash;</span>the entire
left-hand operand expression has already been compiled and the subsequent infix
operator consumed.</p>
<p>The fact that the left operand gets compiled first works out fine. It means at
runtime, that code gets executed first. When it runs, the value it produces will
end up on the stack. That&rsquo;s right where the infix operator is going to need it.</p>
<p>Then we come here to <code>binary()</code> to handle the rest of the arithmetic operators.
This function compiles the right operand, much like how <code>unary()</code> compiles its
own trailing operand. Finally, it emits the bytecode instruction that performs
the binary operation.</p>
<p>When run, the VM will execute the left and right operand code, in that order,
leaving their values on the stack. Then it executes the instruction for the
operator. That pops the two values, computes the operation, and pushes the
result.</p>
<p>The code that probably caught your eye here is that <code>getRule()</code> line. When we
parse the right-hand operand, we again need to worry about precedence. Take an
expression like:</p>
<div class="codehilite"><pre><span class="n">2</span> * <span class="n">3</span> + <span class="n">4</span>
</pre></div>
<p>When we parse the right operand of the <code>*</code> expression, we need to just capture
<code>3</code>, and not <code>3 + 4</code>, because <code>+</code> is lower precedence than <code>*</code>. We could define
a separate function for each binary operator. Each would call
<code>parsePrecedence()</code> and pass in the correct precedence level for its operand.</p>
<p>But that&rsquo;s kind of tedious. Each binary operator&rsquo;s right-hand operand precedence
is one level <span name="higher">higher</span> than its own. We can look that up
dynamically with this <code>getRule()</code> thing we&rsquo;ll get to soon. Using that, we call
<code>parsePrecedence()</code> with one level higher than this operator&rsquo;s level.</p>
<aside name="higher">
<p>We use one <em>higher</em> level of precedence for the right operand because the binary
operators are left-associative. Given a series of the <em>same</em> operator, like:</p>
<div class="codehilite"><pre><span class="n">1</span> + <span class="n">2</span> + <span class="n">3</span> + <span class="n">4</span>
</pre></div>
<p>We want to parse it like:</p>
<div class="codehilite"><pre>((<span class="n">1</span> + <span class="n">2</span>) + <span class="n">3</span>) + <span class="n">4</span>
</pre></div>
<p>Thus, when parsing the right-hand operand to the first <code>+</code>, we want to consume
the <code>2</code>, but not the rest, so we use one level above <code>+</code>&rsquo;s precedence. But if
our operator was <em>right</em>-associative, this would be wrong. Given:</p>
<div class="codehilite"><pre><span class="i">a</span> = <span class="i">b</span> = <span class="i">c</span> = <span class="i">d</span>
</pre></div>
<p>Since assignment is right-associative, we want to parse it as:</p>
<div class="codehilite"><pre><span class="i">a</span> = (<span class="i">b</span> = (<span class="i">c</span> = <span class="i">d</span>))
</pre></div>
<p>To enable that, we would call <code>parsePrecedence()</code> with the <em>same</em> precedence as
the current operator.</p>
</aside>
<p>This way, we can use a single <code>binary()</code> function for all binary operators even
though they have different precedences.</p>
<h2><a href="#a-pratt-parser" id="a-pratt-parser"><small>17&#8202;.&#8202;6</small>A Pratt Parser</a></h2>
<p>We now have all of the pieces and parts of the compiler laid out. We have a
function for each grammar production: <code>number()</code>, <code>grouping()</code>, <code>unary()</code>, and
<code>binary()</code>. We still need to implement <code>parsePrecedence()</code> and <code>getRule()</code>. We
also know we need a table that, given a token type, lets us find</p>
<ul>
<li>
<p>the function to compile a prefix expression starting with a token of that
type,</p>
</li>
<li>
<p>the function to compile an infix expression whose left operand is followed
by a token of that type, and</p>
</li>
<li>
<p>the precedence of an <span name="prefix">infix</span> expression that uses
that token as an operator.</p>
</li>
</ul>
<aside name="prefix">
<p>We don&rsquo;t need to track the precedence of the <em>prefix</em> expression starting with a
given token because all prefix operators in Lox have the same precedence.</p>
</aside>
<p>We wrap these three properties in a little struct which represents a single row
in the parser table.</p>
<div class="codehilite"><pre class="insert-before">} Precedence;
</pre><div class="source-file"><em>compiler.c</em><br>
add after enum <em>Precedence</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">ParseFn</span> <span class="i">prefix</span>;
  <span class="t">ParseFn</span> <span class="i">infix</span>;
  <span class="t">Precedence</span> <span class="i">precedence</span>;
} <span class="t">ParseRule</span>;
</pre><pre class="insert-after">

Parser parser;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after enum <em>Precedence</em></div>

<p>That ParseFn type is a simple <span name="typedef">typedef</span> for a function
type that takes no arguments and returns nothing.</p>
<aside name="typedef" class="bottom">
<p>C&rsquo;s syntax for function pointer types is so bad that I always hide it behind a
typedef. I understand the intent behind the syntax<span class="em">&mdash;</span>the whole &ldquo;declaration
reflects use&rdquo; thing<span class="em">&mdash;</span>but I think it was a failed syntactic experiment.</p>
</aside>
<div class="codehilite"><pre class="insert-before">} Precedence;
</pre><div class="source-file"><em>compiler.c</em><br>
add after enum <em>Precedence</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="t">void</span> (*<span class="t">ParseFn</span>)();
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after enum <em>Precedence</em></div>

<p>The table that drives our whole parser is an array of ParseRules. We&rsquo;ve been
talking about it forever, and finally you get to see it.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>unary</em>()</div>
<pre><span class="t">ParseRule</span> <span class="i">rules</span>[] = {
  [<span class="a">TOKEN_LEFT_PAREN</span>]    = {<span class="i">grouping</span>, <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_RIGHT_PAREN</span>]   = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_LEFT_BRACE</span>]    = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},<span name="big"> </span>
  [<span class="a">TOKEN_RIGHT_BRACE</span>]   = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_COMMA</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_DOT</span>]           = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_MINUS</span>]         = {<span class="i">unary</span>,    <span class="i">binary</span>, <span class="a">PREC_TERM</span>},
  [<span class="a">TOKEN_PLUS</span>]          = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_TERM</span>},
  [<span class="a">TOKEN_SEMICOLON</span>]     = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_SLASH</span>]         = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_FACTOR</span>},
  [<span class="a">TOKEN_STAR</span>]          = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_FACTOR</span>},
  [<span class="a">TOKEN_BANG</span>]          = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_BANG_EQUAL</span>]    = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_EQUAL</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_EQUAL_EQUAL</span>]   = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_GREATER</span>]       = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_GREATER_EQUAL</span>] = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_LESS</span>]          = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_LESS_EQUAL</span>]    = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_IDENTIFIER</span>]    = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_STRING</span>]        = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_NUMBER</span>]        = {<span class="i">number</span>,   <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_AND</span>]           = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_CLASS</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_ELSE</span>]          = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_FALSE</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_FOR</span>]           = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_FUN</span>]           = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_IF</span>]            = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_NIL</span>]           = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_OR</span>]            = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_PRINT</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_RETURN</span>]        = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_SUPER</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_THIS</span>]          = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_TRUE</span>]          = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_VAR</span>]           = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_WHILE</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_ERROR</span>]         = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
  [<span class="a">TOKEN_EOF</span>]           = {<span class="a">NULL</span>,     <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
};
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>unary</em>()</div>

<aside name="big">
<p>See what I mean about not wanting to revisit the table each time we needed a new
column? It&rsquo;s a beast.</p>
<p>If you haven&rsquo;t seen the <code>[TOKEN_DOT] =</code> syntax in a C array literal, that is
C99&rsquo;s designated initializer syntax. It&rsquo;s clearer than having to count array
indexes by hand.</p>
</aside>
<p>You can see how <code>grouping</code> and <code>unary</code> are slotted into the prefix parser column
for their respective token types. In the next column, <code>binary</code> is wired up to
the four arithmetic infix operators. Those infix operators also have their
precedences set in the last column.</p>
<p>Aside from those, the rest of the table is full of <code>NULL</code> and <code>PREC_NONE</code>. Most
of those empty cells are because there is no expression associated with those
tokens. You can&rsquo;t start an expression with, say, <code>else</code>, and <code>}</code> would make for
a pretty confusing infix operator.</p>
<p>But, also, we haven&rsquo;t filled in the entire grammar yet. In later chapters, as we
add new expression types, some of these slots will get functions in them. One of
the things I like about this approach to parsing is that it makes it very easy
to see which tokens are in use by the grammar and which are available.</p>
<p>Now that we have the table, we are finally ready to write the code that uses it.
This is where our Pratt parser comes to life. The easiest function to define is
<code>getRule()</code>.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>parsePrecedence</em>()</div>
<pre><span class="k">static</span> <span class="t">ParseRule</span>* <span class="i">getRule</span>(<span class="t">TokenType</span> <span class="i">type</span>) {
  <span class="k">return</span> &amp;<span class="i">rules</span>[<span class="i">type</span>];
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>parsePrecedence</em>()</div>

<p>It simply returns the rule at the given index. It&rsquo;s called by <code>binary()</code> to look
up the precedence of the current operator. This function exists solely to handle
a declaration cycle in the C code. <code>binary()</code> is defined <em>before</em> the rules
table so that the table can store a pointer to it. That means the body of
<code>binary()</code> cannot access the table directly.</p>
<p>Instead, we wrap the lookup in a function. That lets us forward declare
<code>getRule()</code> before the definition of <code>binary()</code>, and <span
name="forward">then</span> <em>define</em> <code>getRule()</code> after the table. We&rsquo;ll need a
couple of other forward declarations to handle the fact that our grammar is
recursive, so let&rsquo;s get them all out of the way.</p>
<aside name="forward">
<p>This is what happens when you write your VM in a language that was designed to
be compiled on a PDP-11.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  emitReturn();
}
</pre><div class="source-file"><em>compiler.c</em><br>
add after <em>endCompiler</em>()</div>
<pre class="insert">

<span class="k">static</span> <span class="t">void</span> <span class="i">expression</span>();
<span class="k">static</span> <span class="t">ParseRule</span>* <span class="i">getRule</span>(<span class="t">TokenType</span> <span class="i">type</span>);
<span class="k">static</span> <span class="t">void</span> <span class="i">parsePrecedence</span>(<span class="t">Precedence</span> <span class="i">precedence</span>);

</pre><pre class="insert-after">static void binary() {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>endCompiler</em>()</div>

<p>If you&rsquo;re following along and implementing clox yourself, pay close attention to
the little annotations that tell you where to put these code snippets. Don&rsquo;t
worry, though. If you get it wrong, the C compiler will be happy to tell you.</p>
<h3><a href="#parsing-with-precedence" id="parsing-with-precedence"><small>17&#8202;.&#8202;6&#8202;.&#8202;1</small>Parsing with precedence</a></h3>
<p>Now we&rsquo;re getting to the fun stuff. The maestro that orchestrates all of the
parsing functions we&rsquo;ve defined is <code>parsePrecedence()</code>. Let&rsquo;s start with parsing
prefix expressions.</p>
<div class="codehilite"><pre class="insert-before">static void parsePrecedence(Precedence precedence) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>parsePrecedence</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="i">advance</span>();
  <span class="t">ParseFn</span> <span class="i">prefixRule</span> = <span class="i">getRule</span>(<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">type</span>)-&gt;<span class="i">prefix</span>;
  <span class="k">if</span> (<span class="i">prefixRule</span> == <span class="a">NULL</span>) {
    <span class="i">error</span>(<span class="s">&quot;Expect expression.&quot;</span>);
    <span class="k">return</span>;
  }

  <span class="i">prefixRule</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>parsePrecedence</em>(), replace 1 line</div>

<p>We read the next token and look up the corresponding ParseRule. If there is no
prefix parser, then the token must be a syntax error. We report that and return
to the caller.</p>
<p>Otherwise, we call that prefix parse function and let it do its thing. That
prefix parser compiles the rest of the prefix expression, consuming any other
tokens it needs, and returns back here. Infix expressions are where it gets
interesting since precedence comes into play. The implementation is remarkably
simple.</p>
<div class="codehilite"><pre class="insert-before">  prefixRule();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>parsePrecedence</em>()</div>
<pre class="insert">

  <span class="k">while</span> (<span class="i">precedence</span> &lt;= <span class="i">getRule</span>(<span class="i">parser</span>.<span class="i">current</span>.<span class="i">type</span>)-&gt;<span class="i">precedence</span>) {
    <span class="i">advance</span>();
    <span class="t">ParseFn</span> <span class="i">infixRule</span> = <span class="i">getRule</span>(<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">type</span>)-&gt;<span class="i">infix</span>;
    <span class="i">infixRule</span>();
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>parsePrecedence</em>()</div>

<p>That&rsquo;s the whole thing. Really. Here&rsquo;s how the entire function works: At the
beginning of <code>parsePrecedence()</code>, we look up a prefix parser for the current
token. The first token is <em>always</em> going to belong to some kind of prefix
expression, by definition. It may turn out to be nested as an operand inside one
or more infix expressions, but as you read the code from left to right, the
first token you hit always belongs to a prefix expression.</p>
<p>After parsing that, which may consume more tokens, the prefix expression is
done. Now we look for an infix parser for the next token. If we find one, it
means the prefix expression we already compiled might be an operand for it. But
only if the call to <code>parsePrecedence()</code> has a <code>precedence</code> that is low enough to
permit that infix operator.</p>
<p>If the next token is too low precedence, or isn&rsquo;t an infix operator at all,
we&rsquo;re done. We&rsquo;ve parsed as much expression as we can. Otherwise, we consume the
operator and hand off control to the infix parser we found. It consumes whatever
other tokens it needs (usually the right operand) and returns back to
<code>parsePrecedence()</code>. Then we loop back around and see if the <em>next</em> token is
also a valid infix operator that can take the entire preceding expression as its
operand. We keep looping like that, crunching through infix operators and their
operands until we hit a token that isn&rsquo;t an infix operator or is too low
precedence and stop.</p>
<p>That&rsquo;s a lot of prose, but if you really want to mind meld with Vaughan Pratt
and fully understand the algorithm, step through the parser in your debugger as
it works through some expressions. Maybe a picture will help. There&rsquo;s only a
handful of functions, but they are marvelously intertwined:</p>
<p><span name="connections"></span></p>
<p><img src="image/compiling-expressions/connections.png" alt="The various parsing
functions and how they call each other." /></p>
<aside name="connections">
<p>The <img src="image/compiling-expressions/calls.png" alt="A solid arrow."
class="arrow" /> arrow connects a function to another function it directly
calls. The <img src="image/compiling-expressions/points-to.png" alt="An open
arrow." class="arrow" /> arrow shows the table&rsquo;s pointers to the parsing
functions.</p>
</aside>
<p>Later, we&rsquo;ll need to tweak the code in this chapter to handle assignment. But,
otherwise, what we wrote covers all of our expression compiling needs for the
rest of the book. We&rsquo;ll plug additional parsing functions into the table when we
add new kinds of expressions, but <code>parsePrecedence()</code> is complete.</p>
<h2><a href="#dumping-chunks" id="dumping-chunks"><small>17&#8202;.&#8202;7</small>Dumping Chunks</a></h2>
<p>While we&rsquo;re here in the core of our compiler, we should put in some
instrumentation. To help debug the generated bytecode, we&rsquo;ll add support for
dumping the chunk once the compiler finishes. We had some temporary logging
earlier when we hand-authored the chunk. Now we&rsquo;ll put in some real code so that
we can enable it whenever we want.</p>
<p>Since this isn&rsquo;t for end users, we hide it behind a flag.</p>
<div class="codehilite"><pre class="insert-before">#include &lt;stdint.h&gt;

</pre><div class="source-file"><em>common.h</em></div>
<pre class="insert"><span class="a">#define DEBUG_PRINT_CODE</span>
</pre><pre class="insert-after">#define DEBUG_TRACE_EXECUTION
</pre></div>
<div class="source-file-narrow"><em>common.h</em></div>

<p>When that flag is defined, we use our existing &ldquo;debug&rdquo; module to print out the
chunk&rsquo;s bytecode.</p>
<div class="codehilite"><pre class="insert-before">  emitReturn();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>endCompiler</em>()</div>
<pre class="insert"><span class="a">#ifdef DEBUG_PRINT_CODE</span>
  <span class="k">if</span> (!<span class="i">parser</span>.<span class="i">hadError</span>) {
    <span class="i">disassembleChunk</span>(<span class="i">currentChunk</span>(), <span class="s">&quot;code&quot;</span>);
  }
<span class="a">#endif</span>
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>endCompiler</em>()</div>

<p>We do this only if the code was free of errors. After a syntax error, the
compiler keeps on going but it&rsquo;s in kind of a weird state and might produce
broken code. That&rsquo;s harmless because it won&rsquo;t get executed, but we&rsquo;ll just
confuse ourselves if we try to read it.</p>
<p>Finally, to access <code>disassembleChunk()</code>, we need to include its header.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;scanner.h&quot;
</pre><div class="source-file"><em>compiler.c</em></div>
<pre class="insert">

<span class="a">#ifdef DEBUG_PRINT_CODE</span>
<span class="a">#include &quot;debug.h&quot;</span>
<span class="a">#endif</span>
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em></div>

<p>We made it! This was the last major section to install in our VM&rsquo;s compilation
and execution pipeline. Our interpreter doesn&rsquo;t <em>look</em> like much, but inside it
is scanning, parsing, compiling to bytecode, and executing.</p>
<p>Fire up the VM and type in an expression. If we did everything right, it should
calculate and print the result. We now have a very over-engineered arithmetic
calculator. We have a lot of language features to add in the coming chapters,
but the foundation is in place.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>To really understand the parser, you need to see how execution threads
through the interesting parsing functions<span class="em">&mdash;</span><code>parsePrecedence()</code> and the
parser functions stored in the table. Take this (strange) expression:</p>
<div class="codehilite"><pre>(-<span class="n">1</span> + <span class="n">2</span>) * <span class="n">3</span> - -<span class="n">4</span>
</pre></div>
<p>Write a trace of how those functions are called. Show the order they are
called, which calls which, and the arguments passed to them.</p>
</li>
<li>
<p>The ParseRule row for <code>TOKEN_MINUS</code> has both prefix and infix function
pointers. That&rsquo;s because <code>-</code> is both a prefix operator (unary negation) and
an infix one (subtraction).</p>
<p>In the full Lox language, what other tokens can be used in both prefix and
infix positions? What about in C or in another language of your choice?</p>
</li>
<li>
<p>You might be wondering about complex &ldquo;mixfix&rdquo; expressions that have more
than two operands separated by tokens. C&rsquo;s conditional or &ldquo;ternary&rdquo;
operator, <code>?:</code>, is a widely known one.</p>
<p>Add support for that operator to the compiler. You don&rsquo;t have to generate
any bytecode, just show how you would hook it up to the parser and handle
the operands.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: It&rsquo;s Just Parsing</a></h2>
<p>I&rsquo;m going to make a claim here that will be unpopular with some compiler and
language people. It&rsquo;s OK if you don&rsquo;t agree. Personally, I learn more from
strongly stated opinions that I disagree with than I do from several pages of
qualifiers and equivocation. My claim is that <em>parsing doesn&rsquo;t matter</em>.</p>
<p>Over the years, many programming language people, especially in academia, have
gotten <em>really</em> into parsers and taken them very seriously. Initially, it was
the compiler folks who got into <span name="yacc">compiler-compilers</span>,
LALR, and other stuff like that. The first half of the dragon book is a long
love letter to the wonders of parser generators.</p>
<aside name="yacc">
<p>All of us suffer from the vice of &ldquo;when all you have is a hammer, everything
looks like a nail&rdquo;, but perhaps none so visibly as compiler people. You wouldn&rsquo;t
believe the breadth of software problems that miraculously seem to require a new
little language in their solution as soon as you ask a compiler hacker for help.</p>
<p>Yacc and other compiler-compilers are the most delightfully recursive example.
&ldquo;Wow, writing compilers is a chore. I know, let&rsquo;s write a compiler to write our
compiler for us.&rdquo;</p>
<p>For the record, I don&rsquo;t claim immunity to this affliction.</p>
</aside>
<p>Later, the functional programming folks got into parser combinators, packrat
parsers, and other sorts of things. Because, obviously, if you give a functional
programmer a problem, the first thing they&rsquo;ll do is whip out a pocketful of
higher-order functions.</p>
<p>Over in math and algorithm analysis land, there is a long legacy of research
into proving time and memory usage for various parsing techniques, transforming
parsing problems into other problems and back, and assigning complexity classes
to different grammars.</p>
<p>At one level, this stuff is important. If you&rsquo;re implementing a language, you
want some assurance that your parser won&rsquo;t go exponential and take 7,000 years
to parse a weird edge case in the grammar. Parser theory gives you that bound.
As an intellectual exercise, learning about parsing techniques is also fun and
rewarding.</p>
<p>But if your goal is just to implement a language and get it in front of users,
almost all of that stuff doesn&rsquo;t matter. It&rsquo;s really easy to get worked up by
the enthusiasm of the people who <em>are</em> into it and think that your front end
<em>needs</em> some whiz-bang generated combinator-parser-factory thing. I&rsquo;ve seen
people burn tons of time writing and rewriting their parser using whatever
today&rsquo;s hot library or technique is.</p>
<p>That&rsquo;s time that doesn&rsquo;t add any value to your user&rsquo;s life. If you&rsquo;re just
trying to get your parser done, pick one of the bog-standard techniques, use it,
and move on. Recursive descent, Pratt parsing, and the popular parser generators
like ANTLR or Bison are all fine.</p>
<p>Take the extra time you saved not rewriting your parsing code and spend it
improving the compile error messages your compiler shows users. Good error
handling and reporting is more valuable to users than almost anything else you
can put time into in the front end.</p>
</div>

<footer>
<a href="types-of-values.html" class="next">
  Next Chapter: &ldquo;Types of Values&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/contents.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Table of Contents &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->

<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
    <h2><a href="#top"><small>&nbsp;</small> Table of Contents</a></h2>
    <ul>
      <li><a href="#welcome"><small>I</small>Welcome</a></li>
      <li><a href="#a-tree-walk-interpreter"><small>II</small>A Tree-Walk Interpreter</a></li>
      <li><a href="#a-bytecode-virtual-machine"><small>III</small>A Bytecode Virtual Machine</a></li>
      <li><a href="#backmatter"><small>&#10087;</small>Backmatter</a></li>
    </ul>
        <div class="prev-next">
        <a href="acknowledgements.html" title="Acknowledgements" class="left">&larr;&nbsp;Previous</a>
        <a href="index.html" title="Crafting Interpreters">&uarr;&nbsp;Up</a>
        <a href="welcome.html" title="Welcome" class="right">Next&nbsp;&rarr;</a>
    </div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="acknowledgements.html" title="Acknowledgements" class="prev">←</a>
<a href="welcome.html" title="Welcome" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
    <h2><a href="#top"><small>&nbsp;</small> Table of Contents</a></h2>
    <ul>
      <li><a href="#welcome"><small>I</small>Welcome</a></li>
      <li><a href="#a-tree-walk-interpreter"><small>II</small>A Tree-Walk Interpreter</a></li>
      <li><a href="#a-bytecode-virtual-machine"><small>III</small>A Bytecode Virtual Machine</a></li>
      <li><a href="#backmatter"><small>&#10087;</small>Backmatter</a></li>
    </ul>
        <div class="prev-next">
        <a href="acknowledgements.html" title="Acknowledgements" class="left">&larr;&nbsp;Previous</a>
        <a href="index.html" title="Crafting Interpreters">&uarr;&nbsp;Up</a>
        <a href="welcome.html" title="Welcome" class="right">Next&nbsp;&rarr;</a>
    </div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="contents">

<h1 class="part">Table of Contents</h1>

<div class="chapters">
  <div class="row">
    <div class="first">
    <h2><span class="num">&#10087;</span>Frontmatter</h2>
    <ul>
      <li><span class="num">&nbsp;</span><a href="dedication.html">Dedication</a></li>
      <li><span class="num">&nbsp;</span><a href="acknowledgements.html">Acknowledgements</a></li>
    </ul>

      <h2><span class="num">I.</span><a href="welcome.html" name="welcome">Welcome</a></h2>
      <ul>
        <li><span class="num">1.</span><a href="introduction.html">Introduction</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="introduction.html#design-note">Design Note: What&rsquo;s in a Name?</a>
        </li>
        <li><span class="num">2.</span><a href="a-map-of-the-territory.html">A Map of the Territory</a>
        </li>
        <li><span class="num">3.</span><a href="the-lox-language.html">The Lox Language</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="the-lox-language.html#design-note">Design Note: Expressions and Statements</a>
        </li>
      </ul>      <h2><span class="num">II.</span><a href="a-tree-walk-interpreter.html" name="a-tree-walk-interpreter">A Tree-Walk Interpreter</a></h2>
      <ul>
        <li><span class="num">4.</span><a href="scanning.html">Scanning</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="scanning.html#design-note">Design Note: Implicit Semicolons</a>
        </li>
        <li><span class="num">5.</span><a href="representing-code.html">Representing Code</a>
        </li>
        <li><span class="num">6.</span><a href="parsing-expressions.html">Parsing Expressions</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="parsing-expressions.html#design-note">Design Note: Logic Versus History</a>
        </li>
        <li><span class="num">7.</span><a href="evaluating-expressions.html">Evaluating Expressions</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="evaluating-expressions.html#design-note">Design Note: Static and Dynamic Typing</a>
        </li>
        <li><span class="num">8.</span><a href="statements-and-state.html">Statements and State</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="statements-and-state.html#design-note">Design Note: Implicit Variable Declaration</a>
        </li>
        <li><span class="num">9.</span><a href="control-flow.html">Control Flow</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="control-flow.html#design-note">Design Note: Spoonfuls of Syntactic Sugar</a>
        </li>
        <li><span class="num">10.</span><a href="functions.html">Functions</a>
        </li>
        <li><span class="num">11.</span><a href="resolving-and-binding.html">Resolving and Binding</a>
        </li>
        <li><span class="num">12.</span><a href="classes.html">Classes</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="classes.html#design-note">Design Note: Prototypes and Power</a>
        </li>
        <li><span class="num">13.</span><a href="inheritance.html">Inheritance</a>
        </li>
      </ul>    </div>
    <div class="second">
      <h2><span class="num">III.</span><a href="a-bytecode-virtual-machine.html" name="a-bytecode-virtual-machine">A Bytecode Virtual Machine</a></h2>
      <ul>
        <li><span class="num">14.</span><a href="chunks-of-bytecode.html">Chunks of Bytecode</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="chunks-of-bytecode.html#design-note">Design Note: Test Your Language</a>
        </li>
        <li><span class="num">15.</span><a href="a-virtual-machine.html">A Virtual Machine</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="a-virtual-machine.html#design-note">Design Note: Register-Based Bytecode</a>
        </li>
        <li><span class="num">16.</span><a href="scanning-on-demand.html">Scanning on Demand</a>
        </li>
        <li><span class="num">17.</span><a href="compiling-expressions.html">Compiling Expressions</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="compiling-expressions.html#design-note">Design Note: It&rsquo;s Just Parsing</a>
        </li>
        <li><span class="num">18.</span><a href="types-of-values.html">Types of Values</a>
        </li>
        <li><span class="num">19.</span><a href="strings.html">Strings</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="strings.html#design-note">Design Note: String Encoding</a>
        </li>
        <li><span class="num">20.</span><a href="hash-tables.html">Hash Tables</a>
        </li>
        <li><span class="num">21.</span><a href="global-variables.html">Global Variables</a>
        </li>
        <li><span class="num">22.</span><a href="local-variables.html">Local Variables</a>
        </li>
        <li><span class="num">23.</span><a href="jumping-back-and-forth.html">Jumping Back and Forth</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="jumping-back-and-forth.html#design-note">Design Note: Considering Goto Harmful</a>
        </li>
        <li><span class="num">24.</span><a href="calls-and-functions.html">Calls and Functions</a>
        </li>
        <li><span class="num">25.</span><a href="closures.html">Closures</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="closures.html#design-note">Design Note: Closing Over the Loop Variable</a>
        </li>
        <li><span class="num">26.</span><a href="garbage-collection.html">Garbage Collection</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="garbage-collection.html#design-note">Design Note: Generational Collectors</a>
        </li>
        <li><span class="num">27.</span><a href="classes-and-instances.html">Classes and Instances</a>
        </li>
        <li><span class="num">28.</span><a href="methods-and-initializers.html">Methods and Initializers</a>
        </li>
        <li class="design-note">
        <span class="num">&nbsp;</span><a href="methods-and-initializers.html#design-note">Design Note: Novelty Budget</a>
        </li>
        <li><span class="num">29.</span><a href="superclasses.html">Superclasses</a>
        </li>
        <li><span class="num">30.</span><a href="optimization.html">Optimization</a>
        </li>
      </ul>
    <h2><span class="num">&#10087;</span><a href="backmatter.html" name="backmatter">Backmatter</a></h2>
    <ul>
      <li><span class="num">A1.</span><a href="appendix-i.html">Appendix I: Lox Grammar</a></li>
      <li><span class="num">A2.</span><a href="appendix-ii.html">Appendix II: Generated Syntax Tree Classes</a></li>
    </ul>
    </div>
  </div>
</div>

<footer>
  <a href="welcome.html" class="next">
    First Part: &ldquo;Welcome&rdquo; &rarr;
  </a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/control-flow.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Control Flow &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Control Flow<small>9</small></a></h3>

<ul>
    <li><a href="#turing-machines-briefly"><small>9.1</small> Turing Machines (Briefly)</a></li>
    <li><a href="#conditional-execution"><small>9.2</small> Conditional Execution</a></li>
    <li><a href="#logical-operators"><small>9.3</small> Logical Operators</a></li>
    <li><a href="#while-loops"><small>9.4</small> While Loops</a></li>
    <li><a href="#for-loops"><small>9.5</small> For Loops</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Spoonfuls of Syntactic Sugar</a></li>
</ul>


<div class="prev-next">
    <a href="statements-and-state.html" title="Statements and State" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="functions.html" title="Functions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="statements-and-state.html" title="Statements and State" class="prev">←</a>
<a href="functions.html" title="Functions" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Control Flow<small>9</small></a></h3>

<ul>
    <li><a href="#turing-machines-briefly"><small>9.1</small> Turing Machines (Briefly)</a></li>
    <li><a href="#conditional-execution"><small>9.2</small> Conditional Execution</a></li>
    <li><a href="#logical-operators"><small>9.3</small> Logical Operators</a></li>
    <li><a href="#while-loops"><small>9.4</small> While Loops</a></li>
    <li><a href="#for-loops"><small>9.5</small> For Loops</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Spoonfuls of Syntactic Sugar</a></li>
</ul>


<div class="prev-next">
    <a href="statements-and-state.html" title="Statements and State" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="functions.html" title="Functions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">9</div>
  <h1>Control Flow</h1>

<blockquote>
<p>Logic, like whiskey, loses its beneficial effect when taken in too large
quantities.</p>
<p><cite>Edward John Moreton Drax Plunkett, Lord Dunsany</cite></p>
</blockquote>
<p>Compared to <a href="statements-and-state.html">last chapter&rsquo;s</a> grueling marathon, today is a
lighthearted frolic through a daisy meadow. But while the work is easy, the
reward is surprisingly large.</p>
<p>Right now, our interpreter is little more than a calculator. A Lox program can
only do a fixed amount of work before completing. To make it run twice as long
you have to make the source code twice as lengthy. We&rsquo;re about to fix that. In
this chapter, our interpreter takes a big step towards the programming
language major leagues: <em>Turing-completeness</em>.</p>
<h2><a href="#turing-machines-briefly" id="turing-machines-briefly"><small>9&#8202;.&#8202;1</small>Turing Machines (Briefly)</a></h2>
<p>In the early part of last century, mathematicians stumbled into a series of
confusing <span name="paradox">paradoxes</span> that led them to doubt the
stability of the foundation they had built their work upon. To address that
<a href="https://en.wikipedia.org/wiki/Foundations_of_mathematics#Foundational_crisis">crisis</a>, they went back to square one. Starting from a handful of axioms,
logic, and set theory, they hoped to rebuild mathematics on top of an
impervious foundation.</p>
<aside name="paradox">
<p>The most famous is <a href="https://en.wikipedia.org/wiki/Russell%27s_paradox"><strong>Russell&rsquo;s paradox</strong></a>. Initially, set theory
allowed you to define any sort of set. If you could describe it in English, it
was valid. Naturally, given mathematicians&rsquo; predilection for self-reference,
sets can contain other sets. So Russell, rascal that he was, came up with:</p>
<p><em>R is the set of all sets that do not contain themselves.</em></p>
<p>Does R contain itself? If it doesn&rsquo;t, then according to the second half of the
definition it should. But if it does, then it no longer meets the definition.
Cue mind exploding.</p>
</aside>
<p>They wanted to rigorously answer questions like, &ldquo;Can all true statements be
proven?&rdquo;, &ldquo;Can we <a href="https://en.wikipedia.org/wiki/Computable_function">compute</a> all functions that we can define?&rdquo;, or even the
more general question, &ldquo;What do we mean when we claim a function is
&lsquo;computable&rsquo;?&rdquo;</p>
<p>They presumed the answer to the first two questions would be &ldquo;yes&rdquo;. All that
remained was to prove it. It turns out that the answer to both is &ldquo;no&rdquo;, and
astonishingly, the two questions are deeply intertwined. This is a fascinating
corner of mathematics that touches fundamental questions about what brains are
able to do and how the universe works. I can&rsquo;t do it justice here.</p>
<p>What I do want to note is that in the process of proving that the answer to the
first two questions is &ldquo;no&rdquo;, Alan Turing and Alonzo Church devised a precise
answer to the last question<span class="em">&mdash;</span>a definition of what kinds of functions are <span
name="uncomputable">computable</span>. They each crafted a tiny system with a
minimum set of machinery that is still powerful enough to compute any of a
(very) large class of functions.</p>
<aside name="uncomputable">
<p>They proved the answer to the first question is &ldquo;no&rdquo; by showing that the
function that returns the truth value of a given statement is <em>not</em> a computable
one.</p>
</aside>
<p>These are now considered the &ldquo;computable functions&rdquo;. Turing&rsquo;s system is called a
<span name="turing"><strong>Turing machine</strong></span>. Church&rsquo;s is the <strong>lambda
calculus</strong>. Both are still widely used as the basis for models of computation
and, in fact, many modern functional programming languages use the lambda
calculus at their core.</p>
<aside name="turing">
<p>Turing called his inventions &ldquo;a-machines&rdquo; for &ldquo;automatic&rdquo;. He wasn&rsquo;t so
self-aggrandizing as to put his <em>own</em> name on them. Later mathematicians did
that for him. That&rsquo;s how you get famous while still retaining some modesty.</p>
</aside><img src="image/control-flow/turing-machine.png" alt="A Turing machine." />
<p>Turing machines have better name recognition<span class="em">&mdash;</span>there&rsquo;s no Hollywood film about
Alonzo Church yet<span class="em">&mdash;</span>but the two formalisms are <a href="https://en.wikipedia.org/wiki/Church%E2%80%93Turing_thesis">equivalent in power</a>.
In fact, any programming language with some minimal level of expressiveness is
powerful enough to compute <em>any</em> computable function.</p>
<p>You can prove that by writing a simulator for a Turing machine in your language.
Since Turing proved his machine can compute any computable function, by
extension, that means your language can too. All you need to do is translate the
function into a Turing machine, and then run that on your simulator.</p>
<p>If your language is expressive enough to do that, it&rsquo;s considered
<strong>Turing-complete</strong>. Turing machines are pretty dang simple, so it doesn&rsquo;t take
much power to do this. You basically need arithmetic, a little control flow,
and the ability to allocate and use (theoretically) arbitrary amounts of memory.
We&rsquo;ve got the first. By the end of this chapter, we&rsquo;ll have the <span
name="memory">second</span>.</p>
<aside name="memory">
<p>We <em>almost</em> have the third too. You can create and concatenate strings of
arbitrary size, so you can <em>store</em> unbounded memory. But we don&rsquo;t have any way
to access parts of a string.</p>
</aside>
<h2><a href="#conditional-execution" id="conditional-execution"><small>9&#8202;.&#8202;2</small>Conditional Execution</a></h2>
<p>Enough history, let&rsquo;s jazz up our language. We can divide control flow roughly
into two kinds:</p>
<ul>
<li>
<p><strong>Conditional</strong> or <strong>branching control flow</strong> is used to <em>not</em> execute
some piece of code. Imperatively, you can think of it as jumping <em>ahead</em>
over a region of code.</p>
</li>
<li>
<p><strong>Looping control flow</strong> executes a chunk of code more than once. It jumps
<em>back</em> so that you can do something again. Since you don&rsquo;t usually want
<em>infinite</em> loops, it typically has some conditional logic to know when to
stop looping as well.</p>
</li>
</ul>
<p>Branching is simpler, so we&rsquo;ll start there. C-derived languages have two main
conditional execution features, the <code>if</code> statement and the perspicaciously named
&ldquo;conditional&rdquo; <span name="ternary">operator</span> (<code>?:</code>). An <code>if</code> statement
lets you conditionally execute statements and the conditional operator lets you
conditionally execute expressions.</p>
<aside name="ternary">
<p>The conditional operator is also called the &ldquo;ternary&rdquo; operator because it&rsquo;s the
only operator in C that takes three operands.</p>
</aside>
<p>For simplicity&rsquo;s sake, Lox doesn&rsquo;t have a conditional operator, so let&rsquo;s get our
<code>if</code> statement on. Our statement grammar gets a new production.</p>
<p><span name="semicolon"></span></p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">ifStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">block</span> ;

<span class="i">ifStmt</span>         → <span class="s">&quot;if&quot;</span> <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> <span class="i">statement</span>
               ( <span class="s">&quot;else&quot;</span> <span class="i">statement</span> )? ;
</pre></div>
<aside name="semicolon">
<p>The semicolons in the rules aren&rsquo;t quoted, which means they are part of the
grammar metasyntax, not Lox&rsquo;s syntax. A block does not have a <code>;</code> at the end and
an <code>if</code> statement doesn&rsquo;t either, unless the then or else statement happens to
be one that ends in a semicolon.</p>
</aside>
<p>An <code>if</code> statement has an expression for the condition, then a statement to execute
if the condition is truthy. Optionally, it may also have an <code>else</code> keyword and a
statement to execute if the condition is falsey. The <span name="if-ast">syntax
tree node</span> has fields for each of those three pieces.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Expression : Expr expression&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;If         : Expr condition, Stmt thenBranch,&quot;</span> +
                  <span class="s">&quot; Stmt elseBranch&quot;</span>,
</pre><pre class="insert-after">      &quot;Print      : Expr expression&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="if-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#if-statement">Appendix II</a>.</p>
</aside>
<p>Like other statements, the parser recognizes an <code>if</code> statement by the leading
<code>if</code> keyword.</p>
<div class="codehilite"><pre class="insert-before">  private Stmt statement() {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>statement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">match</span>(<span class="i">IF</span>)) <span class="k">return</span> <span class="i">ifStatement</span>();
</pre><pre class="insert-after">    if (match(PRINT)) return printStatement();
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>statement</em>()</div>

<p>When it finds one, it calls this new method to parse the rest:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>statement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">ifStatement</span>() {
    <span class="i">consume</span>(<span class="i">LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after &#39;if&#39;.&quot;</span>);
    <span class="t">Expr</span> <span class="i">condition</span> = <span class="i">expression</span>();
    <span class="i">consume</span>(<span class="i">RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after if condition.&quot;</span>);<span name="parens"> </span>

    <span class="t">Stmt</span> <span class="i">thenBranch</span> = <span class="i">statement</span>();
    <span class="t">Stmt</span> <span class="i">elseBranch</span> = <span class="k">null</span>;
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">ELSE</span>)) {
      <span class="i">elseBranch</span> = <span class="i">statement</span>();
    }

    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">If</span>(<span class="i">condition</span>, <span class="i">thenBranch</span>, <span class="i">elseBranch</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>statement</em>()</div>

<aside name="parens">
<p>The parentheses around the condition are only half useful. You need some kind of
delimiter <em>between</em> the condition and the then statement, otherwise the parser
can&rsquo;t tell when it has reached the end of the condition expression. But the
<em>opening</em> parenthesis after <code>if</code> doesn&rsquo;t do anything useful. Dennis Ritchie put
it there so he could use <code>)</code> as the ending delimiter without having unbalanced
parentheses.</p>
<p>Other languages like Lua and some BASICs use a keyword like <code>then</code> as the ending
delimiter and don&rsquo;t have anything before the condition. Go and Swift instead
require the statement to be a braced block. That lets them use the <code>{</code> at the
beginning of the statement to tell when the condition is done.</p>
</aside>
<p>As usual, the parsing code hews closely to the grammar. It detects an else
clause by looking for the preceding <code>else</code> keyword. If there isn&rsquo;t one, the
<code>elseBranch</code> field in the syntax tree is <code>null</code>.</p>
<p>That seemingly innocuous optional else has, in fact, opened up an ambiguity in
our grammar. Consider:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">first</span>) <span class="k">if</span> (<span class="i">second</span>) <span class="i">whenTrue</span>(); <span class="k">else</span> <span class="i">whenFalse</span>();
</pre></div>
<p>Here&rsquo;s the riddle: Which <code>if</code> statement does that else clause belong to? This
isn&rsquo;t just a theoretical question about how we notate our grammar. It actually
affects how the code executes:</p>
<ul>
<li>
<p>If we attach the else to the first <code>if</code> statement, then <code>whenFalse()</code> is
called if <code>first</code> is falsey, regardless of what value <code>second</code> has.</p>
</li>
<li>
<p>If we attach it to the second <code>if</code> statement, then <code>whenFalse()</code> is only
called if <code>first</code> is truthy and <code>second</code> is falsey.</p>
</li>
</ul>
<p>Since else clauses are optional, and there is no explicit delimiter marking the
end of the <code>if</code> statement, the grammar is ambiguous when you nest <code>if</code>s in this
way. This classic pitfall of syntax is called the <strong><a href="https://en.wikipedia.org/wiki/Dangling_else">dangling else</a></strong> problem.</p>
<p><span name="else"></span></p><img class="above" src="image/control-flow/dangling-else.png" alt="Two ways the else can be interpreted." />
<aside name="else">
<p>Here, formatting highlights the two ways the else could be parsed. But note that
since whitespace characters are ignored by the parser, this is only a guide to
the human reader.</p>
</aside>
<p>It <em>is</em> possible to define a context-free grammar that avoids the ambiguity
directly, but it requires splitting most of the statement rules into pairs, one
that allows an <code>if</code> with an <code>else</code> and one that doesn&rsquo;t. It&rsquo;s annoying.</p>
<p>Instead, most languages and parsers avoid the problem in an ad hoc way. No
matter what hack they use to get themselves out of the trouble, they always
choose the same interpretation<span class="em">&mdash;</span>the <code>else</code> is bound to the nearest <code>if</code> that
precedes it.</p>
<p>Our parser conveniently does that already. Since <code>ifStatement()</code> eagerly looks
for an <code>else</code> before returning, the innermost call in a nested series will claim
the else clause for itself before returning to the outer <code>if</code> statements.</p>
<p>Syntax in hand, we are ready to interpret.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitExpressionStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitIfStmt</span>(<span class="t">Stmt</span>.<span class="t">If</span> <span class="i">stmt</span>) {
    <span class="k">if</span> (<span class="i">isTruthy</span>(<span class="i">evaluate</span>(<span class="i">stmt</span>.<span class="i">condition</span>))) {
      <span class="i">execute</span>(<span class="i">stmt</span>.<span class="i">thenBranch</span>);
    } <span class="k">else</span> <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">elseBranch</span> != <span class="k">null</span>) {
      <span class="i">execute</span>(<span class="i">stmt</span>.<span class="i">elseBranch</span>);
    }
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitExpressionStmt</em>()</div>

<p>The interpreter implementation is a thin wrapper around the self-same Java code.
It evaluates the condition. If truthy, it executes the then branch. Otherwise,
if there is an else branch, it executes that.</p>
<p>If you compare this code to how the interpreter handles other syntax we&rsquo;ve
implemented, the part that makes control flow special is that Java <code>if</code>
statement. Most other syntax trees always evaluate their subtrees. Here, we may
not evaluate the then or else statement. If either of those has a side effect,
the choice not to evaluate it becomes user visible.</p>
<h2><a href="#logical-operators" id="logical-operators"><small>9&#8202;.&#8202;3</small>Logical Operators</a></h2>
<p>Since we don&rsquo;t have the conditional operator, you might think we&rsquo;re done with
branching, but no. Even without the ternary operator, there are two other
operators that are technically control flow constructs<span class="em">&mdash;</span>the logical operators
<code>and</code> and <code>or</code>.</p>
<p>These aren&rsquo;t like other binary operators because they <strong>short-circuit</strong>. If,
after evaluating the left operand, we know what the result of the logical
expression must be, we don&rsquo;t evaluate the right operand. For example:</p>
<div class="codehilite"><pre><span class="k">false</span> <span class="k">and</span> <span class="i">sideEffect</span>();
</pre></div>
<p>For an <code>and</code> expression to evaluate to something truthy, both operands must be
truthy. We can see as soon as we evaluate the left <code>false</code> operand that that
isn&rsquo;t going to be the case, so there&rsquo;s no need to evaluate <code>sideEffect()</code> and it
gets skipped.</p>
<p>This is why we didn&rsquo;t implement the logical operators with the other binary
operators. Now we&rsquo;re ready. The two new operators are low in the precedence
table. Similar to <code>||</code> and <code>&amp;&amp;</code> in C, they each have their <span
name="logical">own</span> precedence with <code>or</code> lower than <code>and</code>. We slot them
right between <code>assignment</code> and <code>equality</code>.</p>
<aside name="logical">
<p>I&rsquo;ve always wondered why they don&rsquo;t have the same precedence, like the various
comparison or equality operators do.</p>
</aside>
<div class="codehilite"><pre><span class="i">expression</span>     → <span class="i">assignment</span> ;
<span class="i">assignment</span>     → <span class="t">IDENTIFIER</span> <span class="s">&quot;=&quot;</span> <span class="i">assignment</span>
               | <span class="i">logic_or</span> ;
<span class="i">logic_or</span>       → <span class="i">logic_and</span> ( <span class="s">&quot;or&quot;</span> <span class="i">logic_and</span> )* ;
<span class="i">logic_and</span>      → <span class="i">equality</span> ( <span class="s">&quot;and&quot;</span> <span class="i">equality</span> )* ;
</pre></div>
<p>Instead of falling back to <code>equality</code>, <code>assignment</code> now cascades to <code>logic_or</code>.
The two new rules, <code>logic_or</code> and <code>logic_and</code>, are <span
name="same">similar</span> to other binary operators. Then <code>logic_and</code> calls
out to <code>equality</code> for its operands, and we chain back to the rest of the
expression rules.</p>
<aside name="same">
<p>The <em>syntax</em> doesn&rsquo;t care that they short-circuit. That&rsquo;s a semantic concern.</p>
</aside>
<p>We could reuse the existing Expr.Binary class for these two new expressions
since they have the same fields. But then <code>visitBinaryExpr()</code> would have to
check to see if the operator is one of the logical operators and use a different
code path to handle the short-circuiting. I think it&rsquo;s cleaner to define a <span
name="logical-ast">new class</span> for these operators so that they get their
own visit method.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Literal  : Object value&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Logical  : Expr left, Token operator, Expr right&quot;</span>,
</pre><pre class="insert-after">      &quot;Unary    : Token operator, Expr right&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="logical-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#logical-expression">Appendix II</a>.</p>
</aside>
<p>To weave the new expressions into the parser, we first change the parsing code
for assignment to call <code>or()</code>.</p>
<div class="codehilite"><pre class="insert-before">  private Expr assignment() {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>assignment</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">or</span>();
</pre><pre class="insert-after">

    if (match(EQUAL)) {
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>assignment</em>(), replace 1 line</div>

<p>The code to parse a series of <code>or</code> expressions mirrors other binary operators.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>assignment</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">or</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">and</span>();

    <span class="k">while</span> (<span class="i">match</span>(<span class="i">OR</span>)) {
      <span class="t">Token</span> <span class="i">operator</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">right</span> = <span class="i">and</span>();
      <span class="i">expr</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Logical</span>(<span class="i">expr</span>, <span class="i">operator</span>, <span class="i">right</span>);
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>assignment</em>()</div>

<p>Its operands are the next higher level of precedence, the new <code>and</code> expression.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>or</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">and</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">equality</span>();

    <span class="k">while</span> (<span class="i">match</span>(<span class="i">AND</span>)) {
      <span class="t">Token</span> <span class="i">operator</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">right</span> = <span class="i">equality</span>();
      <span class="i">expr</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Logical</span>(<span class="i">expr</span>, <span class="i">operator</span>, <span class="i">right</span>);
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>or</em>()</div>

<p>That calls <code>equality()</code> for its operands, and with that, the expression parser
is all tied back together again. We&rsquo;re ready to interpret.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitLiteralExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitLogicalExpr</span>(<span class="t">Expr</span>.<span class="t">Logical</span> <span class="i">expr</span>) {
    <span class="t">Object</span> <span class="i">left</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">left</span>);

    <span class="k">if</span> (<span class="i">expr</span>.<span class="i">operator</span>.<span class="i">type</span> == <span class="t">TokenType</span>.<span class="i">OR</span>) {
      <span class="k">if</span> (<span class="i">isTruthy</span>(<span class="i">left</span>)) <span class="k">return</span> <span class="i">left</span>;
    } <span class="k">else</span> {
      <span class="k">if</span> (!<span class="i">isTruthy</span>(<span class="i">left</span>)) <span class="k">return</span> <span class="i">left</span>;
    }

    <span class="k">return</span> <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">right</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitLiteralExpr</em>()</div>

<p>If you compare this to the <a href="evaluating-expressions.html">earlier chapter&rsquo;s</a> <code>visitBinaryExpr()</code>
method, you can see the difference. Here, we evaluate the left operand first. We
look at its value to see if we can short-circuit. If not, and only then, do we
evaluate the right operand.</p>
<p>The other interesting piece here is deciding what actual value to return. Since
Lox is dynamically typed, we allow operands of any type and use truthiness to
determine what each operand represents. We apply similar reasoning to the
result. Instead of promising to literally return <code>true</code> or <code>false</code>, a logical
operator merely guarantees it will return a value with appropriate truthiness.</p>
<p>Fortunately, we have values with proper truthiness right at hand<span class="em">&mdash;</span>the results
of the operands themselves. So we use those. For example:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="s">&quot;hi&quot;</span> <span class="k">or</span> <span class="n">2</span>; <span class="c">// &quot;hi&quot;.</span>
<span class="k">print</span> <span class="k">nil</span> <span class="k">or</span> <span class="s">&quot;yes&quot;</span>; <span class="c">// &quot;yes&quot;.</span>
</pre></div>
<p>On the first line, <code>"hi"</code> is truthy, so the <code>or</code> short-circuits and returns
that. On the second line, <code>nil</code> is falsey, so it evaluates and returns the
second operand, <code>"yes"</code>.</p>
<p>That covers all of the branching primitives in Lox. We&rsquo;re ready to jump ahead to
loops. You see what I did there? <em>Jump. Ahead.</em> Get it? See, it&rsquo;s like a
reference to<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>oh, forget it.</p>
<h2><a href="#while-loops" id="while-loops"><small>9&#8202;.&#8202;4</small>While Loops</a></h2>
<p>Lox features two looping control flow statements, <code>while</code> and <code>for</code>. The <code>while</code>
loop is the simpler one, so we&rsquo;ll start there. Its grammar is the same as in C.</p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">ifStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">whileStmt</span>
               | <span class="i">block</span> ;

<span class="i">whileStmt</span>      → <span class="s">&quot;while&quot;</span> <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> <span class="i">statement</span> ;
</pre></div>
<p>We add another clause to the statement rule that points to the new rule for
while. It takes a <code>while</code> keyword, followed by a parenthesized condition
expression, then a statement for the body. That new grammar rule gets a <span
name="while-ast">syntax tree node</span>.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Print      : Expr expression&quot;,
</pre><pre class="insert-before">      <span class="s">&quot;Var        : Token name, Expr initializer&quot;</span><span class="insert-comma">,</span>
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()<br>
add <em>&ldquo;,&rdquo;</em> to previous line</div>
<pre class="insert">      <span class="s">&quot;While      : Expr condition, Stmt body&quot;</span>
</pre><pre class="insert-after">    ));
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>(), add <em>&ldquo;,&rdquo;</em> to previous line</div>

<aside name="while-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#while-statement">Appendix II</a>.</p>
</aside>
<p>The node stores the condition and body. Here you can see why it&rsquo;s nice to have
separate base classes for expressions and statements. The field declarations
make it clear that the condition is an expression and the body is a statement.</p>
<p>Over in the parser, we follow the same process we used for <code>if</code> statements.
First, we add another case in <code>statement()</code> to detect and match the leading
keyword.</p>
<div class="codehilite"><pre class="insert-before">    if (match(PRINT)) return printStatement();
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>statement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">match</span>(<span class="i">WHILE</span>)) <span class="k">return</span> <span class="i">whileStatement</span>();
</pre><pre class="insert-after">    if (match(LEFT_BRACE)) return new Stmt.Block(block());
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>statement</em>()</div>

<p>That delegates the real work to this method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>varDeclaration</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">whileStatement</span>() {
    <span class="i">consume</span>(<span class="i">LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after &#39;while&#39;.&quot;</span>);
    <span class="t">Expr</span> <span class="i">condition</span> = <span class="i">expression</span>();
    <span class="i">consume</span>(<span class="i">RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after condition.&quot;</span>);
    <span class="t">Stmt</span> <span class="i">body</span> = <span class="i">statement</span>();

    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">While</span>(<span class="i">condition</span>, <span class="i">body</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>varDeclaration</em>()</div>

<p>The grammar is dead simple and this is a straight translation of it to Java.
Speaking of translating straight to Java, here&rsquo;s how we execute the new syntax:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitVarStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitWhileStmt</span>(<span class="t">Stmt</span>.<span class="t">While</span> <span class="i">stmt</span>) {
    <span class="k">while</span> (<span class="i">isTruthy</span>(<span class="i">evaluate</span>(<span class="i">stmt</span>.<span class="i">condition</span>))) {
      <span class="i">execute</span>(<span class="i">stmt</span>.<span class="i">body</span>);
    }
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitVarStmt</em>()</div>

<p>Like the visit method for <code>if</code>, this visitor uses the corresponding Java
feature. This method isn&rsquo;t complex, but it makes Lox much more powerful. We can
finally write a program whose running time isn&rsquo;t strictly bound by the length of
the source code.</p>
<h2><a href="#for-loops" id="for-loops"><small>9&#8202;.&#8202;5</small>For Loops</a></h2>
<p>We&rsquo;re down to the last control flow construct, <span name="for">Ye Olde</span>
C-style <code>for</code> loop. I probably don&rsquo;t need to remind you, but it looks like this:</p>
<div class="codehilite"><pre><span class="k">for</span> (<span class="k">var</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="n">10</span>; <span class="i">i</span> = <span class="i">i</span> + <span class="n">1</span>) <span class="k">print</span> <span class="i">i</span>;
</pre></div>
<p>In grammarese, that&rsquo;s:</p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">forStmt</span>
               | <span class="i">ifStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">whileStmt</span>
               | <span class="i">block</span> ;

<span class="i">forStmt</span>        → <span class="s">&quot;for&quot;</span> <span class="s">&quot;(&quot;</span> ( <span class="i">varDecl</span> | <span class="i">exprStmt</span> | <span class="s">&quot;;&quot;</span> )
                 <span class="i">expression</span>? <span class="s">&quot;;&quot;</span>
                 <span class="i">expression</span>? <span class="s">&quot;)&quot;</span> <span class="i">statement</span> ;
</pre></div>
<aside name="for">
<p>Most modern languages have a higher-level looping statement for iterating over
arbitrary user-defined sequences. C# has <code>foreach</code>, Java has &ldquo;enhanced for&rdquo;,
even C++ has range-based <code>for</code> statements now. Those offer cleaner syntax than
C&rsquo;s <code>for</code> statement by implicitly calling into an iteration protocol that the
object being looped over supports.</p>
<p>I love those. For Lox, though, we&rsquo;re limited by building up the interpreter a
chapter at a time. We don&rsquo;t have objects and methods yet, so we have no way of
defining an iteration protocol that the <code>for</code> loop could use. So we&rsquo;ll stick
with the old school C <code>for</code> loop. Think of it as &ldquo;vintage&rdquo;. The fixie of control
flow statements.</p>
</aside>
<p>Inside the parentheses, you have three clauses separated by semicolons:</p>
<ol>
<li>
<p>The first clause is the <em>initializer</em>. It is executed exactly once, before
anything else. It&rsquo;s usually an expression, but for convenience, we also
allow a variable declaration. In that case, the variable is scoped to the
rest of the <code>for</code> loop<span class="em">&mdash;</span>the other two clauses and the body.</p>
</li>
<li>
<p>Next is the <em>condition</em>. As in a <code>while</code> loop, this expression controls when
to exit the loop. It&rsquo;s evaluated once at the beginning of each iteration,
including the first. If the result is truthy, it executes the loop body.
Otherwise, it bails.</p>
</li>
<li>
<p>The last clause is the <em>increment</em>. It&rsquo;s an arbitrary expression that does
some work at the end of each loop iteration. The result of the expression is
discarded, so it must have a side effect to be useful. In practice, it
usually increments a variable.</p>
</li>
</ol>
<p>Any of these clauses can be omitted. Following the closing parenthesis is a
statement for the body, which is typically a block.</p>
<h3><a href="#desugaring" id="desugaring"><small>9&#8202;.&#8202;5&#8202;.&#8202;1</small>Desugaring</a></h3>
<p>That&rsquo;s a lot of machinery, but note that none of it does anything you couldn&rsquo;t
do with the statements we already have. If <code>for</code> loops didn&rsquo;t support
initializer clauses, you could just put the initializer expression before the
<code>for</code> statement. Without an increment clause, you could simply put the increment
expression at the end of the body yourself.</p>
<p>In other words, Lox doesn&rsquo;t <em>need</em> <code>for</code> loops, they just make some common code
patterns more pleasant to write. These kinds of features are called <span
name="sugar"><strong>syntactic sugar</strong></span>. For example, the previous <code>for</code> loop
could be rewritten like so:</p>
<aside name="sugar">
<p>This delightful turn of phrase was coined by Peter J. Landin in 1964 to describe
how some of the nice expression forms supported by languages like ALGOL were a
sweetener sprinkled over the more fundamental<span class="em">&mdash;</span>but presumably less palatable<span class="em">&mdash;</span>lambda calculus underneath.</p><img class="above" src="image/control-flow/sugar.png" alt="Slightly more than a spoonful of sugar." />
</aside>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">i</span> = <span class="n">0</span>;
  <span class="k">while</span> (<span class="i">i</span> &lt; <span class="n">10</span>) {
    <span class="k">print</span> <span class="i">i</span>;
    <span class="i">i</span> = <span class="i">i</span> + <span class="n">1</span>;
  }
}
</pre></div>
<p>This script has the exact same semantics as the previous one, though it&rsquo;s not as
easy on the eyes. Syntactic sugar features like Lox&rsquo;s <code>for</code> loop make a language
more pleasant and productive to work in. But, especially in sophisticated
language implementations, every language feature that requires back-end support
and optimization is expensive.</p>
<p>We can have our cake and eat it too by <span
name="caramel"><strong>desugaring</strong></span>. That funny word describes a process where
the front end takes code using syntax sugar and translates it to a more
primitive form that the back end already knows how to execute.</p>
<aside name="caramel">
<p>Oh, how I wish the accepted term for this was &ldquo;caramelization&rdquo;. Why introduce a
metaphor if you aren&rsquo;t going to stick with it?</p>
</aside>
<p>We&rsquo;re going to desugar <code>for</code> loops to the <code>while</code> loops and other statements the
interpreter already handles. In our simple interpreter, desugaring really
doesn&rsquo;t save us much work, but it does give me an excuse to introduce you to the
technique. So, unlike the previous statements, we <em>won&rsquo;t</em> add a new syntax tree
node. Instead, we go straight to parsing. First, add an import we&rsquo;ll need soon.</p>
<div class="codehilite"><pre class="insert-before">import java.util.ArrayList;
</pre><div class="source-file"><em>lox/Parser.java</em></div>
<pre class="insert"><span class="k">import</span> <span class="i">java.util.Arrays</span>;
</pre><pre class="insert-after">import java.util.List;
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em></div>

<p>Like every statement, we start parsing a <code>for</code> loop by matching its keyword.</p>
<div class="codehilite"><pre class="insert-before">  private Stmt statement() {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>statement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">match</span>(<span class="i">FOR</span>)) <span class="k">return</span> <span class="i">forStatement</span>();
</pre><pre class="insert-after">    if (match(IF)) return ifStatement();
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>statement</em>()</div>

<p>Here is where it gets interesting. The desugaring is going to happen here, so
we&rsquo;ll build this method a piece at a time, starting with the opening parenthesis
before the clauses.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>statement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">forStatement</span>() {
    <span class="i">consume</span>(<span class="i">LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after &#39;for&#39;.&quot;</span>);

    <span class="c">// More here...</span>
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>statement</em>()</div>

<p>The first clause following that is the initializer.</p>
<div class="codehilite"><pre class="insert-before">    consume(LEFT_PAREN, &quot;Expect '(' after 'for'.&quot;);

</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>forStatement</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">Stmt</span> <span class="i">initializer</span>;
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">SEMICOLON</span>)) {
      <span class="i">initializer</span> = <span class="k">null</span>;
    } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="i">VAR</span>)) {
      <span class="i">initializer</span> = <span class="i">varDeclaration</span>();
    } <span class="k">else</span> {
      <span class="i">initializer</span> = <span class="i">expressionStatement</span>();
    }
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>forStatement</em>(), replace 1 line</div>

<p>If the token following the <code>(</code> is a semicolon then the initializer has been
omitted. Otherwise, we check for a <code>var</code> keyword to see if it&rsquo;s a <span
name="variable">variable</span> declaration. If neither of those matched, it
must be an expression. We parse that and wrap it in an expression statement so
that the initializer is always of type Stmt.</p>
<aside name="variable">
<p>In a previous chapter, I said we can split expression and statement syntax trees
into two separate class hierarchies because there&rsquo;s no single place in the
grammar that allows both an expression and a statement. That wasn&rsquo;t <em>entirely</em>
true, I guess.</p>
</aside>
<p>Next up is the condition.</p>
<div class="codehilite"><pre class="insert-before">      initializer = expressionStatement();
    }
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">

    <span class="t">Expr</span> <span class="i">condition</span> = <span class="k">null</span>;
    <span class="k">if</span> (!<span class="i">check</span>(<span class="i">SEMICOLON</span>)) {
      <span class="i">condition</span> = <span class="i">expression</span>();
    }
    <span class="i">consume</span>(<span class="i">SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after loop condition.&quot;</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>forStatement</em>()</div>

<p>Again, we look for a semicolon to see if the clause has been omitted. The last
clause is the increment.</p>
<div class="codehilite"><pre class="insert-before">    consume(SEMICOLON, &quot;Expect ';' after loop condition.&quot;);
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">

    <span class="t">Expr</span> <span class="i">increment</span> = <span class="k">null</span>;
    <span class="k">if</span> (!<span class="i">check</span>(<span class="i">RIGHT_PAREN</span>)) {
      <span class="i">increment</span> = <span class="i">expression</span>();
    }
    <span class="i">consume</span>(<span class="i">RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after for clauses.&quot;</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>forStatement</em>()</div>

<p>It&rsquo;s similar to the condition clause except this one is terminated by the
closing parenthesis. All that remains is the <span name="body">body</span>.</p>
<aside name="body">
<p>Is it just me or does that sound morbid? &ldquo;All that remained<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>was the <em>body</em>&rdquo;.</p>
</aside>
<div class="codehilite"><pre class="insert-before">    consume(RIGHT_PAREN, &quot;Expect ')' after for clauses.&quot;);
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">    <span class="t">Stmt</span> <span class="i">body</span> = <span class="i">statement</span>();

    <span class="k">return</span> <span class="i">body</span>;
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>forStatement</em>()</div>

<p>We&rsquo;ve parsed all of the various pieces of the <code>for</code> loop and the resulting AST
nodes are sitting in a handful of Java local variables. This is where the
desugaring comes in. We take those and use them to synthesize syntax tree nodes
that express the semantics of the <code>for</code> loop, like the hand-desugared example I
showed you earlier.</p>
<p>The code is a little simpler if we work backward, so we start with the increment
clause.</p>
<div class="codehilite"><pre class="insert-before">    Stmt body = statement();

</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">increment</span> != <span class="k">null</span>) {
      <span class="i">body</span> = <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Block</span>(
          <span class="t">Arrays</span>.<span class="i">asList</span>(
              <span class="i">body</span>,
              <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Expression</span>(<span class="i">increment</span>)));
    }

</pre><pre class="insert-after">    return body;
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>forStatement</em>()</div>

<p>The increment, if there is one, executes after the body in each iteration of the
loop. We do that by replacing the body with a little block that contains the
original body followed by an expression statement that evaluates the increment.</p>
<div class="codehilite"><pre class="insert-before">    }

</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">condition</span> == <span class="k">null</span>) <span class="i">condition</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Literal</span>(<span class="k">true</span>);
    <span class="i">body</span> = <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">While</span>(<span class="i">condition</span>, <span class="i">body</span>);

</pre><pre class="insert-after">    return body;
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>forStatement</em>()</div>

<p>Next, we take the condition and the body and build the loop using a primitive
<code>while</code> loop. If the condition is omitted, we jam in <code>true</code> to make an infinite
loop.</p>
<div class="codehilite"><pre class="insert-before">    body = new Stmt.While(condition, body);

</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">initializer</span> != <span class="k">null</span>) {
      <span class="i">body</span> = <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Block</span>(<span class="t">Arrays</span>.<span class="i">asList</span>(<span class="i">initializer</span>, <span class="i">body</span>));
    }

</pre><pre class="insert-after">    return body;
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>forStatement</em>()</div>

<p>Finally, if there is an initializer, it runs once before the entire loop. We do
that by, again, replacing the whole statement with a block that runs the
initializer and then executes the loop.</p>
<p>That&rsquo;s it. Our interpreter now supports C-style <code>for</code> loops and we didn&rsquo;t have
to touch the Interpreter class at all. Since we desugared to nodes the
interpreter already knows how to visit, there is no more work to do.</p>
<p>Finally, Lox is powerful enough to entertain us, at least for a few minutes.
Here&rsquo;s a tiny program to print the first 21 elements in the Fibonacci
sequence:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="n">0</span>;
<span class="k">var</span> <span class="i">temp</span>;

<span class="k">for</span> (<span class="k">var</span> <span class="i">b</span> = <span class="n">1</span>; <span class="i">a</span> &lt; <span class="n">10000</span>; <span class="i">b</span> = <span class="i">temp</span> + <span class="i">b</span>) {
  <span class="k">print</span> <span class="i">a</span>;
  <span class="i">temp</span> = <span class="i">a</span>;
  <span class="i">a</span> = <span class="i">b</span>;
}
</pre></div>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>A few chapters from now, when Lox supports first-class functions and dynamic
dispatch, we technically won&rsquo;t <em>need</em> branching statements built into the
language. Show how conditional execution can be implemented in terms of
those. Name a language that uses this technique for its control flow.</p>
</li>
<li>
<p>Likewise, looping can be implemented using those same tools, provided our
interpreter supports an important optimization. What is it, and why is it
necessary? Name a language that uses this technique for iteration.</p>
</li>
<li>
<p>Unlike Lox, most other C-style languages also support <code>break</code> and <code>continue</code>
statements inside loops. Add support for <code>break</code> statements.</p>
<p>The syntax is a <code>break</code> keyword followed by a semicolon. It should be a
syntax error to have a <code>break</code> statement appear outside of any enclosing
loop. At runtime, a <code>break</code> statement causes execution to jump to the end of
the nearest enclosing loop and proceeds from there. Note that the <code>break</code>
may be nested inside other blocks and <code>if</code> statements that also need to be
exited.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Spoonfuls of Syntactic Sugar</a></h2>
<p>When you design your own language, you choose how much syntactic sugar to pour
into the grammar. Do you make an unsweetened health food where each semantic
operation maps to a single syntactic unit, or some decadent dessert where every
bit of behavior can be expressed ten different ways? Successful languages
inhabit all points along this continuum.</p>
<p>On the extreme acrid end are those with ruthlessly minimal syntax like Lisp,
Forth, and Smalltalk. Lispers famously claim their language &ldquo;has no syntax&rdquo;,
while Smalltalkers proudly show that you can fit the entire grammar on an index
card. This tribe has the philosophy that the <em>language</em> doesn&rsquo;t need syntactic
sugar. Instead, the minimal syntax and semantics it provides are powerful enough
to let library code be as expressive as if it were part of the language itself.</p>
<p>Near these are languages like C, Lua, and Go. They aim for simplicity and
clarity over minimalism. Some, like Go, deliberately eschew both syntactic sugar
and the kind of syntactic extensibility of the previous category. They want the
syntax to get out of the way of the semantics, so they focus on keeping both the
grammar and libraries simple. Code should be obvious more than beautiful.</p>
<p>Somewhere in the middle you have languages like Java, C#, and Python. Eventually
you reach Ruby, C++, Perl, and D<span class="em">&mdash;</span>languages which have stuffed so much syntax
into their grammar, they are running out of punctuation characters on the
keyboard.</p>
<p>To some degree, location on the spectrum correlates with age. It&rsquo;s relatively
easy to add bits of syntactic sugar in later releases. New syntax is a crowd
pleaser, and it&rsquo;s less likely to break existing programs than mucking with the
semantics. Once syntax is added, you can never take it away, so languages tend to sweeten
with time. One of the main benefits of creating a new language from scratch is
it gives you an opportunity to scrape off those accumulated layers of frosting
and start over.</p>
<p>Syntactic sugar has a bad rap among the PL intelligentsia. There&rsquo;s a real fetish
for minimalism in that crowd. There is some justification for that. Poorly
designed, unneeded syntax raises the cognitive load without adding enough
expressiveness to carry its weight. Since there is always pressure to cram new
features into the language, it takes discipline and a focus on simplicity to
avoid bloat. Once you add some syntax, you&rsquo;re stuck with it, so it&rsquo;s smart to be
parsimonious.</p>
<p>At the same time, most successful languages do have fairly complex grammars, at
least by the time they are widely used. Programmers spend a ton of time in their
language of choice, and a few niceties here and there really can improve the
comfort and efficiency of their work.</p>
<p>Striking the right balance<span class="em">&mdash;</span>choosing the right level of sweetness for your
language<span class="em">&mdash;</span>relies on your own sense of taste.</p>
</div>

<footer>
<a href="functions.html" class="next">
  Next Chapter: &ldquo;Functions&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/dedication.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Dedication &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h2><small></small>Dedication</h2>
<hr>

<div class="prev-next">
    <a href="index.html" title="Crafting Interpreters" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="acknowledgements.html" title="Acknowledgements" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="index.html" title="Crafting Interpreters" class="prev">←</a>
<a href="acknowledgements.html" title="Acknowledgements" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h2><small></small>Dedication</h2>
<hr>

<div class="prev-next">
    <a href="index.html" title="Crafting Interpreters" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="acknowledgements.html" title="Acknowledgements" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <h1 class="part">Dedication</h1>


<div class="dedication"><img src="image/ginny.png" alt="My beloved dog and her stupid face." />
<p>To Ginny, I miss your stupid face.</p>
</div>

<footer>
<a href="acknowledgements.html" class="next">
  Next Part: &ldquo;Acknowledgements&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/evaluating-expressions.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Evaluating Expressions &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Evaluating Expressions<small>7</small></a></h3>

<ul>
    <li><a href="#representing-values"><small>7.1</small> Representing Values</a></li>
    <li><a href="#evaluating-expressions"><small>7.2</small> Evaluating Expressions</a></li>
    <li><a href="#runtime-errors"><small>7.3</small> Runtime Errors</a></li>
    <li><a href="#hooking-up-the-interpreter"><small>7.4</small> Hooking Up the Interpreter</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Static and Dynamic Typing</a></li>
</ul>


<div class="prev-next">
    <a href="parsing-expressions.html" title="Parsing Expressions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="statements-and-state.html" title="Statements and State" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="parsing-expressions.html" title="Parsing Expressions" class="prev">←</a>
<a href="statements-and-state.html" title="Statements and State" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Evaluating Expressions<small>7</small></a></h3>

<ul>
    <li><a href="#representing-values"><small>7.1</small> Representing Values</a></li>
    <li><a href="#evaluating-expressions"><small>7.2</small> Evaluating Expressions</a></li>
    <li><a href="#runtime-errors"><small>7.3</small> Runtime Errors</a></li>
    <li><a href="#hooking-up-the-interpreter"><small>7.4</small> Hooking Up the Interpreter</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Static and Dynamic Typing</a></li>
</ul>


<div class="prev-next">
    <a href="parsing-expressions.html" title="Parsing Expressions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="statements-and-state.html" title="Statements and State" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">7</div>
  <h1>Evaluating Expressions</h1>

<blockquote>
<p>You are my creator, but I am your master; obey!</p>
<p><cite>Mary Shelley, <em>Frankenstein</em></cite></p>
</blockquote>
<p>If you want to properly set the mood for this chapter, try to conjure up a
thunderstorm, one of those swirling tempests that likes to yank open shutters at
the climax of the story. Maybe toss in a few bolts of lightning. In this
chapter, our interpreter will take breath, open its eyes, and execute some code.</p>
<p><span name="spooky"></span></p><img src="image/evaluating-expressions/lightning.png" alt="A bolt of lightning strikes a Victorian mansion. Spooky!" />
<aside name="spooky">
<p>A decrepit Victorian mansion is optional, but adds to the ambiance.</p>
</aside>
<p>There are all manner of ways that language implementations make a computer do
what the user&rsquo;s source code commands. They can compile it to machine code,
translate it to another high-level language, or reduce it to some bytecode
format for a virtual machine to run. For our first interpreter, though, we are
going to take the simplest, shortest path and execute the syntax tree itself.</p>
<p>Right now, our parser only supports expressions. So, to &ldquo;execute&rdquo; code, we will
evaluate an expression and produce a value. For each kind of expression syntax
we can parse<span class="em">&mdash;</span>literal, operator, etc.<span class="em">&mdash;</span>we need a corresponding chunk of code
that knows how to evaluate that tree and produce a result. That raises two
questions:</p>
<ol>
<li>
<p>What kinds of values do we produce?</p>
</li>
<li>
<p>How do we organize those chunks of code?</p>
</li>
</ol>
<p>Taking them on one at a time<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h2><a href="#representing-values" id="representing-values"><small>7&#8202;.&#8202;1</small>Representing Values</a></h2>
<p>In Lox, <span name="value">values</span> are created by literals, computed by
expressions, and stored in variables. The user sees these as <em>Lox</em> objects, but
they are implemented in the underlying language our interpreter is written in.
That means bridging the lands of Lox&rsquo;s dynamic typing and Java&rsquo;s static types. A
variable in Lox can store a value of any (Lox) type, and can even store values
of different types at different points in time. What Java type might we use to
represent that?</p>
<aside name="value">
<p>Here, I&rsquo;m using &ldquo;value&rdquo; and &ldquo;object&rdquo; pretty much interchangeably.</p>
<p>Later in the C interpreter we&rsquo;ll make a slight distinction between them, but
that&rsquo;s mostly to have unique terms for two different corners of the
implementation<span class="em">&mdash;</span>in-place versus heap-allocated data. From the user&rsquo;s
perspective, the terms are synonymous.</p>
</aside>
<p>Given a Java variable with that static type, we must also be able to determine
which kind of value it holds at runtime. When the interpreter executes a <code>+</code>
operator, it needs to tell if it is adding two numbers or concatenating two
strings. Is there a Java type that can hold numbers, strings, Booleans, and
more? Is there one that can tell us what its runtime type is? There is! Good old
java.lang.Object.</p>
<p>In places in the interpreter where we need to store a Lox value, we can use
Object as the type. Java has boxed versions of its primitive types that all
subclass Object, so we can use those for Lox&rsquo;s built-in types:</p><table>
<thead>
<tr>
  <td>Lox type</td>
  <td>Java representation</td>
</tr>
</thead>
<tbody>
<tr>
  <td>Any Lox value</td>
  <td>Object</td>
</tr>
<tr>
  <td><code>nil</code></td>
  <td><code>null</code></td>
</tr>
<tr>
  <td>Boolean</td>
  <td>Boolean</td>
</tr>
<tr>
  <td>number</td>
  <td>Double</td>
</tr>
<tr>
  <td>string</td>
  <td>String</td>
</tr>
</tbody>
</table>
<p>Given a value of static type Object, we can determine if the runtime value is a
number or a string or whatever using Java&rsquo;s built-in <code>instanceof</code> operator. In
other words, the <span name="jvm">JVM</span>&rsquo;s own object representation
conveniently gives us everything we need to implement Lox&rsquo;s built-in types.
We&rsquo;ll have to do a little more work later when we add Lox&rsquo;s notions of
functions, classes, and instances, but Object and the boxed primitive classes
are sufficient for the types we need right now.</p>
<aside name="jvm">
<p>Another thing we need to do with values is manage their memory, and Java does
that too. A handy object representation and a really nice garbage collector are
the main reasons we&rsquo;re writing our first interpreter in Java.</p>
</aside>
<h2><a href="#evaluating-expressions" id="evaluating-expressions"><small>7&#8202;.&#8202;2</small>Evaluating Expressions</a></h2>
<p>Next, we need blobs of code to implement the evaluation logic for each kind of
expression we can parse. We could stuff that code into the syntax tree classes
in something like an <code>interpret()</code> method. In effect, we could tell each syntax
tree node, &ldquo;Interpret thyself&rdquo;. This is the Gang of Four&rsquo;s
<a href="https://en.wikipedia.org/wiki/Interpreter_pattern">Interpreter design pattern</a>. It&rsquo;s a neat pattern, but like I mentioned
earlier, it gets messy if we jam all sorts of logic into the tree classes.</p>
<p>Instead, we&rsquo;re going to reuse our groovy <a href="representing-code.html#the-visitor-pattern">Visitor pattern</a>. In the previous
chapter, we created an AstPrinter class. It took in a syntax tree and
recursively traversed it, building up a string which it ultimately returned.
That&rsquo;s almost exactly what a real interpreter does, except instead of
concatenating strings, it computes values.</p>
<p>We start with a new class.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">class</span> <span class="t">Interpreter</span> <span class="k">implements</span> <span class="t">Expr</span>.<span class="t">Visitor</span>&lt;<span class="t">Object</span>&gt; {
}
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, create new file</div>

<p>The class declares that it&rsquo;s a visitor. The return type of the visit methods
will be Object, the root class that we use to refer to a Lox value in our Java
code. To satisfy the Visitor interface, we need to define visit methods for each
of the four expression tree classes our parser produces. We&rsquo;ll start with the
simplest<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h3><a href="#evaluating-literals" id="evaluating-literals"><small>7&#8202;.&#8202;2&#8202;.&#8202;1</small>Evaluating literals</a></h3>
<p>The leaves of an expression tree<span class="em">&mdash;</span>the atomic bits of syntax that all other
expressions are composed of<span class="em">&mdash;</span>are <span name="leaf">literals</span>. Literals
are almost values already, but the distinction is important. A literal is a <em>bit
of syntax</em> that produces a value. A literal always appears somewhere in the
user&rsquo;s source code. Lots of values are produced by computation and don&rsquo;t exist
anywhere in the code itself. Those aren&rsquo;t literals. A literal comes from the
parser&rsquo;s domain. Values are an interpreter concept, part of the runtime&rsquo;s world.</p>
<aside name="leaf">
<p>In the <a href="statements-and-state.html">next chapter</a>, when we implement variables, we&rsquo;ll add identifier
expressions, which are also leaf nodes.</p>
</aside>
<p>So, much like we converted a literal <em>token</em> into a literal <em>syntax tree node</em>
in the parser, now we convert the literal tree node into a runtime value. That
turns out to be trivial.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em></div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitLiteralExpr</span>(<span class="t">Expr</span>.<span class="t">Literal</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">expr</span>.<span class="i">value</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em></div>

<p>We eagerly produced the runtime value way back during scanning and stuffed it in
the token. The parser took that value and stuck it in the literal tree node,
so to evaluate a literal, we simply pull it back out.</p>
<h3><a href="#evaluating-parentheses" id="evaluating-parentheses"><small>7&#8202;.&#8202;2&#8202;.&#8202;2</small>Evaluating parentheses</a></h3>
<p>The next simplest node to evaluate is grouping<span class="em">&mdash;</span>the node you get as a result
of using explicit parentheses in an expression.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em></div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitGroupingExpr</span>(<span class="t">Expr</span>.<span class="t">Grouping</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">expression</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em></div>

<p>A <span name="grouping">grouping</span> node has a reference to an inner node
for the expression contained inside the parentheses. To evaluate the grouping
expression itself, we recursively evaluate that subexpression and return its result.</p>
<p>We rely on this helper method which simply sends the expression back into the
interpreter&rsquo;s visitor implementation:</p>
<aside name="grouping">
<p>Some parsers don&rsquo;t define tree nodes for parentheses. Instead, when parsing a
parenthesized expression, they simply return the node for the inner expression.
We do create a node for parentheses in Lox because we&rsquo;ll need it later to
correctly handle the left-hand sides of assignment expressions.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em></div>
<pre>  <span class="k">private</span> <span class="t">Object</span> <span class="i">evaluate</span>(<span class="t">Expr</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">expr</span>.<span class="i">accept</span>(<span class="k">this</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em></div>

<h3><a href="#evaluating-unary-expressions" id="evaluating-unary-expressions"><small>7&#8202;.&#8202;2&#8202;.&#8202;3</small>Evaluating unary expressions</a></h3>
<p>Like grouping, unary expressions have a single subexpression that we must
evaluate first. The difference is that the unary expression itself does a little
work afterwards.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitLiteralExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitUnaryExpr</span>(<span class="t">Expr</span>.<span class="t">Unary</span> <span class="i">expr</span>) {
    <span class="t">Object</span> <span class="i">right</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">right</span>);

    <span class="k">switch</span> (<span class="i">expr</span>.<span class="i">operator</span>.<span class="i">type</span>) {
      <span class="k">case</span> <span class="i">MINUS</span>:
        <span class="k">return</span> -(<span class="t">double</span>)<span class="i">right</span>;
    }

    <span class="c">// Unreachable.</span>
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitLiteralExpr</em>()</div>

<p>First, we evaluate the operand expression. Then we apply the unary operator
itself to the result of that. There are two different unary expressions,
identified by the type of the operator token.</p>
<p>Shown here is <code>-</code>, which negates the result of the subexpression. The
subexpression must be a number. Since we don&rsquo;t <em>statically</em> know that in Java,
we <span name="cast">cast</span> it before performing the operation. This type
cast happens at runtime when the <code>-</code> is evaluated. That&rsquo;s the core of what makes
a language dynamically typed right there.</p>
<aside name="cast">
<p>You&rsquo;re probably wondering what happens if the cast fails. Fear not, we&rsquo;ll get
into that soon.</p>
</aside>
<p>You can start to see how evaluation recursively traverses the tree. We can&rsquo;t
evaluate the unary operator itself until after we evaluate its operand
subexpression. That means our interpreter is doing a <strong>post-order traversal</strong><span class="em">&mdash;</span>each node evaluates its children before doing its own work.</p>
<p>The other unary operator is logical not.</p>
<div class="codehilite"><pre class="insert-before">    switch (expr.operator.type) {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitUnaryExpr</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="i">BANG</span>:
        <span class="k">return</span> !<span class="i">isTruthy</span>(<span class="i">right</span>);
</pre><pre class="insert-after">      case MINUS:
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitUnaryExpr</em>()</div>

<p>The implementation is simple, but what is this &ldquo;truthy&rdquo; thing about? We need to
make a little side trip to one of the great questions of Western philosophy:
<em>What is truth?</em></p>
<h3><a href="#truthiness-and-falsiness" id="truthiness-and-falsiness"><small>7&#8202;.&#8202;2&#8202;.&#8202;4</small>Truthiness and falsiness</a></h3>
<p>OK, maybe we&rsquo;re not going to really get into the universal question, but at
least inside the world of Lox, we need to decide what happens when you use
something other than <code>true</code> or <code>false</code> in a logic operation like <code>!</code> or any
other place where a Boolean is expected.</p>
<p>We <em>could</em> just say it&rsquo;s an error because we don&rsquo;t roll with implicit
conversions, but most dynamically typed languages aren&rsquo;t that ascetic. Instead,
they take the universe of values of all types and partition them into two sets,
one of which they define to be &ldquo;true&rdquo;, or &ldquo;truthful&rdquo;, or (my favorite) &ldquo;truthy&rdquo;,
and the rest which are &ldquo;false&rdquo; or &ldquo;falsey&rdquo;. This partitioning is somewhat
arbitrary and gets <span name="weird">weird</span> in a few languages.</p>
<aside name="weird" class="bottom">
<p>In JavaScript, strings are truthy, but empty strings are not. Arrays are truthy
but empty arrays are<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>also truthy. The number <code>0</code> is falsey, but the <em>string</em>
<code>"0"</code> is truthy.</p>
<p>In Python, empty strings are falsey like in JS, but other empty sequences are
falsey too.</p>
<p>In PHP, both the number <code>0</code> and the string <code>"0"</code> are falsey. Most other
non-empty strings are truthy.</p>
<p>Get all that?</p>
</aside>
<p>Lox follows Ruby&rsquo;s simple rule: <code>false</code> and <code>nil</code> are falsey, and everything else
is truthy. We implement that like so:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitUnaryExpr</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">isTruthy</span>(<span class="t">Object</span> <span class="i">object</span>) {
    <span class="k">if</span> (<span class="i">object</span> == <span class="k">null</span>) <span class="k">return</span> <span class="k">false</span>;
    <span class="k">if</span> (<span class="i">object</span> <span class="k">instanceof</span> <span class="t">Boolean</span>) <span class="k">return</span> (<span class="t">boolean</span>)<span class="i">object</span>;
    <span class="k">return</span> <span class="k">true</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitUnaryExpr</em>()</div>

<h3><a href="#evaluating-binary-operators" id="evaluating-binary-operators"><small>7&#8202;.&#8202;2&#8202;.&#8202;5</small>Evaluating binary operators</a></h3>
<p>On to the last expression tree class, binary operators. There&rsquo;s a handful of
them, and we&rsquo;ll start with the arithmetic ones.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>evaluate</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitBinaryExpr</span>(<span class="t">Expr</span>.<span class="t">Binary</span> <span class="i">expr</span>) {
    <span class="t">Object</span> <span class="i">left</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">left</span>);
    <span class="t">Object</span> <span class="i">right</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">right</span>);<span name="left"> </span>

    <span class="k">switch</span> (<span class="i">expr</span>.<span class="i">operator</span>.<span class="i">type</span>) {
      <span class="k">case</span> <span class="i">MINUS</span>:
        <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> - (<span class="t">double</span>)<span class="i">right</span>;
      <span class="k">case</span> <span class="i">SLASH</span>:
        <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> / (<span class="t">double</span>)<span class="i">right</span>;
      <span class="k">case</span> <span class="i">STAR</span>:
        <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> * (<span class="t">double</span>)<span class="i">right</span>;
    }

    <span class="c">// Unreachable.</span>
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>evaluate</em>()</div>

<aside name="left">
<p>Did you notice we pinned down a subtle corner of the language semantics here?
In a binary expression, we evaluate the operands in left-to-right order. If
those operands have side effects, that choice is user visible, so this isn&rsquo;t
simply an implementation detail.</p>
<p>If we want our two interpreters to be consistent (hint: we do), we&rsquo;ll need to
make sure clox does the same thing.</p>
</aside>
<p>I think you can figure out what&rsquo;s going on here. The main difference from the
unary negation operator is that we have two operands to evaluate.</p>
<p>I left out one arithmetic operator because it&rsquo;s a little special.</p>
<div class="codehilite"><pre class="insert-before">    switch (expr.operator.type) {
      case MINUS:
        return (double)left - (double)right;
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="i">PLUS</span>:
        <span class="k">if</span> (<span class="i">left</span> <span class="k">instanceof</span> <span class="t">Double</span> &amp;&amp; <span class="i">right</span> <span class="k">instanceof</span> <span class="t">Double</span>) {
          <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> + (<span class="t">double</span>)<span class="i">right</span>;
        }<span name="plus"> </span>

        <span class="k">if</span> (<span class="i">left</span> <span class="k">instanceof</span> <span class="t">String</span> &amp;&amp; <span class="i">right</span> <span class="k">instanceof</span> <span class="t">String</span>) {
          <span class="k">return</span> (<span class="t">String</span>)<span class="i">left</span> + (<span class="t">String</span>)<span class="i">right</span>;
        }

        <span class="k">break</span>;
</pre><pre class="insert-after">      case SLASH:
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>The <code>+</code> operator can also be used to concatenate two strings. To handle that, we
don&rsquo;t just assume the operands are a certain type and <em>cast</em> them; we
dynamically <em>check</em> the type and choose the appropriate operation. This is why
we need our object representation to support <code>instanceof</code>.</p>
<aside name="plus">
<p>We could have defined an operator specifically for string concatenation. That&rsquo;s
what Perl (<code>.</code>), Lua (<code>..</code>), Smalltalk (<code>,</code>), Haskell (<code>++</code>), and others do.</p>
<p>I thought it would make Lox a little more approachable to use the same syntax as
Java, JavaScript, Python, and others. This means that the <code>+</code> operator is
<strong>overloaded</strong> to support both adding numbers and concatenating strings. Even in
languages that don&rsquo;t use <code>+</code> for strings, they still often overload it for
adding both integers and floating-point numbers.</p>
</aside>
<p>Next up are the comparison operators.</p>
<div class="codehilite"><pre class="insert-before">    switch (expr.operator.type) {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="i">GREATER</span>:
        <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> &gt; (<span class="t">double</span>)<span class="i">right</span>;
      <span class="k">case</span> <span class="i">GREATER_EQUAL</span>:
        <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> &gt;= (<span class="t">double</span>)<span class="i">right</span>;
      <span class="k">case</span> <span class="i">LESS</span>:
        <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> &lt; (<span class="t">double</span>)<span class="i">right</span>;
      <span class="k">case</span> <span class="i">LESS_EQUAL</span>:
        <span class="k">return</span> (<span class="t">double</span>)<span class="i">left</span> &lt;= (<span class="t">double</span>)<span class="i">right</span>;
</pre><pre class="insert-after">      case MINUS:
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>They are basically the same as arithmetic. The only difference is that where the
arithmetic operators produce a value whose type is the same as the operands
(numbers or strings), the comparison operators always produce a Boolean.</p>
<p>The last pair of operators are equality.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre>      <span class="k">case</span> <span class="i">BANG_EQUAL</span>: <span class="k">return</span> !<span class="i">isEqual</span>(<span class="i">left</span>, <span class="i">right</span>);
      <span class="k">case</span> <span class="i">EQUAL_EQUAL</span>: <span class="k">return</span> <span class="i">isEqual</span>(<span class="i">left</span>, <span class="i">right</span>);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>Unlike the comparison operators, which require numbers, the equality operators
support operands of any type, even mixed ones. You can&rsquo;t ask Lox if 3 is <em>less</em>
than <code>"three"</code>, but you can ask if it&rsquo;s <span name="equal"><em>equal</em></span> to
it.</p>
<aside name="equal">
<p>Spoiler alert: it&rsquo;s not.</p>
</aside>
<p>Like truthiness, the equality logic is hoisted out into a separate method.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>isTruthy</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">isEqual</span>(<span class="t">Object</span> <span class="i">a</span>, <span class="t">Object</span> <span class="i">b</span>) {
    <span class="k">if</span> (<span class="i">a</span> == <span class="k">null</span> &amp;&amp; <span class="i">b</span> == <span class="k">null</span>) <span class="k">return</span> <span class="k">true</span>;
    <span class="k">if</span> (<span class="i">a</span> == <span class="k">null</span>) <span class="k">return</span> <span class="k">false</span>;

    <span class="k">return</span> <span class="i">a</span>.<span class="i">equals</span>(<span class="i">b</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>isTruthy</em>()</div>

<p>This is one of those corners where the details of how we represent Lox objects
in terms of Java matter. We need to correctly implement <em>Lox&rsquo;s</em> notion of
equality, which may be different from Java&rsquo;s.</p>
<p>Fortunately, the two are pretty similar. Lox doesn&rsquo;t do implicit conversions in
equality and Java does not either. We do have to handle <code>nil</code>/<code>null</code> specially
so that we don&rsquo;t throw a NullPointerException if we try to call <code>equals()</code> on
<code>null</code>. Otherwise, we&rsquo;re fine. Java&rsquo;s <span name="nan"><code>equals()</code></span> method
on Boolean, Double, and String has the behavior we want for Lox.</p>
<aside name="nan">
<p>What do you expect this to evaluate to:</p>
<div class="codehilite"><pre>(<span class="n">0</span> / <span class="n">0</span>) == (<span class="n">0</span> / <span class="n">0</span>)
</pre></div>
<p>According to <a href="https://en.wikipedia.org/wiki/IEEE_754">IEEE 754</a>, which specifies the behavior of double-precision
numbers, dividing zero by zero gives you the special <strong>NaN</strong> (&ldquo;not a number&rdquo;)
value. Strangely enough, NaN is <em>not</em> equal to itself.</p>
<p>In Java, the <code>==</code> operator on primitive doubles preserves that behavior, but the
<code>equals()</code> method on the Double class does not. Lox uses the latter, so doesn&rsquo;t
follow IEEE. These kinds of subtle incompatibilities occupy a dismaying fraction
of language implementers&rsquo; lives.</p>
</aside>
<p>And that&rsquo;s it! That&rsquo;s all the code we need to correctly interpret a valid Lox
expression. But what about an <em>invalid</em> one? In particular, what happens when a
subexpression evaluates to an object of the wrong type for the operation being
performed?</p>
<h2><a href="#runtime-errors" id="runtime-errors"><small>7&#8202;.&#8202;3</small>Runtime Errors</a></h2>
<p>I was cavalier about jamming casts in whenever a subexpression produces an
Object and the operator requires it to be a number or a string. Those casts can
fail. Even though the user&rsquo;s code is erroneous, if we want to make a <span
name="fail">usable</span> language, we are responsible for handling that error
gracefully.</p>
<aside name="fail">
<p>We could simply not detect or report a type error at all. This is what C does if
you cast a pointer to some type that doesn&rsquo;t match the data that is actually
being pointed to. C gains flexibility and speed by allowing that, but is
also famously dangerous. Once you misinterpret bits in memory, all bets are off.</p>
<p>Few modern languages accept unsafe operations like that. Instead, most are
<strong>memory safe</strong> and ensure<span class="em">&mdash;</span>through a combination of static and runtime checks<span class="em">&mdash;</span>that a program can never incorrectly interpret the value stored in a piece of
memory.</p>
</aside>
<p>It&rsquo;s time for us to talk about <strong>runtime errors</strong>. I spilled a lot of ink in the
previous chapters talking about error handling, but those were all <em>syntax</em> or
<em>static</em> errors. Those are detected and reported before <em>any</em> code is executed.
Runtime errors are failures that the language semantics demand we detect and
report while the program is running (hence the name).</p>
<p>Right now, if an operand is the wrong type for the operation being performed,
the Java cast will fail and the JVM will throw a ClassCastException. That
unwinds the whole stack and exits the application, vomiting a Java stack trace
onto the user. That&rsquo;s probably not what we want. The fact that Lox is
implemented in Java should be a detail hidden from the user. Instead, we want
them to understand that a <em>Lox</em> runtime error occurred, and give them an error
message relevant to our language and their program.</p>
<p>The Java behavior does have one thing going for it, though. It correctly stops
executing any code when the error occurs. Let&rsquo;s say the user enters some
expression like:</p>
<div class="codehilite"><pre><span class="n">2</span> * (<span class="n">3</span> / -<span class="s">&quot;muffin&quot;</span>)
</pre></div>
<p>You can&rsquo;t negate a <span name="muffin">muffin</span>, so we need to report a
runtime error at that inner <code>-</code> expression. That in turn means we can&rsquo;t evaluate
the <code>/</code> expression since it has no meaningful right operand. Likewise for the
<code>*</code>. So when a runtime error occurs deep in some expression, we need to escape
all the way out.</p>
<aside name="muffin">
<p>I don&rsquo;t know, man, <em>can</em> you negate a muffin?</p><img src="image/evaluating-expressions/muffin.png" alt="A muffin, negated." />
</aside>
<p>We could print a runtime error and then abort the process and exit the
application entirely. That has a certain melodramatic flair. Sort of the
programming language interpreter equivalent of a mic drop.</p>
<p>Tempting as that is, we should probably do something a little less cataclysmic.
While a runtime error needs to stop evaluating the <em>expression</em>, it shouldn&rsquo;t
kill the <em>interpreter</em>. If a user is running the REPL and has a typo in a line
of code, they should still be able to keep the session going and enter more code
after that.</p>
<h3><a href="#detecting-runtime-errors" id="detecting-runtime-errors"><small>7&#8202;.&#8202;3&#8202;.&#8202;1</small>Detecting runtime errors</a></h3>
<p>Our tree-walk interpreter evaluates nested expressions using recursive method
calls, and we need to unwind out of all of those. Throwing an exception in Java
is a fine way to accomplish that. However, instead of using Java&rsquo;s own cast
failure, we&rsquo;ll define a Lox-specific one so that we can handle it how we want.</p>
<p>Before we do the cast, we check the object&rsquo;s type ourselves. So, for unary <code>-</code>,
we add:</p>
<div class="codehilite"><pre class="insert-before">      case MINUS:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitUnaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperand</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return -(double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitUnaryExpr</em>()</div>

<p>The code to check the operand is:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitUnaryExpr</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">checkNumberOperand</span>(<span class="t">Token</span> <span class="i">operator</span>, <span class="t">Object</span> <span class="i">operand</span>) {
    <span class="k">if</span> (<span class="i">operand</span> <span class="k">instanceof</span> <span class="t">Double</span>) <span class="k">return</span>;
    <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">operator</span>, <span class="s">&quot;Operand must be a number.&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitUnaryExpr</em>()</div>

<p>When the check fails, it throws one of these:</p>
<div class="codehilite"><div class="source-file"><em>lox/RuntimeError.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">class</span> <span class="t">RuntimeError</span> <span class="k">extends</span> <span class="t">RuntimeException</span> {
  <span class="k">final</span> <span class="t">Token</span> <span class="i">token</span>;

  <span class="t">RuntimeError</span>(<span class="t">Token</span> <span class="i">token</span>, <span class="t">String</span> <span class="i">message</span>) {
    <span class="k">super</span>(<span class="i">message</span>);
    <span class="k">this</span>.<span class="i">token</span> = <span class="i">token</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/RuntimeError.java</em>, create new file</div>

<p>Unlike the Java cast exception, our <span name="class">class</span> tracks the
token that identifies where in the user&rsquo;s code the runtime error came from. As
with static errors, this helps the user know where to fix their code.</p>
<aside name="class">
<p>I admit the name &ldquo;RuntimeError&rdquo; is confusing since Java defines a
RuntimeException class. An annoying thing about building interpreters is your
names often collide with ones already taken by the implementation language. Just
wait until we support Lox classes.</p>
</aside>
<p>We need similar checking for the binary operators. Since I promised you every
single line of code needed to implement the interpreters, I&rsquo;ll run through them
all.</p>
<p>Greater than:</p>
<div class="codehilite"><pre class="insert-before">      case GREATER:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperands</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">left</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return (double)left &gt; (double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>Greater than or equal to:</p>
<div class="codehilite"><pre class="insert-before">      case GREATER_EQUAL:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperands</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">left</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return (double)left &gt;= (double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>Less than:</p>
<div class="codehilite"><pre class="insert-before">      case LESS:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperands</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">left</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return (double)left &lt; (double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>Less than or equal to:</p>
<div class="codehilite"><pre class="insert-before">      case LESS_EQUAL:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperands</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">left</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return (double)left &lt;= (double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>Subtraction:</p>
<div class="codehilite"><pre class="insert-before">      case MINUS:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperands</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">left</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return (double)left - (double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>Division:</p>
<div class="codehilite"><pre class="insert-before">      case SLASH:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperands</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">left</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return (double)left / (double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>Multiplication:</p>
<div class="codehilite"><pre class="insert-before">      case STAR:
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()</div>
<pre class="insert">        <span class="i">checkNumberOperands</span>(<span class="i">expr</span>.<span class="i">operator</span>, <span class="i">left</span>, <span class="i">right</span>);
</pre><pre class="insert-after">        return (double)left * (double)right;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>()</div>

<p>All of those rely on this validator, which is virtually the same as the unary
one:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>checkNumberOperand</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">checkNumberOperands</span>(<span class="t">Token</span> <span class="i">operator</span>,
                                   <span class="t">Object</span> <span class="i">left</span>, <span class="t">Object</span> <span class="i">right</span>) {
    <span class="k">if</span> (<span class="i">left</span> <span class="k">instanceof</span> <span class="t">Double</span> &amp;&amp; <span class="i">right</span> <span class="k">instanceof</span> <span class="t">Double</span>) <span class="k">return</span>;
   <span name="operand"> </span>
    <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">operator</span>, <span class="s">&quot;Operands must be numbers.&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>checkNumberOperand</em>()</div>

<aside name="operand">
<p>Another subtle semantic choice: We evaluate <em>both</em> operands before checking the
type of <em>either</em>. Imagine we have a function <code>say()</code> that prints its argument
then returns it. Using that, we write:</p>
<div class="codehilite"><pre><span class="i">say</span>(<span class="s">&quot;left&quot;</span>) - <span class="i">say</span>(<span class="s">&quot;right&quot;</span>);
</pre></div>
<p>Our interpreter prints &ldquo;left&rdquo; and &ldquo;right&rdquo; before reporting the runtime error. We
could have instead specified that the left operand is checked before even
evaluating the right.</p>
</aside>
<p>The last remaining operator, again the odd one out, is addition. Since <code>+</code> is
overloaded for numbers and strings, it already has code to check the types. All
we need to do is fail if neither of the two success cases match.</p>
<div class="codehilite"><pre class="insert-before">          return (String)left + (String)right;
        }

</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitBinaryExpr</em>()<br>
replace 1 line</div>
<pre class="insert">        <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">expr</span>.<span class="i">operator</span>,
            <span class="s">&quot;Operands must be two numbers or two strings.&quot;</span>);
</pre><pre class="insert-after">      case SLASH:
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitBinaryExpr</em>(), replace 1 line</div>

<p>That gets us detecting runtime errors deep in the innards of the evaluator. The
errors are getting thrown. The next step is to write the code that catches them.
For that, we need to wire up the Interpreter class into the main Lox class that
drives it.</p>
<h2><a href="#hooking-up-the-interpreter" id="hooking-up-the-interpreter"><small>7&#8202;.&#8202;4</small>Hooking Up the Interpreter</a></h2>
<p>The visit methods are sort of the guts of the Interpreter class, where the real
work happens. We need to wrap a skin around them to interface with the rest of
the program. The Interpreter&rsquo;s public API is simply one method.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em></div>
<pre>  <span class="t">void</span> <span class="i">interpret</span>(<span class="t">Expr</span> <span class="i">expression</span>) {<span name="void"> </span>
    <span class="k">try</span> {
      <span class="t">Object</span> <span class="i">value</span> = <span class="i">evaluate</span>(<span class="i">expression</span>);
      <span class="t">System</span>.<span class="i">out</span>.<span class="i">println</span>(<span class="i">stringify</span>(<span class="i">value</span>));
    } <span class="k">catch</span> (<span class="t">RuntimeError</span> <span class="i">error</span>) {
      <span class="t">Lox</span>.<span class="i">runtimeError</span>(<span class="i">error</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em></div>

<p>This takes in a syntax tree for an expression and evaluates it. If that
succeeds, <code>evaluate()</code> returns an object for the result value. <code>interpret()</code>
converts that to a string and shows it to the user. To convert a Lox value to a
string, we rely on:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>isEqual</em>()</div>
<pre>  <span class="k">private</span> <span class="t">String</span> <span class="i">stringify</span>(<span class="t">Object</span> <span class="i">object</span>) {
    <span class="k">if</span> (<span class="i">object</span> == <span class="k">null</span>) <span class="k">return</span> <span class="s">&quot;nil&quot;</span>;

    <span class="k">if</span> (<span class="i">object</span> <span class="k">instanceof</span> <span class="t">Double</span>) {
      <span class="t">String</span> <span class="i">text</span> = <span class="i">object</span>.<span class="i">toString</span>();
      <span class="k">if</span> (<span class="i">text</span>.<span class="i">endsWith</span>(<span class="s">&quot;.0&quot;</span>)) {
        <span class="i">text</span> = <span class="i">text</span>.<span class="i">substring</span>(<span class="n">0</span>, <span class="i">text</span>.<span class="i">length</span>() - <span class="n">2</span>);
      }
      <span class="k">return</span> <span class="i">text</span>;
    }

    <span class="k">return</span> <span class="i">object</span>.<span class="i">toString</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>isEqual</em>()</div>

<p>This is another of those pieces of code like <code>isTruthy()</code> that crosses the
membrane between the user&rsquo;s view of Lox objects and their internal
representation in Java.</p>
<p>It&rsquo;s pretty straightforward. Since Lox was designed to be familiar to someone
coming from Java, things like Booleans look the same in both languages. The two
edge cases are <code>nil</code>, which we represent using Java&rsquo;s <code>null</code>, and numbers.</p>
<p>Lox uses double-precision numbers even for integer values. In that case, they
should print without a decimal point. Since Java has both floating-point and
integer types, it wants you to know which one you&rsquo;re using. It tells you by
adding an explicit <code>.0</code> to integer-valued doubles. We don&rsquo;t care about that, so
we <span name="number">hack</span> it off the end.</p>
<aside name="number">
<p>Yet again, we take care of this edge case with numbers to ensure that jlox and
clox work the same. Handling weird corners of the language like this will drive
you crazy but is an important part of the job.</p>
<p>Users rely on these details<span class="em">&mdash;</span>either deliberately or inadvertently<span class="em">&mdash;</span>and if
the implementations aren&rsquo;t consistent, their program will break when they run it
on different interpreters.</p>
</aside>
<h3><a href="#reporting-runtime-errors" id="reporting-runtime-errors"><small>7&#8202;.&#8202;4&#8202;.&#8202;1</small>Reporting runtime errors</a></h3>
<p>If a runtime error is thrown while evaluating the expression, <code>interpret()</code>
catches it. This lets us report the error to the user and then gracefully
continue. All of our existing error reporting code lives in the Lox class, so we
put this method there too:</p>
<div class="codehilite"><div class="source-file"><em>lox/Lox.java</em><br>
add after <em>error</em>()</div>
<pre>  <span class="k">static</span> <span class="t">void</span> <span class="i">runtimeError</span>(<span class="t">RuntimeError</span> <span class="i">error</span>) {
    <span class="t">System</span>.<span class="i">err</span>.<span class="i">println</span>(<span class="i">error</span>.<span class="i">getMessage</span>() +
        <span class="s">&quot;</span><span class="e">\n</span><span class="s">[line &quot;</span> + <span class="i">error</span>.<span class="i">token</span>.<span class="i">line</span> + <span class="s">&quot;]&quot;</span>);
    <span class="i">hadRuntimeError</span> = <span class="k">true</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, add after <em>error</em>()</div>

<p>We use the token associated with the RuntimeError to tell the user what line of
code was executing when the error occurred. Even better would be to give the
user an entire call stack to show how they <em>got</em> to be executing that code. But
we don&rsquo;t have function calls yet, so I guess we don&rsquo;t have to worry about it.</p>
<p>After showing the error, <code>runtimeError()</code> sets this field:</p>
<div class="codehilite"><pre class="insert-before">  static boolean hadError = false;
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in class <em>Lox</em></div>
<pre class="insert">  <span class="k">static</span> <span class="t">boolean</span> <span class="i">hadRuntimeError</span> = <span class="k">false</span>;

</pre><pre class="insert-after">  public static void main(String[] args) throws IOException {
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in class <em>Lox</em></div>

<p>That field plays a small but important role.</p>
<div class="codehilite"><pre class="insert-before">    run(new String(bytes, Charset.defaultCharset()));

    // Indicate an error in the exit code.
    if (hadError) System.exit(65);
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>runFile</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">hadRuntimeError</span>) <span class="t">System</span>.<span class="i">exit</span>(<span class="n">70</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>runFile</em>()</div>

<p>If the user is running a Lox <span name="repl">script from a file</span> and a
runtime error occurs, we set an exit code when the process quits to let the
calling process know. Not everyone cares about shell etiquette, but we do.</p>
<aside name="repl">
<p>If the user is running the REPL, we don&rsquo;t care about tracking runtime errors.
After they are reported, we simply loop around and let them input new code and
keep going.</p>
</aside>
<h3><a href="#running-the-interpreter" id="running-the-interpreter"><small>7&#8202;.&#8202;4&#8202;.&#8202;2</small>Running the interpreter</a></h3>
<p>Now that we have an interpreter, the Lox class can start using it.</p>
<div class="codehilite"><pre class="insert-before">public class Lox {
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in class <em>Lox</em></div>
<pre class="insert">  <span class="k">private</span> <span class="k">static</span> <span class="k">final</span> <span class="t">Interpreter</span> <span class="i">interpreter</span> = <span class="k">new</span> <span class="t">Interpreter</span>();
</pre><pre class="insert-after">  static boolean hadError = false;
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in class <em>Lox</em></div>

<p>We make the field static so that successive calls to <code>run()</code> inside a REPL
session reuse the same interpreter. That doesn&rsquo;t make a difference now, but it
will later when the interpreter stores global variables. Those variables should
persist throughout the REPL session.</p>
<p>Finally, we remove the line of temporary code from the <a href="parsing-expressions.html">last chapter</a> for
printing the syntax tree and replace it with this:</p>
<div class="codehilite"><pre class="insert-before">    // Stop if there was a syntax error.
    if (hadError) return;

</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">interpreter</span>.<span class="i">interpret</span>(<span class="i">expression</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>run</em>(), replace 1 line</div>

<p>We have an entire language pipeline now: scanning, parsing, and
execution. Congratulations, you now have your very own arithmetic calculator.</p>
<p>As you can see, the interpreter is pretty bare bones. But the Interpreter class
and the Visitor pattern we&rsquo;ve set up today form the skeleton that later chapters
will stuff full of interesting guts<span class="em">&mdash;</span>variables, functions, etc. Right now, the
interpreter doesn&rsquo;t do very much, but it&rsquo;s alive!</p><img src="image/evaluating-expressions/skeleton.png" alt="A skeleton waving hello." />
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Allowing comparisons on types other than numbers could be useful. The
operators might have a reasonable interpretation for strings. Even
comparisons among mixed types, like <code>3 &lt; "pancake"</code> could be handy to enable
things like ordered collections of heterogeneous types. Or it could simply
lead to bugs and confusion.</p>
<p>Would you extend Lox to support comparing other types? If so, which pairs of
types do you allow and how do you define their ordering? Justify your
choices and compare them to other languages.</p>
</li>
<li>
<p>Many languages define <code>+</code> such that if <em>either</em> operand is a string, the
other is converted to a string and the results are then concatenated. For
example, <code>"scone" + 4</code> would yield <code>scone4</code>. Extend the code in
<code>visitBinaryExpr()</code> to support that.</p>
</li>
<li>
<p>What happens right now if you divide a number by zero? What do you think
should happen? Justify your choice. How do other languages you know handle
division by zero, and why do they make the choices they do?</p>
<p>Change the implementation in <code>visitBinaryExpr()</code> to detect and report a
runtime error for this case.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Static and Dynamic Typing</a></h2>
<p>Some languages, like Java, are statically typed, which means type errors are
detected and reported at compile time before any code is run. Others, like Lox,
are dynamically typed and defer checking for type errors until runtime right
before an operation is attempted. We tend to consider this a black-and-white
choice, but there is actually a continuum between them.</p>
<p>It turns out even most statically typed languages do <em>some</em> type checks at
runtime. The type system checks most type rules statically, but inserts runtime
checks in the generated code for other operations.</p>
<p>For example, in Java, the <em>static</em> type system assumes a cast expression will
always safely succeed. After you cast some value, you can statically treat it as
the destination type and not get any compile errors. But downcasts can fail,
obviously. The only reason the static checker can presume that casts always
succeed without violating the language&rsquo;s soundness guarantees is that the
cast is checked <em>at runtime</em> and throws an exception on failure.</p>
<p>A more subtle example is <a href="https://en.wikipedia.org/wiki/Covariance_and_contravariance_(computer_science)#Covariant_arrays_in_Java_and_C.23">covariant arrays</a> in Java and C#. The static
subtyping rules for arrays allow operations that are not sound. Consider:</p>
<div class="codehilite"><pre><span class="t">Object</span>[] <span class="i">stuff</span> = <span class="k">new</span> <span class="t">Integer</span>[<span class="n">1</span>];
<span class="i">stuff</span>[<span class="n">0</span>] = <span class="s">&quot;not an int!&quot;</span>;
</pre></div>
<p>This code compiles without any errors. The first line upcasts the Integer array
and stores it in a variable of type Object array. The second line stores a
string in one of its cells. The Object array type statically allows that<span class="em">&mdash;</span>strings <em>are</em> Objects<span class="em">&mdash;</span>but the actual Integer array that <code>stuff</code> refers to
at runtime should never have a string in it! To avoid that catastrophe, when you
store a value in an array, the JVM does a <em>runtime</em> check to make sure it&rsquo;s an
allowed type. If not, it throws an ArrayStoreException.</p>
<p>Java could have avoided the need to check this at runtime by disallowing the
cast on the first line. It could make arrays <em>invariant</em> such that an array of
Integers is <em>not</em> an array of Objects. That&rsquo;s statically sound, but it prohibits
common and safe patterns of code that only read from arrays. Covariance is safe
if you never <em>write</em> to the array. Those patterns were particularly important
for usability in Java 1.0 before it supported generics. James Gosling and the
other Java designers traded off a little static safety and performance<span class="em">&mdash;</span>those
array store checks take time<span class="em">&mdash;</span>in return for some flexibility.</p>
<p>There are few modern statically typed languages that don&rsquo;t make that trade-off
<em>somewhere</em>. Even Haskell will let you run code with non-exhaustive matches. If
you find yourself designing a statically typed language, keep in mind that you
can sometimes give users more flexibility without sacrificing <em>too</em> many of the
benefits of static safety by deferring some type checks until runtime.</p>
<p>On the other hand, a key reason users choose statically typed languages is
because of the confidence the language gives them that certain kinds of errors
can <em>never</em> occur when their program is run. Defer too many type checks until
runtime, and you erode that confidence.</p>
</div>

<footer>
<a href="statements-and-state.html" class="next">
  Next Chapter: &ldquo;Statements and State&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/functions.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Functions &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Functions<small>10</small></a></h3>

<ul>
    <li><a href="#function-calls"><small>10.1</small> Function Calls</a></li>
    <li><a href="#native-functions"><small>10.2</small> Native Functions</a></li>
    <li><a href="#function-declarations"><small>10.3</small> Function Declarations</a></li>
    <li><a href="#function-objects"><small>10.4</small> Function Objects</a></li>
    <li><a href="#return-statements"><small>10.5</small> Return Statements</a></li>
    <li><a href="#local-functions-and-closures"><small>10.6</small> Local Functions and Closures</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="control-flow.html" title="Control Flow" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="resolving-and-binding.html" title="Resolving and Binding" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="control-flow.html" title="Control Flow" class="prev">←</a>
<a href="resolving-and-binding.html" title="Resolving and Binding" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Functions<small>10</small></a></h3>

<ul>
    <li><a href="#function-calls"><small>10.1</small> Function Calls</a></li>
    <li><a href="#native-functions"><small>10.2</small> Native Functions</a></li>
    <li><a href="#function-declarations"><small>10.3</small> Function Declarations</a></li>
    <li><a href="#function-objects"><small>10.4</small> Function Objects</a></li>
    <li><a href="#return-statements"><small>10.5</small> Return Statements</a></li>
    <li><a href="#local-functions-and-closures"><small>10.6</small> Local Functions and Closures</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="control-flow.html" title="Control Flow" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="resolving-and-binding.html" title="Resolving and Binding" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">10</div>
  <h1>Functions</h1>

<blockquote>
<p>And that is also the way the human mind works<span class="em">&mdash;</span>by the compounding of old
ideas into new structures that become new ideas that can themselves be used in
compounds, and round and round endlessly, growing ever more remote from the
basic earthbound imagery that is each language&rsquo;s soil.</p>
<p><cite>Douglas R. Hofstadter, <em>I Am a Strange Loop</em></cite></p>
</blockquote>
<p>This chapter marks the culmination of a lot of hard work. The previous chapters
add useful functionality in their own right, but each also supplies a piece of a
<span name="lambda">puzzle</span>. We&rsquo;ll take those pieces<span class="em">&mdash;</span>expressions,
statements, variables, control flow, and lexical scope<span class="em">&mdash;</span>add a couple more, and
assemble them all into support for real user-defined functions and function
calls.</p>
<aside name="lambda"><img src="image/functions/lambda.png" alt="A lambda puzzle." />
</aside>
<h2><a href="#function-calls" id="function-calls"><small>10&#8202;.&#8202;1</small>Function Calls</a></h2>
<p>You&rsquo;re certainly familiar with C-style function call syntax, but the grammar is
more subtle than you may realize. Calls are typically to named functions like:</p>
<div class="codehilite"><pre><span class="i">average</span>(<span class="n">1</span>, <span class="n">2</span>);
</pre></div>
<p>But the <span name="pascal">name</span> of the function being called isn&rsquo;t
actually part of the call syntax. The thing being called<span class="em">&mdash;</span>the <strong>callee</strong><span class="em">&mdash;</span>can be any expression that evaluates to a function. (Well, it does have to be a
pretty <em>high precedence</em> expression, but parentheses take care of that.) For
example:</p>
<aside name="pascal">
<p>The name <em>is</em> part of the call syntax in Pascal. You can call only named
functions or functions stored directly in variables.</p>
</aside>
<div class="codehilite"><pre><span class="i">getCallback</span>()();
</pre></div>
<p>There are two call expressions here. The first pair of parentheses has
<code>getCallback</code> as its callee. But the second call has the entire <code>getCallback()</code>
expression as its callee. It is the parentheses following an expression that
indicate a function call. You can think of a call as sort of like a postfix
operator that starts with <code>(</code>.</p>
<p>This &ldquo;operator&rdquo; has higher precedence than any other operator, even the unary
ones. So we slot it into the grammar by having the <code>unary</code> rule bubble up to a
new <code>call</code> rule.</p>
<p><span name="curry"></span></p>
<div class="codehilite"><pre><span class="i">unary</span>          → ( <span class="s">&quot;!&quot;</span> | <span class="s">&quot;-&quot;</span> ) <span class="i">unary</span> | <span class="i">call</span> ;
<span class="i">call</span>           → <span class="i">primary</span> ( <span class="s">&quot;(&quot;</span> <span class="i">arguments</span>? <span class="s">&quot;)&quot;</span> )* ;
</pre></div>
<p>This rule matches a primary expression followed by zero or more function calls.
If there are no parentheses, this parses a bare primary expression. Otherwise,
each call is recognized by a pair of parentheses with an optional list of
arguments inside. The argument list grammar is:</p>
<aside name="curry">
<p>The rule uses <code>*</code> to allow matching a series of calls like <code>fn(1)(2)(3)</code>. Code
like that isn&rsquo;t common in C-style languages, but it is in the family of
languages derived from ML. There, the normal way of defining a function that
takes multiple arguments is as a series of nested functions. Each function takes
one argument and returns a new function. That function consumes the next
argument, returns yet another function, and so on. Eventually, once all of the
arguments are consumed, the last function completes the operation.</p>
<p>This style, called <strong>currying</strong>, after Haskell Curry (the same guy whose first
name graces that <em>other</em> well-known functional language), is baked directly into
the language syntax so it&rsquo;s not as weird looking as it would be here.</p>
</aside>
<div class="codehilite"><pre><span class="i">arguments</span>      → <span class="i">expression</span> ( <span class="s">&quot;,&quot;</span> <span class="i">expression</span> )* ;
</pre></div>
<p>This rule requires at least one argument expression, followed by zero or more
other expressions, each preceded by a comma. To handle zero-argument calls, the
<code>call</code> rule itself considers the entire <code>arguments</code> production to be optional.</p>
<p>I admit, this seems more grammatically awkward than you&rsquo;d expect for the
incredibly common &ldquo;zero or more comma-separated things&rdquo; pattern. There are some
sophisticated metasyntaxes that handle this better, but in our BNF and in many
language specs I&rsquo;ve seen, it is this cumbersome.</p>
<p>Over in our syntax tree generator, we add a <span name="call-ast">new
node</span>.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Binary   : Expr left, Token operator, Expr right&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Call     : Expr callee, Token paren, List&lt;Expr&gt; arguments&quot;</span>,
</pre><pre class="insert-after">      &quot;Grouping : Expr expression&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="call-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#call-expression">Appendix II</a>.</p>
</aside>
<p>It stores the callee expression and a list of expressions for the arguments. It
also stores the token for the closing parenthesis. We&rsquo;ll use that token&rsquo;s
location when we report a runtime error caused by a function call.</p>
<p>Crack open the parser. Where <code>unary()</code> used to jump straight to <code>primary()</code>,
change it to call, well, <code>call()</code>.</p>
<div class="codehilite"><pre class="insert-before">      return new Expr.Unary(operator, right);
    }

</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>unary</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">return</span> <span class="i">call</span>();
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>unary</em>(), replace 1 line</div>

<p>Its definition is:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>unary</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">call</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">primary</span>();

    <span class="k">while</span> (<span class="k">true</span>) {<span name="while-true"> </span>
      <span class="k">if</span> (<span class="i">match</span>(<span class="i">LEFT_PAREN</span>)) {
        <span class="i">expr</span> = <span class="i">finishCall</span>(<span class="i">expr</span>);
      } <span class="k">else</span> {
        <span class="k">break</span>;
      }
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>unary</em>()</div>

<p>The code here doesn&rsquo;t quite line up with the grammar rules. I moved a few things
around to make the code cleaner<span class="em">&mdash;</span>one of the luxuries we have with a
handwritten parser. But it&rsquo;s roughly similar to how we parse infix operators.
First, we parse a primary expression, the &ldquo;left operand&rdquo; to the call. Then, each
time we see a <code>(</code>, we call <code>finishCall()</code> to parse the call expression using the
previously parsed expression as the callee. The returned expression becomes the
new <code>expr</code> and we loop to see if the result is itself called.</p>
<aside name="while-true">
<p>This code would be simpler as <code>while (match(LEFT_PAREN))</code> instead of the silly
<code>while (true)</code> and <code>break</code>. Don&rsquo;t worry, it will make sense when we expand the
parser later to handle properties on objects.</p>
</aside>
<p>The code to parse the argument list is in this helper:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>unary</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">finishCall</span>(<span class="t">Expr</span> <span class="i">callee</span>) {
    <span class="t">List</span>&lt;<span class="t">Expr</span>&gt; <span class="i">arguments</span> = <span class="k">new</span> <span class="t">ArrayList</span>&lt;&gt;();
    <span class="k">if</span> (!<span class="i">check</span>(<span class="i">RIGHT_PAREN</span>)) {
      <span class="k">do</span> {
        <span class="i">arguments</span>.<span class="i">add</span>(<span class="i">expression</span>());
      } <span class="k">while</span> (<span class="i">match</span>(<span class="i">COMMA</span>));
    }

    <span class="t">Token</span> <span class="i">paren</span> = <span class="i">consume</span>(<span class="i">RIGHT_PAREN</span>,
                          <span class="s">&quot;Expect &#39;)&#39; after arguments.&quot;</span>);

    <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Call</span>(<span class="i">callee</span>, <span class="i">paren</span>, <span class="i">arguments</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>unary</em>()</div>

<p>This is more or less the <code>arguments</code> grammar rule translated to code, except
that we also handle the zero-argument case. We check for that case first by
seeing if the next token is <code>)</code>. If it is, we don&rsquo;t try to parse any arguments.</p>
<p>Otherwise, we parse an expression, then look for a comma indicating that there
is another argument after that. We keep doing that as long as we find commas
after each expression. When we don&rsquo;t find a comma, then the argument list must
be done and we consume the expected closing parenthesis. Finally, we wrap the
callee and those arguments up into a call AST node.</p>
<h3><a href="#maximum-argument-counts" id="maximum-argument-counts"><small>10&#8202;.&#8202;1&#8202;.&#8202;1</small>Maximum argument counts</a></h3>
<p>Right now, the loop where we parse arguments has no bound. If you want to call a
function and pass a million arguments to it, the parser would have no problem
with it. Do we want to limit that?</p>
<p>Other languages have various approaches. The C standard says a conforming
implementation has to support <em>at least</em> 127 arguments to a function, but
doesn&rsquo;t say there&rsquo;s any upper limit. The Java specification says a method can
accept <em>no more than</em> <span name="254">255</span> arguments.</p>
<aside name="254">
<p>The limit is 25<em>4</em> arguments if the method is an instance method. That&rsquo;s because
<code>this</code><span class="em">&mdash;</span>the receiver of the method<span class="em">&mdash;</span>works like an argument that is
implicitly passed to the method, so it claims one of the slots.</p>
</aside>
<p>Our Java interpreter for Lox doesn&rsquo;t really need a limit, but having a maximum
number of arguments will simplify our bytecode interpreter in <a href="a-bytecode-virtual-machine.html">Part III</a>. We
want our two interpreters to be compatible with each other, even in weird corner
cases like this, so we&rsquo;ll add the same limit to jlox.</p>
<div class="codehilite"><pre class="insert-before">      do {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>finishCall</em>()</div>
<pre class="insert">        <span class="k">if</span> (<span class="i">arguments</span>.<span class="i">size</span>() &gt;= <span class="n">255</span>) {
          <span class="i">error</span>(<span class="i">peek</span>(), <span class="s">&quot;Can&#39;t have more than 255 arguments.&quot;</span>);
        }
</pre><pre class="insert-after">        arguments.add(expression());
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>finishCall</em>()</div>

<p>Note that the code here <em>reports</em> an error if it encounters too many arguments,
but it doesn&rsquo;t <em>throw</em> the error. Throwing is how we kick into panic mode, which
is what we want if the parser is in a confused state and doesn&rsquo;t know where it
is in the grammar anymore. But here, the parser is still in a perfectly valid
state<span class="em">&mdash;</span>it just found too many arguments. So it reports the error and keeps on
keepin&rsquo; on.</p>
<h3><a href="#interpreting-function-calls" id="interpreting-function-calls"><small>10&#8202;.&#8202;1&#8202;.&#8202;2</small>Interpreting function calls</a></h3>
<p>We don&rsquo;t have any functions we can call, so it seems weird to start implementing
calls first, but we&rsquo;ll worry about that when we get there. First, our
interpreter needs a new import.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em></div>
<pre class="insert"><span class="k">import</span> <span class="i">java.util.ArrayList</span>;
</pre><pre class="insert-after">import java.util.List;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em></div>

<p>As always, interpretation starts with a new visit method for our new call
expression node.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitBinaryExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitCallExpr</span>(<span class="t">Expr</span>.<span class="t">Call</span> <span class="i">expr</span>) {
    <span class="t">Object</span> <span class="i">callee</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">callee</span>);

    <span class="t">List</span>&lt;<span class="t">Object</span>&gt; <span class="i">arguments</span> = <span class="k">new</span> <span class="t">ArrayList</span>&lt;&gt;();
    <span class="k">for</span> (<span class="t">Expr</span> <span class="i">argument</span> : <span class="i">expr</span>.<span class="i">arguments</span>) {<span name="in-order"> </span>
      <span class="i">arguments</span>.<span class="i">add</span>(<span class="i">evaluate</span>(<span class="i">argument</span>));
    }

    <span class="t">LoxCallable</span> <span class="i">function</span> = (<span class="t">LoxCallable</span>)<span class="i">callee</span>;
    <span class="k">return</span> <span class="i">function</span>.<span class="i">call</span>(<span class="k">this</span>, <span class="i">arguments</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitBinaryExpr</em>()</div>

<p>First, we evaluate the expression for the callee. Typically, this expression is
just an identifier that looks up the function by its name, but it could be
anything. Then we evaluate each of the argument expressions in order and store
the resulting values in a list.</p>
<aside name="in-order">
<p>This is another one of those subtle semantic choices. Since argument expressions
may have side effects, the order in which they are evaluated could be user visible. Even
so, some languages like Scheme and C don&rsquo;t specify an order. This gives
compilers freedom to reorder them for efficiency, but means users may be
unpleasantly surprised if arguments aren&rsquo;t evaluated in the order they expect.</p>
</aside>
<p>Once we&rsquo;ve got the callee and the arguments ready, all that remains is to
perform the call. We do that by casting the callee to a <span
name="callable">LoxCallable</span> and then invoking a <code>call()</code> method on it.
The Java representation of any Lox object that can be called like a function
will implement this interface. That includes user-defined functions, naturally,
but also class objects since classes are &ldquo;called&rdquo; to construct new instances.
We&rsquo;ll also use it for one more purpose shortly.</p>
<aside name="callable">
<p>I stuck &ldquo;Lox&rdquo; before the name to distinguish it from the Java standard library&rsquo;s
own Callable interface. Alas, all the good simple names are already taken.</p>
</aside>
<p>There isn&rsquo;t too much to this new interface.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxCallable.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.List</span>;

<span class="k">interface</span> <span class="t">LoxCallable</span> {
  <span class="t">Object</span> <span class="i">call</span>(<span class="t">Interpreter</span> <span class="i">interpreter</span>, <span class="t">List</span>&lt;<span class="t">Object</span>&gt; <span class="i">arguments</span>);
}
</pre></div>
<div class="source-file-narrow"><em>lox/LoxCallable.java</em>, create new file</div>

<p>We pass in the interpreter in case the class implementing <code>call()</code> needs it. We
also give it the list of evaluated argument values. The implementer&rsquo;s job is
then to return the value that the call expression produces.</p>
<h3><a href="#call-type-errors" id="call-type-errors"><small>10&#8202;.&#8202;1&#8202;.&#8202;3</small>Call type errors</a></h3>
<p>Before we get to implementing LoxCallable, we need to make the visit method a
little more robust. It currently ignores a couple of failure modes that we can&rsquo;t
pretend won&rsquo;t occur. First, what happens if the callee isn&rsquo;t actually something
you can call? What if you try to do this:</p>
<div class="codehilite"><pre><span class="s">&quot;totally not a function&quot;</span>();
</pre></div>
<p>Strings aren&rsquo;t callable in Lox. The runtime representation of a Lox string is a
Java string, so when we cast that to LoxCallable, the JVM will throw a
ClassCastException. We don&rsquo;t want our interpreter to vomit out some nasty Java
stack trace and die. Instead, we need to check the type ourselves first.</p>
<div class="codehilite"><pre class="insert-before">    }

</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitCallExpr</em>()</div>
<pre class="insert">    <span class="k">if</span> (!(<span class="i">callee</span> <span class="k">instanceof</span> <span class="t">LoxCallable</span>)) {
      <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">expr</span>.<span class="i">paren</span>,
          <span class="s">&quot;Can only call functions and classes.&quot;</span>);
    }

</pre><pre class="insert-after">    LoxCallable function = (LoxCallable)callee;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitCallExpr</em>()</div>

<p>We still throw an exception, but now we&rsquo;re throwing our own exception type, one
that the interpreter knows to catch and report gracefully.</p>
<h3><a href="#checking-arity" id="checking-arity"><small>10&#8202;.&#8202;1&#8202;.&#8202;4</small>Checking arity</a></h3>
<p>The other problem relates to the function&rsquo;s <strong>arity</strong>. Arity is the fancy term
for the number of arguments a function or operation expects. Unary operators
have arity one, binary operators two, etc. With functions, the arity is
determined by the number of parameters it declares.</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">add</span>(<span class="i">a</span>, <span class="i">b</span>, <span class="i">c</span>) {
  <span class="k">print</span> <span class="i">a</span> + <span class="i">b</span> + <span class="i">c</span>;
}
</pre></div>
<p>This function defines three parameters, <code>a</code>, <code>b</code>, and <code>c</code>, so its arity is
three and it expects three arguments. So what if you try to call it like this:</p>
<div class="codehilite"><pre><span class="i">add</span>(<span class="n">1</span>, <span class="n">2</span>, <span class="n">3</span>, <span class="n">4</span>); <span class="c">// Too many.</span>
<span class="i">add</span>(<span class="n">1</span>, <span class="n">2</span>);       <span class="c">// Too few.</span>
</pre></div>
<p>Different languages take different approaches to this problem. Of course, most
statically typed languages check this at compile time and refuse to compile the
code if the argument count doesn&rsquo;t match the function&rsquo;s arity. JavaScript
discards any extra arguments you pass. If you don&rsquo;t pass enough, it fills in the
missing parameters with the magic sort-of-like-null-but-not-really value
<code>undefined</code>. Python is stricter. It raises a runtime error if the argument list
is too short or too long.</p>
<p>I think the latter is a better approach. Passing the wrong number of arguments
is almost always a bug, and it&rsquo;s a mistake I do make in practice. Given that,
the sooner the implementation draws my attention to it, the better. So for Lox,
we&rsquo;ll take Python&rsquo;s approach. Before invoking the callable, we check to see if
the argument list&rsquo;s length matches the callable&rsquo;s arity.</p>
<div class="codehilite"><pre class="insert-before">    LoxCallable function = (LoxCallable)callee;
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitCallExpr</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">arguments</span>.<span class="i">size</span>() != <span class="i">function</span>.<span class="i">arity</span>()) {
      <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">expr</span>.<span class="i">paren</span>, <span class="s">&quot;Expected &quot;</span> +
          <span class="i">function</span>.<span class="i">arity</span>() + <span class="s">&quot; arguments but got &quot;</span> +
          <span class="i">arguments</span>.<span class="i">size</span>() + <span class="s">&quot;.&quot;</span>);
    }

</pre><pre class="insert-after">    return function.call(this, arguments);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitCallExpr</em>()</div>

<p>That requires a new method on the LoxCallable interface to ask it its arity.</p>
<div class="codehilite"><pre class="insert-before">interface LoxCallable {
</pre><div class="source-file"><em>lox/LoxCallable.java</em><br>
in interface <em>LoxCallable</em></div>
<pre class="insert">  <span class="t">int</span> <span class="i">arity</span>();
</pre><pre class="insert-after">  Object call(Interpreter interpreter, List&lt;Object&gt; arguments);
</pre></div>
<div class="source-file-narrow"><em>lox/LoxCallable.java</em>, in interface <em>LoxCallable</em></div>

<p>We <em>could</em> push the arity checking into the concrete implementation of <code>call()</code>.
But, since we&rsquo;ll have multiple classes implementing LoxCallable, that would end
up with redundant validation spread across a few classes. Hoisting it up into
the visit method lets us do it in one place.</p>
<h2><a href="#native-functions" id="native-functions"><small>10&#8202;.&#8202;2</small>Native Functions</a></h2>
<p>We can theoretically call functions, but we have no functions to call yet.
Before we get to user-defined functions, now is a good time to introduce a vital
but often overlooked facet of language implementations<span class="em">&mdash;</span><span
name="native"><strong>native functions</strong></span>. These are functions that the
interpreter exposes to user code but that are implemented in the host language
(in our case Java), not the language being implemented (Lox).</p>
<p>Sometimes these are called <strong>primitives</strong>, <strong>external functions</strong>, or <strong>foreign
functions</strong>. Since these functions can be called while the user&rsquo;s program is
running, they form part of the implementation&rsquo;s runtime. A lot of programming
language books gloss over these because they aren&rsquo;t conceptually interesting.
They&rsquo;re mostly grunt work.</p>
<aside name="native">
<p>Curiously, two names for these functions<span class="em">&mdash;</span>&ldquo;native&rdquo; and &ldquo;foreign&rdquo;<span class="em">&mdash;</span>are
antonyms. Maybe it depends on the perspective of the person choosing the term.
If you think of yourself as &ldquo;living&rdquo; within the runtime&rsquo;s implementation (in our
case, Java) then functions written in that language are &ldquo;native&rdquo;. But if you have the
mindset of a <em>user</em> of your language, then the runtime is implemented in some
other &ldquo;foreign&rdquo; language.</p>
<p>Or it may be that &ldquo;native&rdquo; refers to the machine code language of the underlying
hardware. In Java, &ldquo;native&rdquo; methods are ones implemented in C or C++ and
compiled to native machine code.</p><img src="image/functions/foreign.png" class="above" alt="All a matter of perspective." />
</aside>
<p>But when it comes to making your language actually good at doing useful stuff,
the native functions your implementation provides are key. They provide access
to the fundamental services that all programs are defined in terms of. If you
don&rsquo;t provide native functions to access the file system, a user&rsquo;s going to have
a hell of a time writing a program that reads and <span
name="print">displays</span> a file.</p>
<aside name="print">
<p>A classic native function almost every language provides is one to print text to
stdout. In Lox, I made <code>print</code> a built-in statement so that we could get stuff
on screen in the chapters before this one.</p>
<p>Once we have functions, we could simplify the language by tearing out the old
print syntax and replacing it with a native function. But that would mean that
examples early in the book wouldn&rsquo;t run on the interpreter from later chapters
and vice versa. So, for the book, I&rsquo;ll leave it alone.</p>
<p>If you&rsquo;re building an interpreter for your <em>own</em> language, though, you may want
to consider it.</p>
</aside>
<p>Many languages also allow users to provide their own native functions. The
mechanism for doing so is called a <strong>foreign function interface</strong> (<strong>FFI</strong>),
<strong>native extension</strong>, <strong>native interface</strong>, or something along those lines.
These are nice because they free the language implementer from providing access
to every single capability the underlying platform supports. We won&rsquo;t define an
FFI for jlox, but we will add one native function to give you an idea of what it
looks like.</p>
<h3><a href="#telling-time" id="telling-time"><small>10&#8202;.&#8202;2&#8202;.&#8202;1</small>Telling time</a></h3>
<p>When we get to <a href="a-bytecode-virtual-machine.html">Part III</a> and start working on a much more efficient
implementation of Lox, we&rsquo;re going to care deeply about performance. Performance
work requires measurement, and that in turn means <strong>benchmarks</strong>. These are
programs that measure the time it takes to exercise some corner of the
interpreter.</p>
<p>We could measure the time it takes to start up the interpreter, run the
benchmark, and exit, but that adds a lot of overhead<span class="em">&mdash;</span>JVM startup time, OS
shenanigans, etc. That stuff does matter, of course, but if you&rsquo;re just trying
to validate an optimization to some piece of the interpreter, you don&rsquo;t want
that overhead obscuring your results.</p>
<p>A nicer solution is to have the benchmark script itself measure the time elapsed
between two points in the code. To do that, a Lox program needs to be able to
tell time. There&rsquo;s no way to do that now<span class="em">&mdash;</span>you can&rsquo;t implement a useful clock
&ldquo;from scratch&rdquo; without access to the underlying clock on the computer.</p>
<p>So we&rsquo;ll add <code>clock()</code>, a native function that returns the number of seconds
that have passed since some fixed point in time. The difference between two
successive invocations tells you how much time elapsed between the two calls.
This function is defined in the global scope, so let&rsquo;s ensure the interpreter
has access to that.</p>
<div class="codehilite"><pre class="insert-before">class Interpreter implements Expr.Visitor&lt;Object&gt;,
                             Stmt.Visitor&lt;Void&gt; {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em><br>
replace 1 line</div>
<pre class="insert">  <span class="k">final</span> <span class="t">Environment</span> <span class="i">globals</span> = <span class="k">new</span> <span class="t">Environment</span>();
  <span class="k">private</span> <span class="t">Environment</span> <span class="i">environment</span> = <span class="i">globals</span>;
</pre><pre class="insert-after">

  void interpret(List&lt;Stmt&gt; statements) {
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em>, replace 1 line</div>

<p>The <code>environment</code> field in the interpreter changes as we enter and exit local
scopes. It tracks the <em>current</em> environment. This new <code>globals</code> field holds a
fixed reference to the outermost global environment.</p>
<p>When we instantiate an Interpreter, we stuff the native function in that global
scope.</p>
<div class="codehilite"><pre class="insert-before">  private Environment environment = globals;

</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em></div>
<pre class="insert">  <span class="t">Interpreter</span>() {
    <span class="i">globals</span>.<span class="i">define</span>(<span class="s">&quot;clock&quot;</span>, <span class="k">new</span> <span class="t">LoxCallable</span>() {
      <span class="a">@Override</span>
      <span class="k">public</span> <span class="t">int</span> <span class="i">arity</span>() { <span class="k">return</span> <span class="n">0</span>; }

      <span class="a">@Override</span>
      <span class="k">public</span> <span class="t">Object</span> <span class="i">call</span>(<span class="t">Interpreter</span> <span class="i">interpreter</span>,
                         <span class="t">List</span>&lt;<span class="t">Object</span>&gt; <span class="i">arguments</span>) {
        <span class="k">return</span> (<span class="t">double</span>)<span class="t">System</span>.<span class="i">currentTimeMillis</span>() / <span class="n">1000.0</span>;
      }

      <span class="a">@Override</span>
      <span class="k">public</span> <span class="t">String</span> <span class="i">toString</span>() { <span class="k">return</span> <span class="s">&quot;&lt;native fn&gt;&quot;</span>; }
    });
  }

</pre><pre class="insert-after">  void interpret(List&lt;Stmt&gt; statements) {
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em></div>

<p>This defines a <span name="lisp-1">variable</span> named &ldquo;clock&rdquo;. Its value is a
Java anonymous class that implements LoxCallable. The <code>clock()</code> function takes
no arguments, so its arity is zero. The implementation of <code>call()</code> calls the
corresponding Java function and converts the result to a double value in
seconds.</p>
<aside name="lisp-1">
<p>In Lox, functions and variables occupy the same namespace. In Common Lisp, the
two live in their own worlds. A function and variable with the same name don&rsquo;t
collide. If you call the name, it looks up the function. If you refer to it, it
looks up the variable. This does require jumping through some hoops when you do
want to refer to a function as a first-class value.</p>
<p>Richard P. Gabriel and Kent Pitman coined the terms &ldquo;Lisp-1&rdquo; to refer to
languages like Scheme that put functions and variables in the same namespace,
and &ldquo;Lisp-2&rdquo; for languages like Common Lisp that partition them. Despite being
totally opaque, those names have since stuck. Lox is a Lisp-1.</p>
</aside>
<p>If we wanted to add other native functions<span class="em">&mdash;</span>reading input from the user,
working with files, etc.<span class="em">&mdash;</span>we could add them each as their own anonymous class
that implements LoxCallable. But for the book, this one is really all we need.</p>
<p>Let&rsquo;s get ourselves out of the function-defining business and let our users
take over<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h2><a href="#function-declarations" id="function-declarations"><small>10&#8202;.&#8202;3</small>Function Declarations</a></h2>
<p>We finally get to add a new production to the <code>declaration</code> rule we introduced
back when we added variables. Function declarations, like variables, bind a new
<span name="name">name</span>. That means they are allowed only in places where
a declaration is permitted.</p>
<aside name="name">
<p>A named function declaration isn&rsquo;t really a single primitive operation. It&rsquo;s
syntactic sugar for two distinct steps: (1) creating a new function object, and
(2) binding a new variable to it. If Lox had syntax for anonymous functions, we
wouldn&rsquo;t need function declaration statements. You could just do:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">add</span> = <span class="k">fun</span> (<span class="i">a</span>, <span class="i">b</span>) {
  <span class="k">print</span> <span class="i">a</span> + <span class="i">b</span>;
};
</pre></div>
<p>However, since named functions are the common case, I went ahead and gave Lox
nice syntax for them.</p>
</aside>
<div class="codehilite"><pre><span class="i">declaration</span>    → <span class="i">funDecl</span>
               | <span class="i">varDecl</span>
               | <span class="i">statement</span> ;
</pre></div>
<p>The updated <code>declaration</code> rule references this new rule:</p>
<div class="codehilite"><pre><span class="i">funDecl</span>        → <span class="s">&quot;fun&quot;</span> <span class="i">function</span> ;
<span class="i">function</span>       → <span class="t">IDENTIFIER</span> <span class="s">&quot;(&quot;</span> <span class="i">parameters</span>? <span class="s">&quot;)&quot;</span> <span class="i">block</span> ;
</pre></div>
<p>The main <code>funDecl</code> rule uses a separate helper rule <code>function</code>. A function
<em>declaration statement</em> is the <code>fun</code> keyword followed by the actual function-y
stuff. When we get to classes, we&rsquo;ll reuse that <code>function</code> rule for declaring
methods. Those look similar to function declarations, but aren&rsquo;t preceded by
<span name="fun"><code>fun</code></span>.</p>
<aside name="fun">
<p>Methods are too classy to have fun.</p>
</aside>
<p>The function itself is a name followed by the parenthesized parameter list and
the body. The body is always a braced block, using the same grammar rule that
block statements use. The parameter list uses this rule:</p>
<div class="codehilite"><pre><span class="i">parameters</span>     → <span class="t">IDENTIFIER</span> ( <span class="s">&quot;,&quot;</span> <span class="t">IDENTIFIER</span> )* ;
</pre></div>
<p>It&rsquo;s like the earlier <code>arguments</code> rule, except that each parameter is an
identifier, not an expression. That&rsquo;s a lot of new syntax for the parser to chew
through, but the resulting AST <span name="fun-ast">node</span> isn&rsquo;t too bad.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Expression : Expr expression&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Function   : Token name, List&lt;Token&gt; params,&quot;</span> +
                  <span class="s">&quot; List&lt;Stmt&gt; body&quot;</span>,
</pre><pre class="insert-after">      &quot;If         : Expr condition, Stmt thenBranch,&quot; +
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="fun-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#function-statement">Appendix II</a>.</p>
</aside>
<p>A function node has a name, a list of parameters (their names), and then the
body. We store the body as the list of statements contained inside the curly
braces.</p>
<p>Over in the parser, we weave in the new declaration.</p>
<div class="codehilite"><pre class="insert-before">    try {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>declaration</em>()</div>
<pre class="insert">      <span class="k">if</span> (<span class="i">match</span>(<span class="i">FUN</span>)) <span class="k">return</span> <span class="i">function</span>(<span class="s">&quot;function&quot;</span>);
</pre><pre class="insert-after">      if (match(VAR)) return varDeclaration();
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>declaration</em>()</div>

<p>Like other statements, a function is recognized by the leading keyword. When we
encounter <code>fun</code>, we call <code>function</code>. That corresponds to the <code>function</code> grammar
rule since we already matched and consumed the <code>fun</code> keyword. We&rsquo;ll build the
method up a piece at a time, starting with this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>expressionStatement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">function</span>(<span class="t">String</span> <span class="i">kind</span>) {
    <span class="t">Token</span> <span class="i">name</span> = <span class="i">consume</span>(<span class="i">IDENTIFIER</span>, <span class="s">&quot;Expect &quot;</span> + <span class="i">kind</span> + <span class="s">&quot; name.&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>expressionStatement</em>()</div>

<p>Right now, it only consumes the identifier token for the function&rsquo;s name. You
might be wondering about that funny little <code>kind</code> parameter. Just like we reuse
the grammar rule, we&rsquo;ll reuse the <code>function()</code> method later to parse methods
inside classes. When we do that, we&rsquo;ll pass in &ldquo;method&rdquo; for <code>kind</code> so that the
error messages are specific to the kind of declaration being parsed.</p>
<p>Next, we parse the parameter list and the pair of parentheses wrapped around it.</p>
<div class="codehilite"><pre class="insert-before">    Token name = consume(IDENTIFIER, &quot;Expect &quot; + kind + &quot; name.&quot;);
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>function</em>()</div>
<pre class="insert">    <span class="i">consume</span>(<span class="i">LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after &quot;</span> + <span class="i">kind</span> + <span class="s">&quot; name.&quot;</span>);
    <span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">parameters</span> = <span class="k">new</span> <span class="t">ArrayList</span>&lt;&gt;();
    <span class="k">if</span> (!<span class="i">check</span>(<span class="i">RIGHT_PAREN</span>)) {
      <span class="k">do</span> {
        <span class="k">if</span> (<span class="i">parameters</span>.<span class="i">size</span>() &gt;= <span class="n">255</span>) {
          <span class="i">error</span>(<span class="i">peek</span>(), <span class="s">&quot;Can&#39;t have more than 255 parameters.&quot;</span>);
        }

        <span class="i">parameters</span>.<span class="i">add</span>(
            <span class="i">consume</span>(<span class="i">IDENTIFIER</span>, <span class="s">&quot;Expect parameter name.&quot;</span>));
      } <span class="k">while</span> (<span class="i">match</span>(<span class="i">COMMA</span>));
    }
    <span class="i">consume</span>(<span class="i">RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after parameters.&quot;</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>function</em>()</div>

<p>This is like the code for handling arguments in a call, except not split out
into a helper method. The outer <code>if</code> statement handles the zero parameter case,
and the inner <code>do</code>-<code>while</code> loop parses parameters as long as we find commas to
separate them. The result is the list of tokens for each parameter&rsquo;s name.</p>
<p>Just like we do with arguments to function calls, we validate at parse time
that you don&rsquo;t exceed the maximum number of parameters a function is allowed to
have.</p>
<p>Finally, we parse the body and wrap it all up in a function node.</p>
<div class="codehilite"><pre class="insert-before">    consume(RIGHT_PAREN, &quot;Expect ')' after parameters.&quot;);
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>function</em>()</div>
<pre class="insert">

    <span class="i">consume</span>(<span class="i">LEFT_BRACE</span>, <span class="s">&quot;Expect &#39;{&#39; before &quot;</span> + <span class="i">kind</span> + <span class="s">&quot; body.&quot;</span>);
    <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">body</span> = <span class="i">block</span>();
    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Function</span>(<span class="i">name</span>, <span class="i">parameters</span>, <span class="i">body</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>function</em>()</div>

<p>Note that we consume the <code>{</code> at the beginning of the body here before calling
<code>block()</code>. That&rsquo;s because <code>block()</code> assumes the brace token has already been
matched. Consuming it here lets us report a more precise error message if the
<code>{</code> isn&rsquo;t found since we know it&rsquo;s in the context of a function declaration.</p>
<h2><a href="#function-objects" id="function-objects"><small>10&#8202;.&#8202;4</small>Function Objects</a></h2>
<p>We&rsquo;ve got some syntax parsed so usually we&rsquo;re ready to interpret, but first we
need to think about how to represent a Lox function in Java. We need to keep
track of the parameters so that we can bind them to argument values when the
function is called. And, of course, we need to keep the code for the body of the
function so that we can execute it.</p>
<p>That&rsquo;s basically what the Stmt.Function class is. Could we just use that?
Almost, but not quite. We also need a class that implements LoxCallable so that
we can call it. We don&rsquo;t want the runtime phase of the interpreter to bleed into
the front end&rsquo;s syntax classes so we don&rsquo;t want Stmt.Function itself to
implement that. Instead, we wrap it in a new class.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxFunction.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.List</span>;

<span class="k">class</span> <span class="t">LoxFunction</span> <span class="k">implements</span> <span class="t">LoxCallable</span> {
  <span class="k">private</span> <span class="k">final</span> <span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">declaration</span>;
  <span class="t">LoxFunction</span>(<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">declaration</span>) {
    <span class="k">this</span>.<span class="i">declaration</span> = <span class="i">declaration</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, create new file</div>

<p>We implement the <code>call()</code> of LoxCallable like so:</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxFunction.java</em><br>
add after <em>LoxFunction</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">call</span>(<span class="t">Interpreter</span> <span class="i">interpreter</span>,
                     <span class="t">List</span>&lt;<span class="t">Object</span>&gt; <span class="i">arguments</span>) {
    <span class="t">Environment</span> <span class="i">environment</span> = <span class="k">new</span> <span class="t">Environment</span>(<span class="i">interpreter</span>.<span class="i">globals</span>);
    <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">declaration</span>.<span class="i">params</span>.<span class="i">size</span>(); <span class="i">i</span>++) {
      <span class="i">environment</span>.<span class="i">define</span>(<span class="i">declaration</span>.<span class="i">params</span>.<span class="i">get</span>(<span class="i">i</span>).<span class="i">lexeme</span>,
          <span class="i">arguments</span>.<span class="i">get</span>(<span class="i">i</span>));
    }

    <span class="i">interpreter</span>.<span class="i">executeBlock</span>(<span class="i">declaration</span>.<span class="i">body</span>, <span class="i">environment</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, add after <em>LoxFunction</em>()</div>

<p>This handful of lines of code is one of the most fundamental, powerful pieces of
our interpreter. As we saw in <a href="statements-and-state.html">the chapter on statements and <span
name="env">state</span></a>, managing name environments is a core part
of a language implementation. Functions are deeply tied to that.</p>
<aside name="env">
<p>We&rsquo;ll dig even deeper into environments in the <a href="resolving-and-binding.html">next chapter</a>.</p>
</aside>
<p>Parameters are core to functions, especially the fact that a function
<em>encapsulates</em> its parameters<span class="em">&mdash;</span>no other code outside of the function can see
them. This means each function gets its own environment where it stores those
variables.</p>
<p>Further, this environment must be created dynamically. Each function <em>call</em> gets
its own environment. Otherwise, recursion would break. If there are multiple
calls to the same function in play at the same time, each needs its <em>own</em>
environment, even though they are all calls to the same function.</p>
<p>For example, here&rsquo;s a convoluted way to count to three:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">count</span>(<span class="i">n</span>) {
  <span class="k">if</span> (<span class="i">n</span> &gt; <span class="n">1</span>) <span class="i">count</span>(<span class="i">n</span> - <span class="n">1</span>);
  <span class="k">print</span> <span class="i">n</span>;
}

<span class="i">count</span>(<span class="n">3</span>);
</pre></div>
<p>Imagine we pause the interpreter right at the point where it&rsquo;s about to print 1
in the innermost nested call. The outer calls to print 2 and 3 haven&rsquo;t printed
their values yet, so there must be environments somewhere in memory that still
store the fact that <code>n</code> is bound to 3 in one context, 2 in another, and 1 in the
innermost, like:</p><img src="image/functions/recursion.png" alt="A separate environment for each recursive call." />
<p>That&rsquo;s why we create a new environment at each <em>call</em>, not at the function
<em>declaration</em>. The <code>call()</code> method we saw earlier does that. At the beginning of
the call, it creates a new environment. Then it walks the parameter and argument
lists in lockstep. For each pair, it creates a new variable with the parameter&rsquo;s
name and binds it to the argument&rsquo;s value.</p>
<p>So, for a program like this:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">add</span>(<span class="i">a</span>, <span class="i">b</span>, <span class="i">c</span>) {
  <span class="k">print</span> <span class="i">a</span> + <span class="i">b</span> + <span class="i">c</span>;
}

<span class="i">add</span>(<span class="n">1</span>, <span class="n">2</span>, <span class="n">3</span>);
</pre></div>
<p>At the point of the call to <code>add()</code>, the interpreter creates something like
this:</p><img src="image/functions/binding.png" alt="Binding arguments to their parameters." />
<p>Then <code>call()</code> tells the interpreter to execute the body of the function in this
new function-local environment. Up until now, the current environment was the
environment where the function was being called. Now, we teleport from there
inside the new parameter space we&rsquo;ve created for the function.</p>
<p>This is all that&rsquo;s required to pass data into the function. By using different
environments when we execute the body, calls to the same function with the
same code can produce different results.</p>
<p>Once the body of the function has finished executing, <code>executeBlock()</code> discards
that function-local environment and restores the previous one that was active
back at the callsite. Finally, <code>call()</code> returns <code>null</code>, which returns <code>nil</code> to
the caller. (We&rsquo;ll add return values later.)</p>
<p>Mechanically, the code is pretty simple. Walk a couple of lists. Bind some new
variables. Call a method. But this is where the crystalline <em>code</em> of the
function declaration becomes a living, breathing <em>invocation</em>. This is one of my
favorite snippets in this entire book. Feel free to take a moment to meditate on
it if you&rsquo;re so inclined.</p>
<p>Done? OK. Note that when we bind the parameters, we assume the parameter and argument
lists have the same length. This is safe because <code>visitCallExpr()</code> checks the
arity before calling <code>call()</code>. It relies on the function reporting its arity to
do that.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxFunction.java</em><br>
add after <em>LoxFunction</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">int</span> <span class="i">arity</span>() {
    <span class="k">return</span> <span class="i">declaration</span>.<span class="i">params</span>.<span class="i">size</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, add after <em>LoxFunction</em>()</div>

<p>That&rsquo;s most of our object representation. While we&rsquo;re in here, we may as well
implement <code>toString()</code>.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxFunction.java</em><br>
add after <em>LoxFunction</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">String</span> <span class="i">toString</span>() {
    <span class="k">return</span> <span class="s">&quot;&lt;fn &quot;</span> + <span class="i">declaration</span>.<span class="i">name</span>.<span class="i">lexeme</span> + <span class="s">&quot;&gt;&quot;</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, add after <em>LoxFunction</em>()</div>

<p>This gives nicer output if a user decides to print a function value.</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">add</span>(<span class="i">a</span>, <span class="i">b</span>) {
  <span class="k">print</span> <span class="i">a</span> + <span class="i">b</span>;
}

<span class="k">print</span> <span class="i">add</span>; <span class="c">// &quot;&lt;fn add&gt;&quot;.</span>
</pre></div>
<h3><a href="#interpreting-function-declarations" id="interpreting-function-declarations"><small>10&#8202;.&#8202;4&#8202;.&#8202;1</small>Interpreting function declarations</a></h3>
<p>We&rsquo;ll come back and refine LoxFunction soon, but that&rsquo;s enough to get started.
Now we can visit a function declaration.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitExpressionStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitFunctionStmt</span>(<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">stmt</span>) {
    <span class="t">LoxFunction</span> <span class="i">function</span> = <span class="k">new</span> <span class="t">LoxFunction</span>(<span class="i">stmt</span>);
    <span class="i">environment</span>.<span class="i">define</span>(<span class="i">stmt</span>.<span class="i">name</span>.<span class="i">lexeme</span>, <span class="i">function</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitExpressionStmt</em>()</div>

<p>This is similar to how we interpret other literal expressions. We take a
function <em>syntax node</em><span class="em">&mdash;</span>a compile-time representation of the function<span class="em">&mdash;</span>and
convert it to its runtime representation. Here, that&rsquo;s a LoxFunction that wraps
the syntax node.</p>
<p>Function declarations are different from other literal nodes in that the
declaration <em>also</em> binds the resulting object to a new variable. So, after
creating the LoxFunction, we create a new binding in the current environment and
store a reference to it there.</p>
<p>With that, we can define and call our own functions all within Lox. Give it a
try:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">sayHi</span>(<span class="i">first</span>, <span class="i">last</span>) {
  <span class="k">print</span> <span class="s">&quot;Hi, &quot;</span> + <span class="i">first</span> + <span class="s">&quot; &quot;</span> + <span class="i">last</span> + <span class="s">&quot;!&quot;</span>;
}

<span class="i">sayHi</span>(<span class="s">&quot;Dear&quot;</span>, <span class="s">&quot;Reader&quot;</span>);
</pre></div>
<p>I don&rsquo;t know about you, but that looks like an honest-to-God programming
language to me.</p>
<h2><a href="#return-statements" id="return-statements"><small>10&#8202;.&#8202;5</small>Return Statements</a></h2>
<p>We can get data into functions by passing parameters, but we&rsquo;ve got no way to
get results back <span name="hotel"><em>out</em></span>. If Lox were an
expression-oriented language like Ruby or Scheme, the body would be an
expression whose value is implicitly the function&rsquo;s result. But in Lox, the body
of a function is a list of statements which don&rsquo;t produce values, so we need
dedicated syntax for emitting a result. In other words, <code>return</code> statements. I&rsquo;m
sure you can guess the grammar already.</p>
<aside name="hotel">
<p>The Hotel California of data.</p>
</aside>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">forStmt</span>
               | <span class="i">ifStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">returnStmt</span>
               | <span class="i">whileStmt</span>
               | <span class="i">block</span> ;

<span class="i">returnStmt</span>     → <span class="s">&quot;return&quot;</span> <span class="i">expression</span>? <span class="s">&quot;;&quot;</span> ;
</pre></div>
<p>We&rsquo;ve got one more<span class="em">&mdash;</span>the final, in fact<span class="em">&mdash;</span>production under the venerable
<code>statement</code> rule. A <code>return</code> statement is the <code>return</code> keyword followed by an
optional expression and terminated with a semicolon.</p>
<p>The return value is optional to support exiting early from a function that
doesn&rsquo;t return a useful value. In statically typed languages, &ldquo;void&rdquo; functions
don&rsquo;t return a value and non-void ones do. Since Lox is dynamically typed, there
are no true void functions. The compiler has no way of preventing you from
taking the result value of a call to a function that doesn&rsquo;t contain a <code>return</code>
statement.</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">procedure</span>() {
  <span class="k">print</span> <span class="s">&quot;don&#39;t return anything&quot;</span>;
}

<span class="k">var</span> <span class="i">result</span> = <span class="i">procedure</span>();
<span class="k">print</span> <span class="i">result</span>; <span class="c">// ?</span>
</pre></div>
<p>This means every Lox function must return <em>something</em>, even if it contains no
<code>return</code> statements at all. We use <code>nil</code> for this, which is why LoxFunction&rsquo;s
implementation of <code>call()</code> returns <code>null</code> at the end. In that same vein, if you
omit the value in a <code>return</code> statement, we simply treat it as equivalent to:</p>
<div class="codehilite"><pre><span class="k">return</span> <span class="k">nil</span>;
</pre></div>
<p>Over in our AST generator, we add a <span name="return-ast">new node</span>.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Print      : Expr expression&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Return     : Token keyword, Expr value&quot;</span>,
</pre><pre class="insert-after">      &quot;Var        : Token name, Expr initializer&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="return-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#return-statement">Appendix II</a>.</p>
</aside>
<p>It keeps the <code>return</code> keyword token so we can use its location for error
reporting, and the value being returned, if any. We parse it like other
statements, first by recognizing the initial keyword.</p>
<div class="codehilite"><pre class="insert-before">    if (match(PRINT)) return printStatement();
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>statement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">match</span>(<span class="i">RETURN</span>)) <span class="k">return</span> <span class="i">returnStatement</span>();
</pre><pre class="insert-after">    if (match(WHILE)) return whileStatement();
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>statement</em>()</div>

<p>That branches out to:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>printStatement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">returnStatement</span>() {
    <span class="t">Token</span> <span class="i">keyword</span> = <span class="i">previous</span>();
    <span class="t">Expr</span> <span class="i">value</span> = <span class="k">null</span>;
    <span class="k">if</span> (!<span class="i">check</span>(<span class="i">SEMICOLON</span>)) {
      <span class="i">value</span> = <span class="i">expression</span>();
    }

    <span class="i">consume</span>(<span class="i">SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after return value.&quot;</span>);
    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Return</span>(<span class="i">keyword</span>, <span class="i">value</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>printStatement</em>()</div>

<p>After snagging the previously consumed <code>return</code> keyword, we look for a value
expression. Since many different tokens can potentially start an expression,
it&rsquo;s hard to tell if a return value is <em>present</em>. Instead, we check if it&rsquo;s
<em>absent</em>. Since a semicolon can&rsquo;t begin an expression, if the next token is
that, we know there must not be a value.</p>
<h3><a href="#returning-from-calls" id="returning-from-calls"><small>10&#8202;.&#8202;5&#8202;.&#8202;1</small>Returning from calls</a></h3>
<p>Interpreting a <code>return</code> statement is tricky. You can return from anywhere within
the body of a function, even deeply nested inside other statements. When the
return is executed, the interpreter needs to jump all the way out of whatever
context it&rsquo;s currently in and cause the function call to complete, like some
kind of jacked-up control flow construct.</p>
<p>For example, say we&rsquo;re running this program and we&rsquo;re about to execute the
<code>return</code> statement:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">count</span>(<span class="i">n</span>) {
  <span class="k">while</span> (<span class="i">n</span> &lt; <span class="n">100</span>) {
    <span class="k">if</span> (<span class="i">n</span> == <span class="n">3</span>) <span class="k">return</span> <span class="i">n</span>; <span class="c">// &lt;--</span>
    <span class="k">print</span> <span class="i">n</span>;
    <span class="i">n</span> = <span class="i">n</span> + <span class="n">1</span>;
  }
}

<span class="i">count</span>(<span class="n">1</span>);
</pre></div>
<p>The Java call stack currently looks roughly like this:</p>
<div class="codehilite"><pre>Interpreter.visitReturnStmt()
Interpreter.visitIfStmt()
Interpreter.executeBlock()
Interpreter.visitBlockStmt()
Interpreter.visitWhileStmt()
Interpreter.executeBlock()
LoxFunction.call()
Interpreter.visitCallExpr()
</pre></div>
<p>We need to get from the top of the stack all the way back to <code>call()</code>. I don&rsquo;t
know about you, but to me that sounds like exceptions. When we execute a
<code>return</code> statement, we&rsquo;ll use an exception to unwind the interpreter past the
visit methods of all of the containing statements back to the code that began
executing the body.</p>
<p>The visit method for our new AST node looks like this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitPrintStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitReturnStmt</span>(<span class="t">Stmt</span>.<span class="t">Return</span> <span class="i">stmt</span>) {
    <span class="t">Object</span> <span class="i">value</span> = <span class="k">null</span>;
    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">value</span> != <span class="k">null</span>) <span class="i">value</span> = <span class="i">evaluate</span>(<span class="i">stmt</span>.<span class="i">value</span>);

    <span class="k">throw</span> <span class="k">new</span> <span class="t">Return</span>(<span class="i">value</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitPrintStmt</em>()</div>

<p>If we have a return value, we evaluate it; otherwise, we use <code>nil</code>. Then we take
that value and wrap it in a custom exception class and throw it.</p>
<div class="codehilite"><div class="source-file"><em>lox/Return.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">class</span> <span class="t">Return</span> <span class="k">extends</span> <span class="t">RuntimeException</span> {
  <span class="k">final</span> <span class="t">Object</span> <span class="i">value</span>;

  <span class="t">Return</span>(<span class="t">Object</span> <span class="i">value</span>) {
    <span class="k">super</span>(<span class="k">null</span>, <span class="k">null</span>, <span class="k">false</span>, <span class="k">false</span>);
    <span class="k">this</span>.<span class="i">value</span> = <span class="i">value</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/Return.java</em>, create new file</div>

<p>This class wraps the return value with the accoutrements Java requires for a
runtime exception class. The weird super constructor call with those <code>null</code> and
<code>false</code> arguments disables some JVM machinery that we don&rsquo;t need. Since we&rsquo;re
using our exception class for <span name="exception">control flow</span> and not
actual error handling, we don&rsquo;t need overhead like stack traces.</p>
<aside name="exception">
<p>For the record, I&rsquo;m not generally a fan of using exceptions for control flow.
But inside a heavily recursive tree-walk interpreter, it&rsquo;s the way to go. Since
our own syntax tree evaluation is so heavily tied to the Java call stack, we&rsquo;re
pressed to do some heavyweight call stack manipulation occasionally, and
exceptions are a handy tool for that.</p>
</aside>
<p>We want this to unwind all the way to where the function call began, the
<code>call()</code> method in LoxFunction.</p>
<div class="codehilite"><pre class="insert-before">          arguments.get(i));
    }

</pre><div class="source-file"><em>lox/LoxFunction.java</em><br>
in <em>call</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">try</span> {
      <span class="i">interpreter</span>.<span class="i">executeBlock</span>(<span class="i">declaration</span>.<span class="i">body</span>, <span class="i">environment</span>);
    } <span class="k">catch</span> (<span class="t">Return</span> <span class="i">returnValue</span>) {
      <span class="k">return</span> <span class="i">returnValue</span>.<span class="i">value</span>;
    }
</pre><pre class="insert-after">    return null;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, in <em>call</em>(), replace 1 line</div>

<p>We wrap the call to <code>executeBlock()</code> in a try-catch block. When it catches a
return exception, it pulls out the value and makes that the return value from
<code>call()</code>. If it never catches one of these exceptions, it means the function
reached the end of its body without hitting a <code>return</code> statement. In that case,
it implicitly returns <code>nil</code>.</p>
<p>Let&rsquo;s try it out. We finally have enough power to support this classic
example<span class="em">&mdash;</span>a recursive function to calculate Fibonacci numbers:</p>
<p><span name="slow"></span></p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">fib</span>(<span class="i">n</span>) {
  <span class="k">if</span> (<span class="i">n</span> &lt;= <span class="n">1</span>) <span class="k">return</span> <span class="i">n</span>;
  <span class="k">return</span> <span class="i">fib</span>(<span class="i">n</span> - <span class="n">2</span>) + <span class="i">fib</span>(<span class="i">n</span> - <span class="n">1</span>);
}

<span class="k">for</span> (<span class="k">var</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="n">20</span>; <span class="i">i</span> = <span class="i">i</span> + <span class="n">1</span>) {
  <span class="k">print</span> <span class="i">fib</span>(<span class="i">i</span>);
}
</pre></div>
<p>This tiny program exercises almost every language feature we have spent the past
several chapters implementing<span class="em">&mdash;</span>expressions, arithmetic, branching, looping,
variables, functions, function calls, parameter binding, and returns.</p>
<aside name="slow">
<p>You might notice this is pretty slow. Obviously, recursion isn&rsquo;t the most
efficient way to calculate Fibonacci numbers, but as a microbenchmark, it does
a good job of stress testing how fast our interpreter implements function calls.</p>
<p>As you can see, the answer is &ldquo;not very fast&rdquo;. That&rsquo;s OK. Our C interpreter will
be faster.</p>
</aside>
<h2><a href="#local-functions-and-closures" id="local-functions-and-closures"><small>10&#8202;.&#8202;6</small>Local Functions and Closures</a></h2>
<p>Our functions are pretty full featured, but there is one hole to patch. In fact,
it&rsquo;s a big enough gap that we&rsquo;ll spend most of the <a href="resolving-and-binding.html">next chapter</a> sealing it
up, but we can get started here.</p>
<p>LoxFunction&rsquo;s implementation of <code>call()</code> creates a new environment where it
binds the function&rsquo;s parameters. When I showed you that code, I glossed over one
important point: What is the <em>parent</em> of that environment?</p>
<p>Right now, it is always <code>globals</code>, the top-level global environment. That way,
if an identifier isn&rsquo;t defined inside the function body itself, the interpreter
can look outside the function in the global scope to find it. In the Fibonacci
example, that&rsquo;s how the interpreter is able to look up the recursive call to
<code>fib</code> inside the function&rsquo;s own body<span class="em">&mdash;</span><code>fib</code> is a global variable.</p>
<p>But recall that in Lox, function declarations are allowed <em>anywhere</em> a name can
be bound. That includes the top level of a Lox script, but also the inside of
blocks or other functions. Lox supports <strong>local functions</strong> that are defined
inside another function, or nested inside a block.</p>
<p>Consider this classic example:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">makeCounter</span>() {
  <span class="k">var</span> <span class="i">i</span> = <span class="n">0</span>;
  <span class="k">fun</span> <span class="i">count</span>() {
    <span class="i">i</span> = <span class="i">i</span> + <span class="n">1</span>;
    <span class="k">print</span> <span class="i">i</span>;
  }

  <span class="k">return</span> <span class="i">count</span>;
}

<span class="k">var</span> <span class="i">counter</span> = <span class="i">makeCounter</span>();
<span class="i">counter</span>(); <span class="c">// &quot;1&quot;.</span>
<span class="i">counter</span>(); <span class="c">// &quot;2&quot;.</span>
</pre></div>
<p>Here, <code>count()</code> uses <code>i</code>, which is declared outside of itself in the containing
function <code>makeCounter()</code>. <code>makeCounter()</code> returns a reference to the <code>count()</code>
function and then its own body finishes executing completely.</p>
<p>Meanwhile, the top-level code invokes the returned <code>count()</code> function. That
executes the body of <code>count()</code>, which assigns to and reads <code>i</code>, even though the
function where <code>i</code> was defined has already exited.</p>
<p>If you&rsquo;ve never encountered a language with nested functions before, this might
seem crazy, but users do expect it to work. Alas, if you run it now, you get an
undefined variable error in the call to <code>counter()</code> when the body of <code>count()</code>
tries to look up <code>i</code>. That&rsquo;s because the environment chain in effect looks like
this:</p><img src="image/functions/global.png" alt="The environment chain from count()'s body to the global scope." />
<p>When we call <code>count()</code> (through the reference to it stored in <code>counter</code>), we
create a new empty environment for the function body. The parent of that is the
global environment. We lost the environment for <code>makeCounter()</code> where <code>i</code> is
bound.</p>
<p>Let&rsquo;s go back in time a bit. Here&rsquo;s what the environment chain looked like right
when we declared <code>count()</code> inside the body of <code>makeCounter()</code>:</p><img src="image/functions/body.png" alt="The environment chain inside the body of makeCounter()." />
<p>So at the point where the function is declared, we can see <code>i</code>. But when we
return from <code>makeCounter()</code> and exit its body, the interpreter discards that
environment. Since the interpreter doesn&rsquo;t keep the environment surrounding
<code>count()</code> around, it&rsquo;s up to the function object itself to hang on to it.</p>
<p>This data structure is called a <span name="closure"><strong>closure</strong></span> because
it &ldquo;closes over&rdquo; and holds on to the surrounding variables where the function is
declared. Closures have been around since the early Lisp days, and language
hackers have come up with all manner of ways to implement them. For jlox, we&rsquo;ll
do the simplest thing that works. In LoxFunction, we add a field to store an
environment.</p>
<aside name="closure">
<p>&ldquo;Closure&rdquo; is yet another term coined by Peter J. Landin. I assume before he came
along that computer scientists communicated with each other using only primitive
grunts and pawing hand gestures.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  private final Stmt.Function declaration;
</pre><div class="source-file"><em>lox/LoxFunction.java</em><br>
in class <em>LoxFunction</em></div>
<pre class="insert">  <span class="k">private</span> <span class="k">final</span> <span class="t">Environment</span> <span class="i">closure</span>;

</pre><pre class="insert-after">  LoxFunction(Stmt.Function declaration) {
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, in class <em>LoxFunction</em></div>

<p>We initialize that in the constructor.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxFunction.java</em><br>
constructor <em>LoxFunction</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">LoxFunction</span>(<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">declaration</span>, <span class="t">Environment</span> <span class="i">closure</span>) {
    <span class="k">this</span>.<span class="i">closure</span> = <span class="i">closure</span>;
</pre><pre class="insert-after">    this.declaration = declaration;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, constructor <em>LoxFunction</em>(), replace 1 line</div>

<p>When we create a LoxFunction, we capture the current environment.</p>
<div class="codehilite"><pre class="insert-before">  public Void visitFunctionStmt(Stmt.Function stmt) {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitFunctionStmt</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">LoxFunction</span> <span class="i">function</span> = <span class="k">new</span> <span class="t">LoxFunction</span>(<span class="i">stmt</span>, <span class="i">environment</span>);
</pre><pre class="insert-after">    environment.define(stmt.name.lexeme, function);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitFunctionStmt</em>(), replace 1 line</div>

<p>This is the environment that is active when the function is <em>declared</em>, not when
it&rsquo;s <em>called</em>, which is what we want. It represents the lexical scope
surrounding the function declaration. Finally, when we call the function, we use
that environment as the call&rsquo;s parent instead of going straight to <code>globals</code>.</p>
<div class="codehilite"><pre class="insert-before">                     List&lt;Object&gt; arguments) {
</pre><div class="source-file"><em>lox/LoxFunction.java</em><br>
in <em>call</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">Environment</span> <span class="i">environment</span> = <span class="k">new</span> <span class="t">Environment</span>(<span class="i">closure</span>);
</pre><pre class="insert-after">    for (int i = 0; i &lt; declaration.params.size(); i++) {
</pre></div>
<div class="source-file-narrow"><em>lox/LoxFunction.java</em>, in <em>call</em>(), replace 1 line</div>

<p>This creates an environment chain that goes from the function&rsquo;s body out through
the environments where the function is declared, all the way out to the global
scope. The runtime environment chain matches the textual nesting of the source
code like we want. The end result when we call that function looks like this:</p><img src="image/functions/closure.png" alt="The environment chain with the closure." />
<p>Now, as you can see, the interpreter can still find <code>i</code> when it needs to because
it&rsquo;s in the middle of the environment chain. Try running that <code>makeCounter()</code>
example now. It works!</p>
<p>Functions let us abstract over, reuse, and compose code. Lox is much more
powerful than the rudimentary arithmetic calculator it used to be. Alas, in our
rush to cram closures in, we have let a tiny bit of dynamic scoping leak into
the interpreter. In the <a href="resolving-and-binding.html">next chapter</a>, we will explore deeper into lexical
scope and close that hole.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Our interpreter carefully checks that the number of arguments passed to a
function matches the number of parameters it expects. Since this check is
done at runtime on every call, it has a performance cost. Smalltalk
implementations don&rsquo;t have that problem. Why not?</p>
</li>
<li>
<p>Lox&rsquo;s function declaration syntax performs two independent operations. It
creates a function and also binds it to a name. This improves usability for
the common case where you do want to associate a name with the function.
But in functional-style code, you often want to create a function to
immediately pass it to some other function or return it. In that case, it
doesn&rsquo;t need a name.</p>
<p>Languages that encourage a functional style usually support <strong>anonymous
functions</strong> or <strong>lambdas</strong><span class="em">&mdash;</span>an expression syntax that creates a function
without binding it to a name. Add anonymous function syntax to Lox so that
this works:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">thrice</span>(<span class="i">fn</span>) {
  <span class="k">for</span> (<span class="k">var</span> <span class="i">i</span> = <span class="n">1</span>; <span class="i">i</span> &lt;= <span class="n">3</span>; <span class="i">i</span> = <span class="i">i</span> + <span class="n">1</span>) {
    <span class="i">fn</span>(<span class="i">i</span>);
  }
}

<span class="i">thrice</span>(<span class="k">fun</span> (<span class="i">a</span>) {
  <span class="k">print</span> <span class="i">a</span>;
});
<span class="c">// &quot;1&quot;.</span>
<span class="c">// &quot;2&quot;.</span>
<span class="c">// &quot;3&quot;.</span>
</pre></div>
<p>How do you handle the tricky case of an anonymous function expression
occurring in an expression statement:</p>
<div class="codehilite"><pre><span class="k">fun</span> () {};
</pre></div>
</li>
<li>
<p>Is this program valid?</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">scope</span>(<span class="i">a</span>) {
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;local&quot;</span>;
}
</pre></div>
<p>In other words, are a function&rsquo;s parameters in the <em>same</em> scope as its local
variables, or in an outer scope? What does Lox do? What about other
languages you are familiar with? What do you think a language <em>should</em> do?</p>
</li>
</ol>
</div>

<footer>
<a href="resolving-and-binding.html" class="next">
  Next Chapter: &ldquo;Resolving and Binding&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/garbage-collection.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Garbage Collection &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Garbage Collection<small>26</small></a></h3>

<ul>
    <li><a href="#reachability"><small>26.1</small> Reachability</a></li>
    <li><a href="#mark-sweep-garbage-collection"><small>26.2</small> Mark-Sweep Garbage Collection</a></li>
    <li><a href="#marking-the-roots"><small>26.3</small> Marking the Roots</a></li>
    <li><a href="#tracing-object-references"><small>26.4</small> Tracing Object References</a></li>
    <li><a href="#sweeping-unused-objects"><small>26.5</small> Sweeping Unused Objects</a></li>
    <li><a href="#when-to-collect"><small>26.6</small> When to Collect</a></li>
    <li><a href="#garbage-collection-bugs"><small>26.7</small> Garbage Collection Bugs</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Generational Collectors</a></li>
</ul>


<div class="prev-next">
    <a href="closures.html" title="Closures" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="classes-and-instances.html" title="Classes and Instances" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="closures.html" title="Closures" class="prev">←</a>
<a href="classes-and-instances.html" title="Classes and Instances" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Garbage Collection<small>26</small></a></h3>

<ul>
    <li><a href="#reachability"><small>26.1</small> Reachability</a></li>
    <li><a href="#mark-sweep-garbage-collection"><small>26.2</small> Mark-Sweep Garbage Collection</a></li>
    <li><a href="#marking-the-roots"><small>26.3</small> Marking the Roots</a></li>
    <li><a href="#tracing-object-references"><small>26.4</small> Tracing Object References</a></li>
    <li><a href="#sweeping-unused-objects"><small>26.5</small> Sweeping Unused Objects</a></li>
    <li><a href="#when-to-collect"><small>26.6</small> When to Collect</a></li>
    <li><a href="#garbage-collection-bugs"><small>26.7</small> Garbage Collection Bugs</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Generational Collectors</a></li>
</ul>


<div class="prev-next">
    <a href="closures.html" title="Closures" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="classes-and-instances.html" title="Classes and Instances" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">26</div>
  <h1>Garbage Collection</h1>

<blockquote>
<p>I wanna, I wanna,<br />
I wanna, I wanna,<br />
I wanna be trash.<br /></p>
<p><cite>The Whip, &ldquo;Trash&rdquo;</cite></p>
</blockquote>
<p>We say Lox is a &ldquo;high-level&rdquo; language because it frees programmers from worrying
about details irrelevant to the problem they&rsquo;re solving. The user becomes an
executive, giving the machine abstract goals and letting the lowly computer
figure out how to get there.</p>
<p>Dynamic memory allocation is a perfect candidate for automation. It&rsquo;s necessary
for a working program, tedious to do by hand, and yet still error-prone. The
inevitable mistakes can be catastrophic, leading to crashes, memory corruption,
or security violations. It&rsquo;s the kind of risky-yet-boring work that machines
excel at over humans.</p>
<p>This is why Lox is a <strong>managed language</strong>, which means that the language
implementation manages memory allocation and freeing on the user&rsquo;s behalf. When
a user performs an operation that requires some dynamic memory, the VM
automatically allocates it. The programmer never worries about deallocating
anything. The machine ensures any memory the program is using sticks around as
long as needed.</p>
<p>Lox provides the illusion that the computer has an infinite amount of memory.
Users can allocate and allocate and allocate and never once think about where
all these bytes are coming from. Of course, computers do not yet <em>have</em> infinite
memory. So the way managed languages maintain this illusion is by going behind
the programmer&rsquo;s back and reclaiming memory that the program no longer needs.
The component that does this is called a <strong>garbage <span
name="recycle">collector</span></strong>.</p>
<aside name="recycle">
<p>Recycling would really be a better metaphor for this. The GC doesn&rsquo;t <em>throw
away</em> the memory; it reclaims it to be reused for new data. But managed
languages are older than Earth Day, so the inventors went with the analogy they
knew.</p><img src="image/garbage-collection/recycle.png" class="above" alt="A recycle bin full of bits." />
</aside>
<h2><a href="#reachability" id="reachability"><small>26&#8202;.&#8202;1</small>Reachability</a></h2>
<p>This raises a surprisingly difficult question: how does a VM tell what memory is
<em>not</em> needed? Memory is only needed if it is read in the future, but short of
having a time machine, how can an implementation tell what code the program
<em>will</em> execute and which data it <em>will</em> use? Spoiler alert: VMs cannot travel
into the future. Instead, the language makes a <span
name="conservative">conservative</span> approximation: it considers a piece of
memory to still be in use if it <em>could possibly</em> be read in the future.</p>
<aside name="conservative">
<p>I&rsquo;m using &ldquo;conservative&rdquo; in the general sense. There is such a thing as a
&ldquo;conservative garbage collector&rdquo; which means something more specific. All
garbage collectors are &ldquo;conservative&rdquo; in that they keep memory alive if it
<em>could</em> be accessed, instead of having a Magic 8-Ball that lets them more
precisely know what data <em>will</em> be accessed.</p>
<p>A <strong>conservative GC</strong> is a special kind of collector that considers any piece of
memory to be a pointer if the value in there looks like it could be an address.
This is in contrast to a <strong>precise GC</strong><span class="em">&mdash;</span>which is what we&rsquo;ll implement<span class="em">&mdash;</span>that
knows exactly which words in memory are pointers and which store other kinds of
values like numbers or strings.</p>
</aside>
<p>That sounds <em>too</em> conservative. Couldn&rsquo;t <em>any</em> bit of memory potentially be
read? Actually, no, at least not in a memory-safe language like Lox. Here&rsquo;s an
example:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;first value&quot;</span>;
<span class="i">a</span> = <span class="s">&quot;updated&quot;</span>;
<span class="c">// GC here.</span>
<span class="k">print</span> <span class="i">a</span>;
</pre></div>
<p>Say we run the GC after the assignment has completed on the second line. The
string &ldquo;first value&rdquo; is still sitting in memory, but there is no way for the
user&rsquo;s program to ever get to it. Once <code>a</code> got reassigned, the program lost any
reference to that string. We can safely free it. A value is <strong>reachable</strong> if
there is some way for a user program to reference it. Otherwise, like the string
&ldquo;first value&rdquo; here, it is <strong>unreachable</strong>.</p>
<p>Many values can be directly accessed by the VM. Take a look at:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">global</span> = <span class="s">&quot;string&quot;</span>;
{
  <span class="k">var</span> <span class="i">local</span> = <span class="s">&quot;another&quot;</span>;
  <span class="k">print</span> <span class="i">global</span> + <span class="i">local</span>;
}
</pre></div>
<p>Pause the program right after the two strings have been concatenated but before
the <code>print</code> statement has executed. The VM can reach <code>"string"</code> by looking
through the global variable table and finding the entry for <code>global</code>. It can
find <code>"another"</code> by walking the value stack and hitting the slot for the local
variable <code>local</code>. It can even find the concatenated string <code>"stringanother"</code>
since that temporary value is also sitting on the VM&rsquo;s stack at the point when
we paused our program.</p>
<p>All of these values are called <strong>roots</strong>. A root is any object that the VM can
reach directly without going through a reference in some other object. Most
roots are global variables or on the stack, but as we&rsquo;ll see, there are a couple
of other places the VM stores references to objects that it can find.</p>
<p>Other values can be found by going through a reference inside another value.
<span name="class">Fields</span> on instances of classes are the most obvious
case, but we don&rsquo;t have those yet. Even without those, our VM still has indirect
references. Consider:</p>
<aside name="class">
<p>We&rsquo;ll get there <a href="classes-and-instances.html">soon</a>, though!</p>
</aside>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">makeClosure</span>() {
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;data&quot;</span>;

  <span class="k">fun</span> <span class="i">f</span>() { <span class="k">print</span> <span class="i">a</span>; }
  <span class="k">return</span> <span class="i">f</span>;
}

{
  <span class="k">var</span> <span class="i">closure</span> = <span class="i">makeClosure</span>();
  <span class="c">// GC here.</span>
  <span class="i">closure</span>();
}
</pre></div>
<p>Say we pause the program on the marked line and run the garbage collector. When
the collector is done and the program resumes, it will call the closure, which
will in turn print <code>"data"</code>. So the collector needs to <em>not</em> free that string.
But here&rsquo;s what the stack looks like when we pause the program:</p><img src="image/garbage-collection/stack.png" alt="The stack, containing only the script and closure." />
<p>The <code>"data"</code> string is nowhere on it. It has already been hoisted off the stack
and moved into the closed upvalue that the closure uses. The closure itself is
on the stack. But to get to the string, we need to trace through the closure and
its upvalue array. Since it <em>is</em> possible for the user&rsquo;s program to do that, all
of these indirectly accessible objects are also considered reachable.</p><img src="image/garbage-collection/reachable.png" class="wide" alt="All of the referenced objects from the closure, and the path to the 'data' string from the stack." />
<p>This gives us an inductive definition of reachability:</p>
<ul>
<li>
<p>All roots are reachable.</p>
</li>
<li>
<p>Any object referred to from a reachable object is itself reachable.</p>
</li>
</ul>
<p>These are the values that are still &ldquo;live&rdquo; and need to stay in memory. Any value
that <em>doesn&rsquo;t</em> meet this definition is fair game for the collector to reap.
That recursive pair of rules hints at a recursive algorithm we can use to free
up unneeded memory:</p>
<ol>
<li>
<p>Starting with the roots, traverse through object references to find the
full set of reachable objects.</p>
</li>
<li>
<p>Free all objects <em>not</em> in that set.</p>
</li>
</ol>
<p>Many <span name="handbook">different</span> garbage collection algorithms are in
use today, but they all roughly follow that same structure. Some may interleave
the steps or mix them, but the two fundamental operations are there. They mostly
differ in <em>how</em> they perform each step.</p>
<aside name="handbook">
<p>If you want to explore other GC algorithms,
<a href="http://gchandbook.org/"><em>The Garbage Collection Handbook</em></a> (Jones, et al.) is the canonical
reference. For a large book on such a deep, narrow topic, it is quite enjoyable
to read. Or perhaps I have a strange idea of fun.</p>
</aside>
<h2><a href="#mark-sweep-garbage-collection" id="mark-sweep-garbage-collection"><small>26&#8202;.&#8202;2</small>Mark-Sweep Garbage Collection</a></h2>
<p>The first managed language was Lisp, the second &ldquo;high-level&rdquo; language to be
invented, right after Fortran. John McCarthy considered using manual memory
management or reference counting, but <span
name="procrastination">eventually</span> settled on (and coined) garbage
collection<span class="em">&mdash;</span>once the program was out of memory, it would go back and find
unused storage it could reclaim.</p>
<aside name="procrastination">
<p>In John McCarthy&rsquo;s &ldquo;History of Lisp&rdquo;, he notes: &ldquo;Once we decided on garbage
collection, its actual implementation could be postponed, because only toy
examples were being done.&rdquo; Our choice to procrastinate adding the GC to clox
follows in the footsteps of giants.</p>
</aside>
<p>He designed the very first, simplest garbage collection algorithm, called
<strong>mark-and-sweep</strong> or just <strong>mark-sweep</strong>. Its description fits in three short
paragraphs in the initial paper on Lisp. Despite its age and simplicity, the
same fundamental algorithm underlies many modern memory managers. Some corners
of CS seem to be timeless.</p>
<p>As the name implies, mark-sweep works in two phases:</p>
<ul>
<li>
<p><strong>Marking:</strong> We start with the roots and traverse or <span
name="trace"><em>trace</em></span> through all of the objects those roots refer to.
This is a classic graph traversal of all of the reachable objects. Each time
we visit an object, we <em>mark</em> it in some way. (Implementations differ in how
they record the mark.)</p>
</li>
<li>
<p><strong>Sweeping:</strong> Once the mark phase completes, every reachable object
in the heap has been marked. That means any unmarked object is unreachable and
ripe for reclamation. We go through all the unmarked objects and free each
one.</p>
</li>
</ul>
<p>It looks something like this:</p><img src="image/garbage-collection/mark-sweep.png" class="wide" alt="Starting from a graph of objects, first the reachable ones are marked, the remaining are swept, and then only the reachable remain." />
<aside name="trace">
<p>A <strong>tracing garbage collector</strong> is any algorithm that traces through the graph
of object references. This is in contrast with reference counting, which has a
different strategy for tracking the reachable objects.</p>
</aside>
<p>That&rsquo;s what we&rsquo;re gonna implement. Whenever we decide it&rsquo;s time to reclaim some
bytes, we&rsquo;ll trace everything and mark all the reachable objects, free what
didn&rsquo;t get marked, and then resume the user&rsquo;s program.</p>
<h3><a href="#collecting-garbage" id="collecting-garbage"><small>26&#8202;.&#8202;2&#8202;.&#8202;1</small>Collecting garbage</a></h3>
<p>This entire chapter is about implementing this one <span
name="one">function</span>:</p>
<aside name="one">
<p>Of course, we&rsquo;ll end up adding a bunch of helper functions too.</p>
</aside>
<div class="codehilite"><pre class="insert-before">void* reallocate(void* pointer, size_t oldSize, size_t newSize);
</pre><div class="source-file"><em>memory.h</em><br>
add after <em>reallocate</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">collectGarbage</span>();
</pre><pre class="insert-after">void freeObjects();
</pre></div>
<div class="source-file-narrow"><em>memory.h</em>, add after <em>reallocate</em>()</div>

<p>We&rsquo;ll work our way up to a full implementation starting with this empty shell:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>freeObject</em>()</div>
<pre><span class="t">void</span> <span class="i">collectGarbage</span>() {
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>freeObject</em>()</div>

<p>The first question you might ask is, When does this function get called? It
turns out that&rsquo;s a subtle question that we&rsquo;ll spend some time on later in the
chapter. For now we&rsquo;ll sidestep the issue and build ourselves a handy diagnostic
tool in the process.</p>
<div class="codehilite"><pre class="insert-before">#define DEBUG_TRACE_EXECUTION
</pre><div class="source-file"><em>common.h</em></div>
<pre class="insert">

<span class="a">#define DEBUG_STRESS_GC</span>
</pre><pre class="insert-after">

#define UINT8_COUNT (UINT8_MAX + 1)
</pre></div>
<div class="source-file-narrow"><em>common.h</em></div>

<p>We&rsquo;ll add an optional &ldquo;stress test&rdquo; mode for the garbage collector. When this
flag is defined, the GC runs as often as it possibly can. This is, obviously,
horrendous for performance. But it&rsquo;s great for flushing out memory management
bugs that occur only when a GC is triggered at just the right moment. If <em>every</em>
moment triggers a GC, you&rsquo;re likely to find those bugs.</p>
<div class="codehilite"><pre class="insert-before">void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>reallocate</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">newSize</span> &gt; <span class="i">oldSize</span>) {
<span class="a">#ifdef DEBUG_STRESS_GC</span>
    <span class="i">collectGarbage</span>();
<span class="a">#endif</span>
  }

</pre><pre class="insert-after">  if (newSize == 0) {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>reallocate</em>()</div>

<p>Whenever we call <code>reallocate()</code> to acquire more memory, we force a collection to
run. The <code>if</code> check is there because <code>reallocate()</code> is also called to free or shrink an
allocation. We don&rsquo;t want to trigger a GC for that<span class="em">&mdash;</span>in particular because the
GC itself will call <code>reallocate()</code> to free memory.</p>
<p>Collecting right before <span name="demand">allocation</span> is the classic way
to wire a GC into a VM. You&rsquo;re already calling into the memory manager, so it&rsquo;s
an easy place to hook in the code. Also, allocation is the only time when you
really <em>need</em> some freed up memory so that you can reuse it. If you <em>don&rsquo;t</em> use
allocation to trigger a GC, you have to make sure every possible place in code
where you can loop and allocate memory also has a way to trigger the collector.
Otherwise, the VM can get into a starved state where it needs more memory but
never collects any.</p>
<aside name="demand">
<p>More sophisticated collectors might run on a separate thread or be interleaved
periodically during program execution<span class="em">&mdash;</span>often at function call boundaries or
when a backward jump occurs.</p>
</aside>
<h3><a href="#debug-logging" id="debug-logging"><small>26&#8202;.&#8202;2&#8202;.&#8202;2</small>Debug logging</a></h3>
<p>While we&rsquo;re on the subject of diagnostics, let&rsquo;s put some more in. A real
challenge I&rsquo;ve found with garbage collectors is that they are opaque. We&rsquo;ve been
running lots of Lox programs just fine without any GC <em>at all</em> so far. Once we
add one, how do we tell if it&rsquo;s doing anything useful? Can we tell only if we
write programs that plow through acres of memory? How do we debug that?</p>
<p>An easy way to shine a light into the GC&rsquo;s inner workings is with some logging.</p>
<div class="codehilite"><pre class="insert-before">#define DEBUG_STRESS_GC
</pre><div class="source-file"><em>common.h</em></div>
<pre class="insert"><span class="a">#define DEBUG_LOG_GC</span>
</pre><pre class="insert-after">

#define UINT8_COUNT (UINT8_MAX + 1)
</pre></div>
<div class="source-file-narrow"><em>common.h</em></div>

<p>When this is enabled, clox prints information to the console when it does
something with dynamic memory.</p>
<p>We need a couple of includes.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;vm.h&quot;
</pre><div class="source-file"><em>memory.c</em></div>
<pre class="insert">

<span class="a">#ifdef DEBUG_LOG_GC</span>
<span class="a">#include &lt;stdio.h&gt;</span>
<span class="a">#include &quot;debug.h&quot;</span>
<span class="a">#endif</span>
</pre><pre class="insert-after">

void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em></div>

<p>We don&rsquo;t have a collector yet, but we can start putting in some of the logging
now. We&rsquo;ll want to know when a collection run starts.</p>
<div class="codehilite"><pre class="insert-before">void collectGarbage() {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert"><span class="a">#ifdef DEBUG_LOG_GC</span>
  <span class="i">printf</span>(<span class="s">&quot;-- gc begin</span><span class="e">\n</span><span class="s">&quot;</span>);
<span class="a">#endif</span>
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>Eventually we will log some other operations during the collection, so we&rsquo;ll
also want to know when the show&rsquo;s over.</p>
<div class="codehilite"><pre class="insert-before">  printf(&quot;-- gc begin\n&quot;);
#endif
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">

<span class="a">#ifdef DEBUG_LOG_GC</span>
  <span class="i">printf</span>(<span class="s">&quot;-- gc end</span><span class="e">\n</span><span class="s">&quot;</span>);
<span class="a">#endif</span>
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>We don&rsquo;t have any code for the collector yet, but we do have functions for
allocating and freeing, so we can instrument those now.</p>
<div class="codehilite"><pre class="insert-before">  vm.objects = object;
</pre><div class="source-file"><em>object.c</em><br>
in <em>allocateObject</em>()</div>
<pre class="insert">

<span class="a">#ifdef DEBUG_LOG_GC</span>
  <span class="i">printf</span>(<span class="s">&quot;%p allocate %zu for %d</span><span class="e">\n</span><span class="s">&quot;</span>, (<span class="t">void</span>*)<span class="i">object</span>, <span class="i">size</span>, <span class="i">type</span>);
<span class="a">#endif</span>

</pre><pre class="insert-after">  return object;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>allocateObject</em>()</div>

<p>And at the end of an object&rsquo;s lifespan:</p>
<div class="codehilite"><pre class="insert-before">static void freeObject(Obj* object) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert"><span class="a">#ifdef DEBUG_LOG_GC</span>
  <span class="i">printf</span>(<span class="s">&quot;%p free type %d</span><span class="e">\n</span><span class="s">&quot;</span>, (<span class="t">void</span>*)<span class="i">object</span>, <span class="i">object</span>-&gt;<span class="i">type</span>);
<span class="a">#endif</span>

</pre><pre class="insert-after">  switch (object-&gt;type) {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>With these two flags, we should be able to see that we&rsquo;re making progress as we
work through the rest of the chapter.</p>
<h2><a href="#marking-the-roots" id="marking-the-roots"><small>26&#8202;.&#8202;3</small>Marking the Roots</a></h2>
<p>Objects are scattered across the heap like stars in the inky night sky. A
reference from one object to another forms a connection, and these
constellations are the graph that the mark phase traverses. Marking begins at
the roots.</p>
<div class="codehilite"><pre class="insert-before">#ifdef DEBUG_LOG_GC
  printf(&quot;-- gc begin\n&quot;);
#endif
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">

  <span class="i">markRoots</span>();
</pre><pre class="insert-after">

#ifdef DEBUG_LOG_GC
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>Most roots are local variables or temporaries sitting right in the VM&rsquo;s stack,
so we start by walking that.</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>freeObject</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">markRoots</span>() {
  <span class="k">for</span> (<span class="t">Value</span>* <span class="i">slot</span> = <span class="i">vm</span>.<span class="i">stack</span>; <span class="i">slot</span> &lt; <span class="i">vm</span>.<span class="i">stackTop</span>; <span class="i">slot</span>++) {
    <span class="i">markValue</span>(*<span class="i">slot</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>freeObject</em>()</div>

<p>To mark a Lox value, we use this new function:</p>
<div class="codehilite"><pre class="insert-before">void* reallocate(void* pointer, size_t oldSize, size_t newSize);
</pre><div class="source-file"><em>memory.h</em><br>
add after <em>reallocate</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">markValue</span>(<span class="t">Value</span> <span class="i">value</span>);
</pre><pre class="insert-after">void collectGarbage();
</pre></div>
<div class="source-file-narrow"><em>memory.h</em>, add after <em>reallocate</em>()</div>

<p>Its implementation is here:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>reallocate</em>()</div>
<pre><span class="t">void</span> <span class="i">markValue</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="k">if</span> (<span class="a">IS_OBJ</span>(<span class="i">value</span>)) <span class="i">markObject</span>(<span class="a">AS_OBJ</span>(<span class="i">value</span>));
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>reallocate</em>()</div>

<p>Some Lox values<span class="em">&mdash;</span>numbers, Booleans, and <code>nil</code><span class="em">&mdash;</span>are stored directly inline in
Value and require no heap allocation. The garbage collector doesn&rsquo;t need to
worry about them at all, so the first thing we do is ensure that the value is an
actual heap object. If so, the real work happens in this function:</p>
<div class="codehilite"><pre class="insert-before">void* reallocate(void* pointer, size_t oldSize, size_t newSize);
</pre><div class="source-file"><em>memory.h</em><br>
add after <em>reallocate</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">markObject</span>(<span class="t">Obj</span>* <span class="i">object</span>);
</pre><pre class="insert-after">void markValue(Value value);
</pre></div>
<div class="source-file-narrow"><em>memory.h</em>, add after <em>reallocate</em>()</div>

<p>Which is defined here:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>reallocate</em>()</div>
<pre><span class="t">void</span> <span class="i">markObject</span>(<span class="t">Obj</span>* <span class="i">object</span>) {
  <span class="k">if</span> (<span class="i">object</span> == <span class="a">NULL</span>) <span class="k">return</span>;
  <span class="i">object</span>-&gt;<span class="i">isMarked</span> = <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>reallocate</em>()</div>

<p>The <code>NULL</code> check is unnecessary when <code>markObject()</code> is called from <code>markValue()</code>. A Lox Value that
is some kind of Obj type will always have a valid pointer. But later we will
call this function directly from other code, and in some of those places, the
object being pointed to is optional.</p>
<p>Assuming we do have a valid object, we mark it by setting a flag. That new field
lives in the Obj header struct all objects share.</p>
<div class="codehilite"><pre class="insert-before">  ObjType type;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>Obj</em></div>
<pre class="insert">  <span class="t">bool</span> <span class="i">isMarked</span>;
</pre><pre class="insert-after">  struct Obj* next;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>Obj</em></div>

<p>Every new object begins life unmarked because we haven&rsquo;t yet determined if it is
reachable or not.</p>
<div class="codehilite"><pre class="insert-before">  object-&gt;type = type;
</pre><div class="source-file"><em>object.c</em><br>
in <em>allocateObject</em>()</div>
<pre class="insert">  <span class="i">object</span>-&gt;<span class="i">isMarked</span> = <span class="k">false</span>;
</pre><pre class="insert-after">

  object-&gt;next = vm.objects;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>allocateObject</em>()</div>

<p>Before we go any farther, let&rsquo;s add some logging to <code>markObject()</code>.</p>
<div class="codehilite"><pre class="insert-before">void markObject(Obj* object) {
  if (object == NULL) return;
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markObject</em>()</div>
<pre class="insert"><span class="a">#ifdef DEBUG_LOG_GC</span>
  <span class="i">printf</span>(<span class="s">&quot;%p mark &quot;</span>, (<span class="t">void</span>*)<span class="i">object</span>);
  <span class="i">printValue</span>(<span class="a">OBJ_VAL</span>(<span class="i">object</span>));
  <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
<span class="a">#endif</span>

</pre><pre class="insert-after">  object-&gt;isMarked = true;
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markObject</em>()</div>

<p>This way we can see what the mark phase is doing. Marking the stack takes care
of local variables and temporaries. The other main source of roots is the
global variables.</p>
<div class="codehilite"><pre class="insert-before">    markValue(*slot);
  }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markRoots</em>()</div>
<pre class="insert">

  <span class="i">markTable</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markRoots</em>()</div>

<p>Those live in a hash table owned by the VM, so we&rsquo;ll declare another helper
function for marking all of the objects in a table.</p>
<div class="codehilite"><pre class="insert-before">ObjString* tableFindString(Table* table, const char* chars,
                           int length, uint32_t hash);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>tableFindString</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">markTable</span>(<span class="t">Table</span>* <span class="i">table</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>tableFindString</em>()</div>

<p>We implement that in the &ldquo;table&rdquo; module here:</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>tableFindString</em>()</div>
<pre><span class="t">void</span> <span class="i">markTable</span>(<span class="t">Table</span>* <span class="i">table</span>) {
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">table</span>-&gt;<span class="i">capacity</span>; <span class="i">i</span>++) {
    <span class="t">Entry</span>* <span class="i">entry</span> = &amp;<span class="i">table</span>-&gt;<span class="i">entries</span>[<span class="i">i</span>];
    <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">entry</span>-&gt;<span class="i">key</span>);
    <span class="i">markValue</span>(<span class="i">entry</span>-&gt;<span class="i">value</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>tableFindString</em>()</div>

<p>Pretty straightforward. We walk the entry array. For each one, we mark its
value. We also mark the key strings for each entry since the GC manages those
strings too.</p>
<h3><a href="#less-obvious-roots" id="less-obvious-roots"><small>26&#8202;.&#8202;3&#8202;.&#8202;1</small>Less obvious roots</a></h3>
<p>Those cover the roots that we typically think of<span class="em">&mdash;</span>the values that are
obviously reachable because they&rsquo;re stored in variables the user&rsquo;s program can
see. But the VM has a few of its own hidey-holes where it squirrels away
references to values that it directly accesses.</p>
<p>Most function call state lives in the value stack, but the VM maintains a
separate stack of CallFrames. Each CallFrame contains a pointer to the closure
being called. The VM uses those pointers to access constants and upvalues, so
those closures need to be kept around too.</p>
<div class="codehilite"><pre class="insert-before">  }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markRoots</em>()</div>
<pre class="insert">

  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">vm</span>.<span class="i">frameCount</span>; <span class="i">i</span>++) {
    <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">i</span>].<span class="i">closure</span>);
  }
</pre><pre class="insert-after">

  markTable(&amp;vm.globals);
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markRoots</em>()</div>

<p>Speaking of upvalues, the open upvalue list is another set of values that the
VM can directly reach.</p>
<div class="codehilite"><pre class="insert-before">  for (int i = 0; i &lt; vm.frameCount; i++) {
    markObject((Obj*)vm.frames[i].closure);
  }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markRoots</em>()</div>
<pre class="insert">

  <span class="k">for</span> (<span class="t">ObjUpvalue</span>* <span class="i">upvalue</span> = <span class="i">vm</span>.<span class="i">openUpvalues</span>;
       <span class="i">upvalue</span> != <span class="a">NULL</span>;
       <span class="i">upvalue</span> = <span class="i">upvalue</span>-&gt;<span class="i">next</span>) {
    <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">upvalue</span>);
  }
</pre><pre class="insert-after">

  markTable(&amp;vm.globals);
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markRoots</em>()</div>

<p>Remember also that a collection can begin during <em>any</em> allocation. Those
allocations don&rsquo;t just happen while the user&rsquo;s program is running. The compiler
itself periodically grabs memory from the heap for literals and the constant
table. If the GC runs while we&rsquo;re in the middle of compiling, then any values
the compiler directly accesses need to be treated as roots too.</p>
<p>To keep the compiler module cleanly separated from the rest of the VM, we&rsquo;ll do
that in a separate function.</p>
<div class="codehilite"><pre class="insert-before">  markTable(&amp;vm.globals);
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markRoots</em>()</div>
<pre class="insert">  <span class="i">markCompilerRoots</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markRoots</em>()</div>

<p>It&rsquo;s declared here:</p>
<div class="codehilite"><pre class="insert-before">ObjFunction* compile(const char* source);
</pre><div class="source-file"><em>compiler.h</em><br>
add after <em>compile</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">markCompilerRoots</span>();
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>compiler.h</em>, add after <em>compile</em>()</div>

<p>Which means the &ldquo;memory&rdquo; module needs an include.</p>
<div class="codehilite"><pre class="insert-before">#include &lt;stdlib.h&gt;

</pre><div class="source-file"><em>memory.c</em></div>
<pre class="insert"><span class="a">#include &quot;compiler.h&quot;</span>
</pre><pre class="insert-after">#include &quot;memory.h&quot;
</pre></div>
<div class="source-file-narrow"><em>memory.c</em></div>

<p>And the definition is over in the &ldquo;compiler&rdquo; module.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>compile</em>()</div>
<pre><span class="t">void</span> <span class="i">markCompilerRoots</span>() {
  <span class="t">Compiler</span>* <span class="i">compiler</span> = <span class="i">current</span>;
  <span class="k">while</span> (<span class="i">compiler</span> != <span class="a">NULL</span>) {
    <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">compiler</span>-&gt;<span class="i">function</span>);
    <span class="i">compiler</span> = <span class="i">compiler</span>-&gt;<span class="i">enclosing</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>compile</em>()</div>

<p>Fortunately, the compiler doesn&rsquo;t have too many values that it hangs on to. The
only object it uses is the ObjFunction it is compiling into. Since function
declarations can nest, the compiler has a linked list of those and we walk the
whole list.</p>
<p>Since the &ldquo;compiler&rdquo; module is calling <code>markObject()</code>, it also needs an include.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;compiler.h&quot;
</pre><div class="source-file"><em>compiler.c</em></div>
<pre class="insert"><span class="a">#include &quot;memory.h&quot;</span>
</pre><pre class="insert-after">#include &quot;scanner.h&quot;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em></div>

<p>Those are all the roots. After running this, every object that the VM<span class="em">&mdash;</span>runtime
and compiler<span class="em">&mdash;</span>can get to <em>without</em> going through some other object has its
mark bit set.</p>
<h2><a href="#tracing-object-references" id="tracing-object-references"><small>26&#8202;.&#8202;4</small>Tracing Object References</a></h2>
<p>The next step in the marking process is tracing through the graph of references
between objects to find the indirectly reachable values. We don&rsquo;t have instances
with fields yet, so there aren&rsquo;t many objects that contain references, but we do
have <span name="some">some</span>. In particular, ObjClosure has the list of
ObjUpvalues it closes over as well as a reference to the raw ObjFunction that it
wraps. ObjFunction, in turn, has a constant table containing references to all
of the literals created in the function&rsquo;s body. This is enough to build a fairly
complex web of objects for our collector to crawl through.</p>
<aside name="some">
<p>I slotted this chapter into the book right here specifically <em>because</em> we now
have closures which give us interesting objects for the garbage collector to
process.</p>
</aside>
<p>Now it&rsquo;s time to implement that traversal. We can go breadth-first, depth-first,
or in some other order. Since we just need to find the <em>set</em> of all reachable
objects, the order we visit them <span name="dfs">mostly</span> doesn&rsquo;t matter.</p>
<aside name="dfs">
<p>I say &ldquo;mostly&rdquo; because some garbage collectors move objects in the order that
they are visited, so traversal order determines which objects end up adjacent in
memory. That impacts performance because the CPU uses locality to determine
which memory to preload into the caches.</p>
<p>Even when traversal order does matter, it&rsquo;s not clear which order is <em>best</em>.
It&rsquo;s very difficult to determine the order in which objects will be used in the
future, so it&rsquo;s hard for the GC to know which order will help performance.</p>
</aside>
<h3><a href="#the-tricolor-abstraction" id="the-tricolor-abstraction"><small>26&#8202;.&#8202;4&#8202;.&#8202;1</small>The tricolor abstraction</a></h3>
<p>As the collector wanders through the graph of objects, we need to make sure it
doesn&rsquo;t lose track of where it is or get stuck going in circles. This is
particularly a concern for advanced implementations like incremental GCs that
interleave marking with running pieces of the user&rsquo;s program. The collector
needs to be able to pause and then pick up where it left off later.</p>
<p>To help us soft-brained humans reason about this complex process, VM hackers
came up with a metaphor called the <span name="color"></span><strong>tricolor
abstraction</strong>. Each object has a conceptual &ldquo;color&rdquo; that tracks what state the
object is in, and what work is left to do.</p>
<aside name="color">
<p>Advanced garbage collection algorithms often add other colors to the
abstraction. I&rsquo;ve seen multiple shades of gray, and even purple in some designs.
My puce-chartreuse-fuchsia-malachite collector paper was, alas, not accepted for
publication.</p>
</aside>
<ul>
<li>
<p><strong><img src="image/garbage-collection/white.png" alt="A white circle."
class="dot" /> White:</strong> At the beginning of a garbage collection, every
object is white. This color means we have not reached or processed the
object at all.</p>
</li>
<li>
<p><strong><img src="image/garbage-collection/gray.png" alt="A gray circle."
class="dot" /> Gray:</strong> During marking, when we first reach an object, we
darken it gray. This color means we know the object itself is reachable and
should not be collected. But we have not yet traced <em>through</em> it to see what
<em>other</em> objects it references. In graph algorithm terms, this is the
<em>worklist</em><span class="em">&mdash;</span>the set of objects we know about but haven&rsquo;t processed yet.</p>
</li>
<li>
<p><strong><img src="image/garbage-collection/black.png" alt="A black circle."
class="dot" /> Black:</strong> When
we take a gray object and mark all of the objects it references, we then
turn the gray object black. This color means the mark phase is done
processing that object.</p>
</li>
</ul>
<p>In terms of that abstraction, the marking process now looks like this:</p>
<ol>
<li>
<p>Start off with all objects white.</p>
</li>
<li>
<p>Find all the roots and mark them gray.</p>
</li>
<li>
<p>Repeat as long as there are still gray objects:</p>
<ol>
<li>
<p>Pick a gray object. Turn any white objects that the object mentions to
gray.</p>
</li>
<li>
<p>Mark the original gray object black.</p>
</li>
</ol>
</li>
</ol>
<p>I find it helps to visualize this. You have a web of objects with references
between them. Initially, they are all little white dots. Off to the side are
some incoming edges from the VM that point to the roots. Those roots turn gray.
Then the objects that each gray object references turn gray while the object itself turns black.
The full effect is a gray wavefront that passes through the graph, leaving a
field of reachable black objects behind it. Unreachable objects are not touched
by the wavefront and stay white.</p><img src="image/garbage-collection/tricolor-trace.png" class="wide" alt="A gray wavefront working through a graph of nodes." />
<p>At the <span name="invariant">end</span>, you&rsquo;re left with a sea of reached,
black objects sprinkled with islands of white objects that can be swept up and
freed. Once the unreachable objects are freed, the remaining objects<span class="em">&mdash;</span>all
black<span class="em">&mdash;</span>are reset to white for the next garbage collection cycle.</p>
<aside name="invariant">
<p>Note that at every step of this process no black node ever points to a white
node. This property is called the <strong>tricolor invariant</strong>. The traversal process
maintains this invariant to ensure that no reachable object is ever collected.</p>
</aside>
<h3><a href="#a-worklist-for-gray-objects" id="a-worklist-for-gray-objects"><small>26&#8202;.&#8202;4&#8202;.&#8202;2</small>A worklist for gray objects</a></h3>
<p>In our implementation we have already marked the roots. They&rsquo;re all gray. The
next step is to start picking them and traversing their references. But we don&rsquo;t
have any easy way to find them. We set a field on the object, but that&rsquo;s it. We
don&rsquo;t want to have to traverse the entire object list looking for objects with
that field set.</p>
<p>Instead, we&rsquo;ll create a separate worklist to keep track of all of the gray
objects. When an object turns gray, in addition to setting the mark field we&rsquo;ll
also add it to the worklist.</p>
<div class="codehilite"><pre class="insert-before">  object-&gt;isMarked = true;
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markObject</em>()</div>
<pre class="insert">

  <span class="k">if</span> (<span class="i">vm</span>.<span class="i">grayCapacity</span> &lt; <span class="i">vm</span>.<span class="i">grayCount</span> + <span class="n">1</span>) {
    <span class="i">vm</span>.<span class="i">grayCapacity</span> = <span class="a">GROW_CAPACITY</span>(<span class="i">vm</span>.<span class="i">grayCapacity</span>);
    <span class="i">vm</span>.<span class="i">grayStack</span> = (<span class="t">Obj</span>**)<span class="i">realloc</span>(<span class="i">vm</span>.<span class="i">grayStack</span>,
                                  <span class="k">sizeof</span>(<span class="t">Obj</span>*) * <span class="i">vm</span>.<span class="i">grayCapacity</span>);
  }

  <span class="i">vm</span>.<span class="i">grayStack</span>[<span class="i">vm</span>.<span class="i">grayCount</span>++] = <span class="i">object</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markObject</em>()</div>

<p>We could use any kind of data structure that lets us put items in and take them
out easily. I picked a stack because that&rsquo;s the simplest to implement with a
dynamic array in C. It works mostly like other dynamic arrays we&rsquo;ve built in
clox, <em>except</em> that it calls the <em>system</em> <code>realloc()</code> function and not our
own <code>reallocate()</code> wrapper. The memory for the gray stack itself is <em>not</em>
managed by the garbage collector. We don&rsquo;t want growing the gray stack during a
GC to cause the GC to recursively start a new GC. That could tear a hole in the
space-time continuum.</p>
<p>We&rsquo;ll manage its memory ourselves, explicitly. The VM owns the gray stack.</p>
<div class="codehilite"><pre class="insert-before">  Obj* objects;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">int</span> <span class="i">grayCount</span>;
  <span class="t">int</span> <span class="i">grayCapacity</span>;
  <span class="t">Obj</span>** <span class="i">grayStack</span>;
</pre><pre class="insert-after">} VM;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>It starts out empty.</p>
<div class="codehilite"><pre class="insert-before">  vm.objects = NULL;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">

  <span class="i">vm</span>.<span class="i">grayCount</span> = <span class="n">0</span>;
  <span class="i">vm</span>.<span class="i">grayCapacity</span> = <span class="n">0</span>;
  <span class="i">vm</span>.<span class="i">grayStack</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">

  initTable(&amp;vm.globals);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>And we need to free it when the VM shuts down.</p>
<div class="codehilite"><pre class="insert-before">    object = next;
  }
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObjects</em>()</div>
<pre class="insert">

  <span class="i">free</span>(<span class="i">vm</span>.<span class="i">grayStack</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObjects</em>()</div>

<p><span name="robust">We</span> take full responsibility for this array. That
includes allocation failure. If we can&rsquo;t create or grow the gray stack, then we
can&rsquo;t finish the garbage collection. This is bad news for the VM, but
fortunately rare since the gray stack tends to be pretty small. It would be nice
to do something more graceful, but to keep the code in this book simple, we just
abort.</p>
<aside name="robust">
<p>To be more robust, we can allocate a &ldquo;rainy day fund&rdquo; block of memory when we
start the VM. If the gray stack allocation fails, we free the rainy day block
and try again. That may give us enough wiggle room on the heap to create the
gray stack, finish the GC, and free up more memory.</p>
</aside>
<div class="codehilite"><pre class="insert-before">    vm.grayStack = (Obj**)realloc(vm.grayStack,
                                  sizeof(Obj*) * vm.grayCapacity);
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markObject</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">vm</span>.<span class="i">grayStack</span> == <span class="a">NULL</span>) <span class="i">exit</span>(<span class="n">1</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markObject</em>()</div>

<h3><a href="#processing-gray-objects" id="processing-gray-objects"><small>26&#8202;.&#8202;4&#8202;.&#8202;3</small>Processing gray objects</a></h3>
<p>OK, now when we&rsquo;re done marking the roots, we have both set a bunch of fields
and filled our worklist with objects to chew through. It&rsquo;s time for the next
phase.</p>
<div class="codehilite"><pre class="insert-before">  markRoots();
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">  <span class="i">traceReferences</span>();
</pre><pre class="insert-after">

#ifdef DEBUG_LOG_GC
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>Here&rsquo;s the implementation:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>markRoots</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">traceReferences</span>() {
  <span class="k">while</span> (<span class="i">vm</span>.<span class="i">grayCount</span> &gt; <span class="n">0</span>) {
    <span class="t">Obj</span>* <span class="i">object</span> = <span class="i">vm</span>.<span class="i">grayStack</span>[--<span class="i">vm</span>.<span class="i">grayCount</span>];
    <span class="i">blackenObject</span>(<span class="i">object</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>markRoots</em>()</div>

<p>It&rsquo;s as close to that textual algorithm as you can get. Until the stack empties,
we keep pulling out gray objects, traversing their references, and then marking
them black. Traversing an object&rsquo;s references may turn up new white objects that
get marked gray and added to the stack. So this function swings back and forth
between turning white objects gray and gray objects black, gradually advancing
the entire wavefront forward.</p>
<p>Here&rsquo;s where we traverse a single object&rsquo;s references:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>markValue</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">blackenObject</span>(<span class="t">Obj</span>* <span class="i">object</span>) {
  <span class="k">switch</span> (<span class="i">object</span>-&gt;<span class="i">type</span>) {
    <span class="k">case</span> <span class="a">OBJ_NATIVE</span>:
    <span class="k">case</span> <span class="a">OBJ_STRING</span>:
      <span class="k">break</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>markValue</em>()</div>

<p>Each object <span name="leaf">kind</span> has different fields that might
reference other objects, so we need a specific blob of code for each type. We
start with the easy ones<span class="em">&mdash;</span>strings and native function objects contain no
outgoing references so there is nothing to traverse.</p>
<aside name="leaf">
<p>An easy optimization we could do in <code>markObject()</code> is to skip adding strings and
native functions to the gray stack at all since we know they don&rsquo;t need to be
processed. Instead, they could darken from white straight to black.</p>
</aside>
<p>Note that we don&rsquo;t set any state in the traversed object itself. There is no
direct encoding of &ldquo;black&rdquo; in the object&rsquo;s state. A black object is any object
whose <code>isMarked</code> field is <span name="field">set</span> and that is no longer in
the gray stack.</p>
<aside name="field">
<p>You may rightly wonder why we have the <code>isMarked</code> field at all. All in good
time, friend.</p>
</aside>
<p>Now let&rsquo;s start adding in the other object types. The simplest is upvalues.</p>
<div class="codehilite"><pre class="insert-before">static void blackenObject(Obj* object) {
  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_UPVALUE</span>:
      <span class="i">markValue</span>(((<span class="t">ObjUpvalue</span>*)<span class="i">object</span>)-&gt;<span class="i">closed</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_NATIVE:
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>When an upvalue is closed, it contains a reference to the closed-over value.
Since the value is no longer on the stack, we need to make sure we trace the
reference to it from the upvalue.</p>
<p>Next are functions.</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_FUNCTION</span>: {
      <span class="t">ObjFunction</span>* <span class="i">function</span> = (<span class="t">ObjFunction</span>*)<span class="i">object</span>;
      <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">function</span>-&gt;<span class="i">name</span>);
      <span class="i">markArray</span>(&amp;<span class="i">function</span>-&gt;<span class="i">chunk</span>.<span class="i">constants</span>);
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_UPVALUE:
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>Each function has a reference to an ObjString containing the function&rsquo;s name.
More importantly, the function has a constant table packed full of references to
other objects. We trace all of those using this helper:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>markValue</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">markArray</span>(<span class="t">ValueArray</span>* <span class="i">array</span>) {
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">array</span>-&gt;<span class="i">count</span>; <span class="i">i</span>++) {
    <span class="i">markValue</span>(<span class="i">array</span>-&gt;<span class="i">values</span>[<span class="i">i</span>]);
  }
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>markValue</em>()</div>

<p>The last object type we have now<span class="em">&mdash;</span>we&rsquo;ll add more in later chapters<span class="em">&mdash;</span>is
closures.</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_CLOSURE</span>: {
      <span class="t">ObjClosure</span>* <span class="i">closure</span> = (<span class="t">ObjClosure</span>*)<span class="i">object</span>;
      <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">closure</span>-&gt;<span class="i">function</span>);
      <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">closure</span>-&gt;<span class="i">upvalueCount</span>; <span class="i">i</span>++) {
        <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">closure</span>-&gt;<span class="i">upvalues</span>[<span class="i">i</span>]);
      }
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_FUNCTION: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>Each closure has a reference to the bare function it wraps, as well as an array
of pointers to the upvalues it captures. We trace all of those.</p>
<p>That&rsquo;s the basic mechanism for processing a gray object, but there are two loose
ends to tie up. First, some logging.</p>
<div class="codehilite"><pre class="insert-before">static void blackenObject(Obj* object) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert"><span class="a">#ifdef DEBUG_LOG_GC</span>
  <span class="i">printf</span>(<span class="s">&quot;%p blacken &quot;</span>, (<span class="t">void</span>*)<span class="i">object</span>);
  <span class="i">printValue</span>(<span class="a">OBJ_VAL</span>(<span class="i">object</span>));
  <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
<span class="a">#endif</span>

</pre><pre class="insert-after">  switch (object-&gt;type) {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>This way, we can watch the tracing percolate through the object graph. Speaking
of which, note that I said <em>graph</em>. References between objects are directed, but
that doesn&rsquo;t mean they&rsquo;re <em>acyclic!</em> It&rsquo;s entirely possible to have cycles of
objects. When that happens, we need to ensure our collector doesn&rsquo;t get stuck in
an infinite loop as it continually re-adds the same series of objects to the
gray stack.</p>
<p>The fix is easy.</p>
<div class="codehilite"><pre class="insert-before">  if (object == NULL) return;
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markObject</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">object</span>-&gt;<span class="i">isMarked</span>) <span class="k">return</span>;

</pre><pre class="insert-after">#ifdef DEBUG_LOG_GC
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markObject</em>()</div>

<p>If the object is already marked, we don&rsquo;t mark it again and thus don&rsquo;t add it to
the gray stack. This ensures that an already-gray object is not redundantly
added and that a black object is not inadvertently turned back to gray. In other
words, it keeps the wavefront moving forward through only the white objects.</p>
<h2><a href="#sweeping-unused-objects" id="sweeping-unused-objects"><small>26&#8202;.&#8202;5</small>Sweeping Unused Objects</a></h2>
<p>When the loop in <code>traceReferences()</code> exits, we have processed all the objects we
could get our hands on. The gray stack is empty, and every object in the heap is
either black or white. The black objects are reachable, and we want to hang on to
them. Anything still white never got touched by the trace and is thus garbage.
All that&rsquo;s left is to reclaim them.</p>
<div class="codehilite"><pre class="insert-before">  traceReferences();
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">  <span class="i">sweep</span>();
</pre><pre class="insert-after">

#ifdef DEBUG_LOG_GC
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>All of the logic lives in one function.</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>traceReferences</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">sweep</span>() {
  <span class="t">Obj</span>* <span class="i">previous</span> = <span class="a">NULL</span>;
  <span class="t">Obj</span>* <span class="i">object</span> = <span class="i">vm</span>.<span class="i">objects</span>;
  <span class="k">while</span> (<span class="i">object</span> != <span class="a">NULL</span>) {
    <span class="k">if</span> (<span class="i">object</span>-&gt;<span class="i">isMarked</span>) {
      <span class="i">previous</span> = <span class="i">object</span>;
      <span class="i">object</span> = <span class="i">object</span>-&gt;<span class="i">next</span>;
    } <span class="k">else</span> {
      <span class="t">Obj</span>* <span class="i">unreached</span> = <span class="i">object</span>;
      <span class="i">object</span> = <span class="i">object</span>-&gt;<span class="i">next</span>;
      <span class="k">if</span> (<span class="i">previous</span> != <span class="a">NULL</span>) {
        <span class="i">previous</span>-&gt;<span class="i">next</span> = <span class="i">object</span>;
      } <span class="k">else</span> {
        <span class="i">vm</span>.<span class="i">objects</span> = <span class="i">object</span>;
      }

      <span class="i">freeObject</span>(<span class="i">unreached</span>);
    }
  }
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>traceReferences</em>()</div>

<p>I know that&rsquo;s kind of a lot of code and pointer shenanigans, but there isn&rsquo;t
much to it once you work through it. The outer <code>while</code> loop walks the linked
list of every object in the heap, checking their mark bits. If an object is
marked (black), we leave it alone and continue past it. If it is unmarked
(white), we unlink it from the list and free it using the <code>freeObject()</code>
function we already wrote.</p><img src="image/garbage-collection/unlink.png" alt="A recycle bin full of bits." />
<p>Most of the other code in here deals with the fact that removing a node from a
singly linked list is cumbersome. We have to continuously remember the previous
node so we can unlink its next pointer, and we have to handle the edge case
where we are freeing the first node. But, otherwise, it&rsquo;s pretty simple<span class="em">&mdash;</span>delete every node in a linked list that doesn&rsquo;t have a bit set in it.</p>
<p>There&rsquo;s one little addition:</p>
<div class="codehilite"><pre class="insert-before">    if (object-&gt;isMarked) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>sweep</em>()</div>
<pre class="insert">      <span class="i">object</span>-&gt;<span class="i">isMarked</span> = <span class="k">false</span>;
</pre><pre class="insert-after">      previous = object;
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>sweep</em>()</div>

<p>After <code>sweep()</code> completes, the only remaining objects are the live black ones
with their mark bits set. That&rsquo;s correct, but when the <em>next</em> collection cycle
starts, we need every object to be white. So whenever we reach a black object,
we go ahead and clear the bit now in anticipation of the next run.</p>
<h3><a href="#weak-references-and-the-string-pool" id="weak-references-and-the-string-pool"><small>26&#8202;.&#8202;5&#8202;.&#8202;1</small>Weak references and the string pool</a></h3>
<p>We are almost done collecting. There is one remaining corner of the VM that has
some unusual requirements around memory. Recall that when we added strings to
clox we made the VM intern them all. That means the VM has a hash table
containing a pointer to every single string in the heap. The VM uses this to
de-duplicate strings.</p>
<p>During the mark phase, we deliberately did <em>not</em> treat the VM&rsquo;s string table as
a source of roots. If we had, no <span name="intern">string</span> would <em>ever</em>
be collected. The string table would grow and grow and never yield a single byte
of memory back to the operating system. That would be bad.</p>
<aside name="intern">
<p>This can be a real problem. Java does not intern <em>all</em> strings, but it does
intern string <em>literals</em>. It also provides an API to add strings to the string
table. For many years, the capacity of that table was fixed, and strings added
to it could never be removed. If users weren&rsquo;t careful about their use of
<code>String.intern()</code>, they could run out of memory and crash.</p>
<p>Ruby had a similar problem for years where symbols<span class="em">&mdash;</span>interned string-like
values<span class="em">&mdash;</span>were not garbage collected. Both eventually enabled the GC to collect
these strings.</p>
</aside>
<p>At the same time, if we <em>do</em> let the GC free strings, then the VM&rsquo;s string table
will be left with dangling pointers to freed memory. That would be even worse.</p>
<p>The string table is special and we need special support for it. In particular,
it needs a special kind of reference. The table should be able to refer to a
string, but that link should not be considered a root when determining
reachability. That implies that the referenced object can be freed. When that
happens, the dangling reference must be fixed too, sort of like a magic,
self-clearing pointer. This particular set of semantics comes up frequently
enough that it has a name: a <a href="https://en.wikipedia.org/wiki/Weak_reference"><strong>weak reference</strong></a>.</p>
<p>We have already implicitly implemented half of the string table&rsquo;s unique
behavior by virtue of the fact that we <em>don&rsquo;t</em> traverse it during marking. That
means it doesn&rsquo;t force strings to be reachable. The remaining piece is clearing
out any dangling pointers for strings that are freed.</p>
<p>To remove references to unreachable strings, we need to know which strings <em>are</em>
unreachable. We don&rsquo;t know that until after the mark phase has completed. But we
can&rsquo;t wait until after the sweep phase is done because by then the objects<span class="em">&mdash;</span>and their mark bits<span class="em">&mdash;</span>are no longer around to check. So the right time is
exactly between the marking and sweeping phases.</p>
<div class="codehilite"><pre class="insert-before">  traceReferences();
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">  <span class="i">tableRemoveWhite</span>(&amp;<span class="i">vm</span>.<span class="i">strings</span>);
</pre><pre class="insert-after">  sweep();
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>The logic for removing the about-to-be-deleted strings exists in a new function
in the &ldquo;table&rdquo; module.</p>
<div class="codehilite"><pre class="insert-before">ObjString* tableFindString(Table* table, const char* chars,
                           int length, uint32_t hash);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>tableFindString</em>()</div>
<pre class="insert">

<span class="t">void</span> <span class="i">tableRemoveWhite</span>(<span class="t">Table</span>* <span class="i">table</span>);
</pre><pre class="insert-after">void markTable(Table* table);

</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>tableFindString</em>()</div>

<p>The implementation is here:</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>tableFindString</em>()</div>
<pre><span class="t">void</span> <span class="i">tableRemoveWhite</span>(<span class="t">Table</span>* <span class="i">table</span>) {
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">table</span>-&gt;<span class="i">capacity</span>; <span class="i">i</span>++) {
    <span class="t">Entry</span>* <span class="i">entry</span> = &amp;<span class="i">table</span>-&gt;<span class="i">entries</span>[<span class="i">i</span>];
    <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> != <span class="a">NULL</span> &amp;&amp; !<span class="i">entry</span>-&gt;<span class="i">key</span>-&gt;<span class="i">obj</span>.<span class="i">isMarked</span>) {
      <span class="i">tableDelete</span>(<span class="i">table</span>, <span class="i">entry</span>-&gt;<span class="i">key</span>);
    }
  }
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>tableFindString</em>()</div>

<p>We walk every entry in the table. The string intern table uses only the key of
each entry<span class="em">&mdash;</span>it&rsquo;s basically a hash <em>set</em> not a hash <em>map</em>. If the key string
object&rsquo;s mark bit is not set, then it is a white object that is moments from
being swept away. We delete it from the hash table first and thus ensure we
won&rsquo;t see any dangling pointers.</p>
<h2><a href="#when-to-collect" id="when-to-collect"><small>26&#8202;.&#8202;6</small>When to Collect</a></h2>
<p>We have a fully functioning mark-sweep garbage collector now. When the stress
testing flag is enabled, it gets called all the time, and with the logging
enabled too, we can watch it do its thing and see that it is indeed reclaiming
memory. But, when the stress testing flag is off, it never runs at all. It&rsquo;s
time to decide when the collector should be invoked during normal program
execution.</p>
<p>As far as I can tell, this question is poorly answered by the literature. When
garbage collectors were first invented, computers had a tiny, fixed amount of
memory. Many of the early GC papers assumed that you set aside a few thousand
words of memory<span class="em">&mdash;</span>in other words, most of it<span class="em">&mdash;</span>and invoked the collector
whenever you ran out. Simple.</p>
<p>Modern machines have gigs of physical RAM, hidden behind the operating system&rsquo;s
even larger virtual memory abstraction, which is shared among a slew of other
programs all fighting for their chunk of memory. The operating system will let
your program request as much as it wants and then page in and out from the disk
when physical memory gets full. You never really &ldquo;run out&rdquo; of memory, you just
get slower and slower.</p>
<h3><a href="#latency-and-throughput" id="latency-and-throughput"><small>26&#8202;.&#8202;6&#8202;.&#8202;1</small>Latency and throughput</a></h3>
<p>It no longer makes sense to wait until you &ldquo;have to&rdquo; run the GC, so we need
a more subtle timing strategy. To reason about this more precisely, it&rsquo;s time to
introduce two fundamental numbers used when measuring a memory manager&rsquo;s
performance: <em>throughput</em> and <em>latency</em>.</p>
<p>Every managed language pays a performance price compared to explicit,
user-authored deallocation. The time spent actually freeing memory is the same,
but the GC spends cycles figuring out <em>which</em> memory to free. That is time <em>not</em>
spent running the user&rsquo;s code and doing useful work. In our implementation,
that&rsquo;s the entirety of the mark phase. The goal of a sophisticated garbage
collector is to minimize that overhead.</p>
<p>There are two key metrics we can use to understand that cost better:</p>
<ul>
<li>
<p><strong>Throughput</strong> is the total fraction of time spent running user code versus
doing garbage collection work. Say you run a clox program for ten seconds
and it spends a second of that inside <code>collectGarbage()</code>. That means the
throughput is 90%<span class="em">&mdash;</span>it spent 90% of the time running the program and 10%
on GC overhead.</p>
<p>Throughput is the most fundamental measure because it tracks the total cost
of collection overhead. All else being equal, you want to maximize
throughput. Up until this chapter, clox had no GC at all and thus <span
name="hundred">100%</span> throughput. That&rsquo;s pretty hard to beat. Of
course, it came at the slight expense of potentially running out of memory
and crashing if the user&rsquo;s program ran long enough. You can look at the goal
of a GC as fixing that &ldquo;glitch&rdquo; while sacrificing as little throughput as
possible.</p>
</li>
</ul>
<aside name="hundred">
<p>Well, not <em>exactly</em> 100%. It did still put the allocated objects into a linked
list, so there was some tiny overhead for setting those pointers.</p>
</aside>
<ul>
<li>
<p><strong>Latency</strong> is the longest <em>continuous</em> chunk of time where the user&rsquo;s
program is completely paused while garbage collection happens. It&rsquo;s a
measure of how &ldquo;chunky&rdquo; the collector is. Latency is an entirely different
metric than throughput.</p>
<p>Consider two runs of a clox program that both take ten seconds. In the first
run, the GC kicks in once and spends a solid second in <code>collectGarbage()</code> in
one massive collection. In the second run, the GC gets invoked five times,
each for a fifth of a second. The <em>total</em> amount of time spent collecting is
still a second, so the throughput is 90% in both cases. But in the second
run, the latency is only 1/5th of a second, a fifth of what it was in the
first.</p>
</li>
</ul>
<p><span name="latency"></span></p><img src="image/garbage-collection/latency-throughput.png" alt="A bar representing execution time with slices for running user code and running the GC. The largest GC slice is latency. The size of all of the user code slices is throughput." />
<aside name="latency">
<p>The bar represents the execution of a program, divided into time spent running
user code and time spent in the GC. The size of the largest single slice of time
running the GC is the latency. The size of all of the user code slices added up
is the throughput.</p>
</aside>
<p>If you like analogies, imagine your program is a bakery selling fresh-baked
bread to customers. Throughput is the total number of warm, crusty baguettes you
can serve to customers in a single day. Latency is how long the unluckiest
customer has to wait in line before they get served.</p>
<p><span name="dishwasher">Running</span> the garbage collector is like shutting
down the bakery temporarily to go through all of the dishes, sort out the dirty
from the clean, and then wash the used ones. In our analogy, we don&rsquo;t have
dedicated dishwashers, so while this is going on, no baking is happening. The
baker is washing up.</p>
<aside name="dishwasher">
<p>If each person represents a thread, then an obvious optimization is to have
separate threads running garbage collection, giving you a <strong>concurrent garbage
collector</strong>. In other words, hire some dishwashers to clean while others bake.
This is how very sophisticated GCs work because it does let the bakers<span class="em">&mdash;</span>the worker threads<span class="em">&mdash;</span>keep running user code with little interruption.</p>
<p>However, coordination is required. You don&rsquo;t want a dishwasher grabbing a bowl
out of a baker&rsquo;s hands! This coordination adds overhead and a lot of complexity.
Concurrent collectors are fast, but challenging to implement correctly.</p><img src="image/garbage-collection/baguette.png" class="above" alt="Un baguette." />
</aside>
<p>Selling fewer loaves of bread a day is bad, and making any particular customer
sit and wait while you clean all the dishes is too. The goal is to maximize
throughput and minimize latency, but there is no free lunch, even inside a
bakery. Garbage collectors make different trade-offs between how much throughput
they sacrifice and how much latency they tolerate.</p>
<p>Being able to make these trade-offs is useful because different user programs
have different needs. An overnight batch job that is generating a report from a
terabyte of data just needs to get as much work done as fast as possible.
Throughput is queen. Meanwhile, an app running on a user&rsquo;s smartphone needs to
always respond immediately to user input so that dragging on the screen feels
<span name="butter">buttery</span> smooth. The app can&rsquo;t freeze for a few
seconds while the GC mucks around in the heap.</p>
<aside name="butter">
<p>Clearly the baking analogy is going to my head.</p>
</aside>
<p>As a garbage collector author, you control some of the trade-off between
throughput and latency by your choice of collection algorithm. But even within a
single algorithm, we have a lot of control over <em>how frequently</em> the collector
runs.</p>
<p>Our collector is a <span name="incremental"><strong>stop-the-world GC</strong></span> which
means the user&rsquo;s program is paused until the entire garbage collection process
has completed. If we wait a long time before we run the collector, then a large
number of dead objects will accumulate. That leads to a very long pause while
the collector runs, and thus high latency. So, clearly, we want to run the
collector really frequently.</p>
<aside name="incremental">
<p>In contrast, an <strong>incremental garbage collector</strong> can do a little collection,
then run some user code, then collect a little more, and so on.</p>
</aside>
<p>But every time the collector runs, it spends some time visiting live objects.
That doesn&rsquo;t really <em>do</em> anything useful (aside from ensuring that they don&rsquo;t
incorrectly get deleted). Time visiting live objects is time not freeing memory
and also time not running user code. If you run the GC <em>really</em> frequently, then
the user&rsquo;s program doesn&rsquo;t have enough time to even generate new garbage for the
VM to collect. The VM will spend all of its time obsessively revisiting the same
set of live objects over and over, and throughput will suffer. So, clearly, we
want to run the collector really <em>in</em>frequently.</p>
<p>In fact, we want something in the middle, and how often the
collector runs is one of our main knobs for tuning the trade-off between latency
and throughput.</p>
<h3><a href="#self-adjusting-heap" id="self-adjusting-heap"><small>26&#8202;.&#8202;6&#8202;.&#8202;2</small>Self-adjusting heap</a></h3>
<p>We want our GC to run frequently enough to minimize latency but infrequently
enough to maintain decent throughput. But how do we find the balance between
these when we have no idea how much memory the user&rsquo;s program needs and how
often it allocates? We could pawn the problem off on the user and force them to
pick by exposing GC tuning parameters. Many VMs do this. But if we, the GC
authors, don&rsquo;t know how to tune it well, odds are good most users won&rsquo;t either.
They deserve a reasonable default behavior.</p>
<p>I&rsquo;ll be honest with you, this is not my area of expertise. I&rsquo;ve talked to a
number of professional GC hackers<span class="em">&mdash;</span>this is something you can build an entire
career on<span class="em">&mdash;</span>and read a lot of the literature, and all of the answers I got
were<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>vague. The strategy I ended up picking is common, pretty simple, and (I
hope!) good enough for most uses.</p>
<p>The idea is that the collector frequency automatically adjusts based on the live
size of the heap. We track the total number of bytes of managed memory that the
VM has allocated. When it goes above some threshold, we trigger a GC. After
that, we note how many bytes of memory remain<span class="em">&mdash;</span>how many were <em>not</em> freed. Then
we adjust the threshold to some value larger than that.</p>
<p>The result is that as the amount of live memory increases, we collect less
frequently in order to avoid sacrificing throughput by re-traversing the growing
pile of live objects. As the amount of live memory goes down, we collect more
frequently so that we don&rsquo;t incur too much latency by waiting too long.</p>
<p>The implementation requires two new bookkeeping fields in the VM.</p>
<div class="codehilite"><pre class="insert-before">  ObjUpvalue* openUpvalues;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">

  <span class="t">size_t</span> <span class="i">bytesAllocated</span>;
  <span class="t">size_t</span> <span class="i">nextGC</span>;
</pre><pre class="insert-after">  Obj* objects;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>The first is a running total of the number of bytes of managed memory the VM has
allocated. The second is the threshold that triggers the next collection. We
initialize them when the VM starts up.</p>
<div class="codehilite"><pre class="insert-before">  vm.objects = NULL;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">  <span class="i">vm</span>.<span class="i">bytesAllocated</span> = <span class="n">0</span>;
  <span class="i">vm</span>.<span class="i">nextGC</span> = <span class="n">1024</span> * <span class="n">1024</span>;
</pre><pre class="insert-after">

  vm.grayCount = 0;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>The starting threshold here is <span name="lab">arbitrary</span>. It&rsquo;s similar
to the initial capacity we picked for our various dynamic arrays. The goal is to
not trigger the first few GCs <em>too</em> quickly but also to not wait too long. If we
had some real-world Lox programs, we could profile those to tune this. But since
all we have are toy programs, I just picked a number.</p>
<aside name="lab">
<p>A challenge with learning garbage collectors is that it&rsquo;s <em>very</em> hard to
discover the best practices in an isolated lab environment. You don&rsquo;t see how a
collector actually performs unless you run it on the kind of large, messy
real-world programs it is actually intended for. It&rsquo;s like tuning a rally car<span class="em">&mdash;</span>you need to take it out on the course.</p>
</aside>
<p>Every time we allocate or free some memory, we adjust the counter by that delta.</p>
<div class="codehilite"><pre class="insert-before">void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>reallocate</em>()</div>
<pre class="insert">  <span class="i">vm</span>.<span class="i">bytesAllocated</span> += <span class="i">newSize</span> - <span class="i">oldSize</span>;
</pre><pre class="insert-after">  if (newSize &gt; oldSize) {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>reallocate</em>()</div>

<p>When the total crosses the limit, we run the collector.</p>
<div class="codehilite"><pre class="insert-before">    collectGarbage();
#endif
</pre><div class="source-file"><em>memory.c</em><br>
in <em>reallocate</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">vm</span>.<span class="i">bytesAllocated</span> &gt; <span class="i">vm</span>.<span class="i">nextGC</span>) {
      <span class="i">collectGarbage</span>();
    }
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>reallocate</em>()</div>

<p>Now, finally, our garbage collector actually does something when the user runs a
program without our hidden diagnostic flag enabled. The sweep phase frees
objects by calling <code>reallocate()</code>, which lowers the value of <code>bytesAllocated</code>,
so after the collection completes, we know how many live bytes remain. We adjust
the threshold of the next GC based on that.</p>
<div class="codehilite"><pre class="insert-before">  sweep();
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">

  <span class="i">vm</span>.<span class="i">nextGC</span> = <span class="i">vm</span>.<span class="i">bytesAllocated</span> * <span class="a">GC_HEAP_GROW_FACTOR</span>;
</pre><pre class="insert-after">

#ifdef DEBUG_LOG_GC
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>The threshold is a multiple of the heap size. This way, as the amount of memory
the program uses grows, the threshold moves farther out to limit the total time
spent re-traversing the larger live set. Like other numbers in this chapter, the
scaling factor is basically arbitrary.</p>
<div class="codehilite"><pre class="insert-before">#endif
</pre><div class="source-file"><em>memory.c</em></div>
<pre class="insert">

<span class="a">#define GC_HEAP_GROW_FACTOR 2</span>
</pre><pre class="insert-after">

void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em></div>

<p>You&rsquo;d want to tune this in your implementation once you had some real programs
to benchmark it on. Right now, we can at least log some of the statistics that
we have. We capture the heap size before the collection.</p>
<div class="codehilite"><pre class="insert-before">  printf(&quot;-- gc begin\n&quot;);
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">  <span class="t">size_t</span> <span class="i">before</span> = <span class="i">vm</span>.<span class="i">bytesAllocated</span>;
</pre><pre class="insert-after">#endif
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>And then print the results at the end.</p>
<div class="codehilite"><pre class="insert-before">  printf(&quot;-- gc end\n&quot;);
</pre><div class="source-file"><em>memory.c</em><br>
in <em>collectGarbage</em>()</div>
<pre class="insert">  <span class="i">printf</span>(<span class="s">&quot;   collected %zu bytes (from %zu to %zu) next at %zu</span><span class="e">\n</span><span class="s">&quot;</span>,
         <span class="i">before</span> - <span class="i">vm</span>.<span class="i">bytesAllocated</span>, <span class="i">before</span>, <span class="i">vm</span>.<span class="i">bytesAllocated</span>,
         <span class="i">vm</span>.<span class="i">nextGC</span>);
</pre><pre class="insert-after">#endif
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>collectGarbage</em>()</div>

<p>This way we can see how much the garbage collector accomplished while it ran.</p>
<h2><a href="#garbage-collection-bugs" id="garbage-collection-bugs"><small>26&#8202;.&#8202;7</small>Garbage Collection Bugs</a></h2>
<p>In theory, we are all done now. We have a GC. It kicks in periodically, collects
what it can, and leaves the rest. If this were a typical textbook, we would wipe
the dust from our hands and bask in the soft glow of the flawless marble edifice
we have created.</p>
<p>But I aim to teach you not just the theory of programming languages but the
sometimes painful reality. I am going to roll over a rotten log and show you the
nasty bugs that live under it, and garbage collector bugs really are some of the
grossest invertebrates out there.</p>
<p>The collector&rsquo;s job is to free dead objects and preserve live ones. Mistakes are
easy to make in both directions. If the VM fails to free objects that aren&rsquo;t
needed, it slowly leaks memory. If it frees an object that is in use, the user&rsquo;s
program can access invalid memory. These failures often don&rsquo;t immediately cause
a crash, which makes it hard for us to trace backward in time to find the bug.</p>
<p>This is made harder by the fact that we don&rsquo;t know when the collector will run.
Any call that eventually allocates some memory is a place in the VM where a
collection could happen. It&rsquo;s like musical chairs. At any point, the GC might
stop the music. Every single heap-allocated object that we want to keep needs to
find a chair quickly<span class="em">&mdash;</span>get marked as a root or stored as a reference in some
other object<span class="em">&mdash;</span>before the sweep phase comes to kick it out of the game.</p>
<p>How is it possible for the VM to use an object later<span class="em">&mdash;</span>one that the GC itself
doesn&rsquo;t see? How can the VM find it? The most common answer is through a pointer
stored in some local variable on the C stack. The GC walks the <em>VM&rsquo;s</em> value and
CallFrame stacks, but the C stack is <span name="c">hidden</span> to it.</p>
<aside name="c">
<p>Our GC can&rsquo;t find addresses in the C stack, but many can. Conservative garbage
collectors look all through memory, including the native stack. The most
well-known of this variety is the <a href="https://en.wikipedia.org/wiki/Boehm_garbage_collector"><strong>Boehm–Demers–Weiser garbage
collector</strong></a>, usually just called the &ldquo;Boehm collector&rdquo;. (The shortest
path to fame in CS is a last name that&rsquo;s alphabetically early so that it shows
up first in sorted lists of names.)</p>
<p>Many precise GCs walk the C stack too. Even those have to be careful about
pointers to live objects that exist only in <em>CPU registers</em>.</p>
</aside>
<p>In previous chapters, we wrote seemingly pointless code that pushed an object
onto the VM&rsquo;s value stack, did a little work, and then popped it right back off.
Most times, I said this was for the GC&rsquo;s benefit. Now you see why. The code
between pushing and popping potentially allocates memory and thus can trigger a
GC. We had to make sure the object was on the value stack so that the
collector&rsquo;s mark phase would find it and keep it alive.</p>
<p>I wrote the entire clox implementation before splitting it into chapters and
writing the prose, so I had plenty of time to find all of these corners and
flush out most of these bugs. The stress testing code we put in at the beginning
of this chapter and a pretty good test suite were very helpful.</p>
<p>But I fixed only <em>most</em> of them. I left a couple in because I want to give you a
hint of what it&rsquo;s like to encounter these bugs in the wild. If you enable the
stress test flag and run some toy Lox programs, you can probably stumble onto a
few. Give it a try and <em>see if you can fix any yourself</em>.</p>
<h3><a href="#adding-to-the-constant-table" id="adding-to-the-constant-table"><small>26&#8202;.&#8202;7&#8202;.&#8202;1</small>Adding to the constant table</a></h3>
<p>You are very likely to hit the first bug. The constant table each chunk owns is
a dynamic array. When the compiler adds a new constant to the current function&rsquo;s
table, that array may need to grow. The constant itself may also be some
heap-allocated object like a string or a nested function.</p>
<p>The new object being added to the constant table is passed to <code>addConstant()</code>.
At that moment, the object can be found only in the parameter to that function
on the C stack. That function appends the object to the constant table. If the
table doesn&rsquo;t have enough capacity and needs to grow, it calls <code>reallocate()</code>.
That in turn triggers a GC, which fails to mark the new constant object and
thus sweeps it right before we have a chance to add it to the table. Crash.</p>
<p>The fix, as you&rsquo;ve seen in other places, is to push the constant onto the stack
temporarily.</p>
<div class="codehilite"><pre class="insert-before">int addConstant(Chunk* chunk, Value value) {
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>addConstant</em>()</div>
<pre class="insert">  <span class="i">push</span>(<span class="i">value</span>);
</pre><pre class="insert-after">  writeValueArray(&amp;chunk-&gt;constants, value);
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>addConstant</em>()</div>

<p>Once the constant table contains the object, we pop it off the stack.</p>
<div class="codehilite"><pre class="insert-before">  writeValueArray(&amp;chunk-&gt;constants, value);
</pre><div class="source-file"><em>chunk.c</em><br>
in <em>addConstant</em>()</div>
<pre class="insert">  <span class="i">pop</span>();
</pre><pre class="insert-after">  return chunk-&gt;constants.count - 1;
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em>, in <em>addConstant</em>()</div>

<p>When the GC is marking roots, it walks the chain of compilers and marks each of
their functions, so the new constant is reachable now. We do need an include
to call into the VM from the &ldquo;chunk&rdquo; module.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;memory.h&quot;
</pre><div class="source-file"><em>chunk.c</em></div>
<pre class="insert"><span class="a">#include &quot;vm.h&quot;</span>
</pre><pre class="insert-after">

void initChunk(Chunk* chunk) {
</pre></div>
<div class="source-file-narrow"><em>chunk.c</em></div>

<h3><a href="#interning-strings" id="interning-strings"><small>26&#8202;.&#8202;7&#8202;.&#8202;2</small>Interning strings</a></h3>
<p>Here&rsquo;s another similar one. All strings are interned in clox, so whenever we
create a new string, we also add it to the intern table. You can see where this
is going. Since the string is brand new, it isn&rsquo;t reachable anywhere. And
resizing the string pool can trigger a collection. Again, we go ahead and stash
the string on the stack first.</p>
<div class="codehilite"><pre class="insert-before">  string-&gt;chars = chars;
  string-&gt;hash = hash;
</pre><div class="source-file"><em>object.c</em><br>
in <em>allocateString</em>()</div>
<pre class="insert">

  <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">string</span>));
</pre><pre class="insert-after">  tableSet(&amp;vm.strings, string, NIL_VAL);
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>allocateString</em>()</div>

<p>And then pop it back off once it&rsquo;s safely nestled in the table.</p>
<div class="codehilite"><pre class="insert-before">  tableSet(&amp;vm.strings, string, NIL_VAL);
</pre><div class="source-file"><em>object.c</em><br>
in <em>allocateString</em>()</div>
<pre class="insert">  <span class="i">pop</span>();

</pre><pre class="insert-after">  return string;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>allocateString</em>()</div>

<p>This ensures the string is safe while the table is being resized. Once it
survives that, <code>allocateString()</code> will return it to some caller which can then
take responsibility for ensuring the string is still reachable before the next
heap allocation occurs.</p>
<h3><a href="#concatenating-strings" id="concatenating-strings"><small>26&#8202;.&#8202;7&#8202;.&#8202;3</small>Concatenating strings</a></h3>
<p>One last example: Over in the interpreter, the <code>OP_ADD</code> instruction can be used
to concatenate two strings. As it does with numbers, it pops the two operands
from the stack, computes the result, and pushes that new value back onto the
stack. For numbers that&rsquo;s perfectly safe.</p>
<p>But concatenating two strings requires allocating a new character array on the
heap, which can in turn trigger a GC. Since we&rsquo;ve already popped the operand
strings by that point, they can potentially be missed by the mark phase and get
swept away. Instead of popping them off the stack eagerly, we peek them.</p>
<div class="codehilite"><pre class="insert-before">static void concatenate() {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>concatenate</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="t">ObjString</span>* <span class="i">b</span> = <span class="a">AS_STRING</span>(<span class="i">peek</span>(<span class="n">0</span>));
  <span class="t">ObjString</span>* <span class="i">a</span> = <span class="a">AS_STRING</span>(<span class="i">peek</span>(<span class="n">1</span>));
</pre><pre class="insert-after">

  int length = a-&gt;length + b-&gt;length;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>concatenate</em>(), replace 2 lines</div>

<p>That way, they are still hanging out on the stack when we create the result
string. Once that&rsquo;s done, we can safely pop them off and replace them with the
result.</p>
<div class="codehilite"><pre class="insert-before">  ObjString* result = takeString(chars, length);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>concatenate</em>()</div>
<pre class="insert">  <span class="i">pop</span>();
  <span class="i">pop</span>();
</pre><pre class="insert-after">  push(OBJ_VAL(result));
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>concatenate</em>()</div>

<p>Those were all pretty easy, especially because I <em>showed</em> you where the fix was.
In practice, <em>finding</em> them is the hard part. All you see is an object that
<em>should</em> be there but isn&rsquo;t. It&rsquo;s not like other bugs where you&rsquo;re looking for
the code that <em>causes</em> some problem. You&rsquo;re looking for the <em>absence</em> of code
that would <em>prevent</em> a problem, and that&rsquo;s a much harder search.</p>
<p>But, for now at least, you can rest easy. As far as I know, we&rsquo;ve found all of
the collection bugs in clox, and now we have a working, robust, self-tuning,
mark-sweep garbage collector.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>The Obj header struct at the top of each object now has three fields:
<code>type</code>, <code>isMarked</code>, and <code>next</code>. How much memory do those take up (on your
machine)? Can you come up with something more compact? Is there a runtime
cost to doing so?</p>
</li>
<li>
<p>When the sweep phase traverses a live object, it clears the <code>isMarked</code>
field to prepare it for the next collection cycle. Can you come up with a
more efficient approach?</p>
</li>
<li>
<p>Mark-sweep is only one of a variety of garbage collection algorithms out
there. Explore those by replacing or augmenting the current collector with
another one. Good candidates to consider are reference counting, Cheney&rsquo;s
algorithm, or the Lisp 2 mark-compact algorithm.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Generational Collectors</a></h2>
<p>A collector loses throughput if it spends a long time re-visiting objects that
are still alive. But it can increase latency if it avoids collecting and
accumulates a large pile of garbage to wade through. If only there were some way
to tell which objects were likely to be long-lived and which weren&rsquo;t. Then the
GC could avoid revisiting the long-lived ones as often and clean up the
ephemeral ones more frequently.</p>
<p>It turns out there kind of is. Many years ago, GC researchers gathered metrics
on the lifetime of objects in real-world running programs. They tracked every
object when it was allocated, and eventually when it was no longer needed, and
then graphed out how long objects tended to live.</p>
<p>They discovered something they called the <strong>generational hypothesis</strong>, or the
much less tactful term <strong>infant mortality</strong>. Their observation was that most
objects are very short-lived but once they survive beyond a certain age, they
tend to stick around quite a long time. The longer an object <em>has</em> lived, the
longer it likely will <em>continue</em> to live. This observation is powerful because
it gave them a handle on how to partition objects into groups that benefit from
frequent collections and those that don&rsquo;t.</p>
<p>They designed a technique called <strong>generational garbage collection</strong>. It works
like this: Every time a new object is allocated, it goes into a special,
relatively small region of the heap called the &ldquo;nursery&rdquo;. Since objects tend to
die young, the garbage collector is invoked <span
name="nursery">frequently</span> over the objects just in this region.</p>
<aside name="nursery">
<p>Nurseries are also usually managed using a copying collector, which is faster at
allocating and freeing objects than a mark-sweep collector.</p>
</aside>
<p>Each time the GC runs over the nursery is called a &ldquo;generation&rdquo;. Any objects
that are no longer needed get freed. Those that survive are now considered one
generation older, and the GC tracks this for each object. If an object survives
a certain number of generations<span class="em">&mdash;</span>often just a single collection<span class="em">&mdash;</span>it gets
<em>tenured</em>. At this point, it is copied out of the nursery into a much larger
heap region for long-lived objects. The garbage collector runs over that region
too, but much less frequently since odds are good that most of those objects
will still be alive.</p>
<p>Generational collectors are a beautiful marriage of empirical data<span class="em">&mdash;</span>the
observation that object lifetimes are <em>not</em> evenly distributed<span class="em">&mdash;</span>and clever
algorithm design that takes advantage of that fact. They&rsquo;re also conceptually
quite simple. You can think of one as just two separately tuned GCs and a pretty
simple policy for moving objects from one to the other.</p>
</div>

<footer>
<a href="classes-and-instances.html" class="next">
  Next Chapter: &ldquo;Classes and Instances&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/global-variables.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Global Variables &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Global Variables<small>21</small></a></h3>

<ul>
    <li><a href="#statements"><small>21.1</small> Statements</a></li>
    <li><a href="#variable-declarations"><small>21.2</small> Variable Declarations</a></li>
    <li><a href="#reading-variables"><small>21.3</small> Reading Variables</a></li>
    <li><a href="#assignment"><small>21.4</small> Assignment</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="hash-tables.html" title="Hash Tables" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="local-variables.html" title="Local Variables" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="hash-tables.html" title="Hash Tables" class="prev">←</a>
<a href="local-variables.html" title="Local Variables" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Global Variables<small>21</small></a></h3>

<ul>
    <li><a href="#statements"><small>21.1</small> Statements</a></li>
    <li><a href="#variable-declarations"><small>21.2</small> Variable Declarations</a></li>
    <li><a href="#reading-variables"><small>21.3</small> Reading Variables</a></li>
    <li><a href="#assignment"><small>21.4</small> Assignment</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="hash-tables.html" title="Hash Tables" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="local-variables.html" title="Local Variables" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">21</div>
  <h1>Global Variables</h1>

<blockquote>
<p>If only there could be an invention that bottled up a memory, like scent. And
it never faded, and it never got stale. And then, when one wanted it, the
bottle could be uncorked, and it would be like living the moment all over
again.</p>
<p><cite>Daphne du Maurier, <em>Rebecca</em></cite></p>
</blockquote>
<p>The <a href="hash-tables.html">previous chapter</a> was a long exploration of one big, deep,
fundamental computer science data structure. Heavy on theory and concept. There
may have been some discussion of big-O notation and algorithms. This chapter has
fewer intellectual pretensions. There are no large ideas to learn. Instead, it&rsquo;s
a handful of straightforward engineering tasks. Once we&rsquo;ve completed them, our
virtual machine will support variables.</p>
<p>Actually, it will support only <em>global</em> variables. Locals are coming in the
<a href="local-variables.html">next chapter</a>. In jlox, we managed to cram them both into a single chapter
because we used the same implementation technique for all variables. We built a
chain of environments, one for each scope, all the way up to the top. That was a
simple, clean way to learn how to manage state.</p>
<p>But it&rsquo;s also <em>slow</em>. Allocating a new hash table each time you enter a block or
call a function is not the road to a fast VM. Given how much code is concerned
with using variables, if variables go slow, everything goes slow. For clox,
we&rsquo;ll improve that by using a much more efficient strategy for <span
name="different">local</span> variables, but globals aren&rsquo;t as easily optimized.</p>
<aside name="different">
<p>This is a common meta-strategy in sophisticated language implementations. Often,
the same language feature will have multiple implementation techniques, each
tuned for different use patterns. For example, JavaScript VMs often have a
faster representation for objects that are used more like instances of classes
compared to other objects whose set of properties is more freely modified. C and
C++ compilers usually have a variety of ways to compile <code>switch</code> statements
based on the number of cases and how densely packed the case values are.</p>
</aside>
<p>A quick refresher on Lox semantics: Global variables in Lox are &ldquo;late bound&rdquo;, or
resolved dynamically. This means you can compile a chunk of code that refers to
a global variable before it&rsquo;s defined. As long as the code doesn&rsquo;t <em>execute</em>
before the definition happens, everything is fine. In practice, that means you
can refer to later variables inside the body of functions.</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">showVariable</span>() {
  <span class="k">print</span> <span class="i">global</span>;
}

<span class="k">var</span> <span class="i">global</span> = <span class="s">&quot;after&quot;</span>;
<span class="i">showVariable</span>();
</pre></div>
<p>Code like this might seem odd, but it&rsquo;s handy for defining mutually recursive
functions. It also plays nicer with the REPL. You can write a little function in
one line, then define the variable it uses in the next.</p>
<p>Local variables work differently. Since a local variable&rsquo;s declaration <em>always</em>
occurs before it is used, the VM can resolve it at compile time, even in a
simple single-pass compiler. That will let us use a smarter representation for
locals. But that&rsquo;s for the next chapter. Right now, let&rsquo;s just worry about
globals.</p>
<h2><a href="#statements" id="statements"><small>21&#8202;.&#8202;1</small>Statements</a></h2>
<p>Variables come into being using variable declarations, which means now is also
the time to add support for statements to our compiler. If you recall, Lox
splits statements into two categories. &ldquo;Declarations&rdquo; are those statements that
bind a new name to a value. The other kinds of statements<span class="em">&mdash;</span>control flow,
print, etc.<span class="em">&mdash;</span>are just called &ldquo;statements&rdquo;. We disallow declarations directly
inside control flow statements, like this:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">monday</span>) <span class="k">var</span> <span class="i">croissant</span> = <span class="s">&quot;yes&quot;</span>; <span class="c">// Error.</span>
</pre></div>
<p>Allowing it would raise confusing questions around the scope of the variable.
So, like other languages, we prohibit it syntactically by having a separate
grammar rule for the subset of statements that <em>are</em> allowed inside a control
flow body.</p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">forStmt</span>
               | <span class="i">ifStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">returnStmt</span>
               | <span class="i">whileStmt</span>
               | <span class="i">block</span> ;
</pre></div>
<p>Then we use a separate rule for the top level of a script and inside a block.</p>
<div class="codehilite"><pre><span class="i">declaration</span>    → <span class="i">classDecl</span>
               | <span class="i">funDecl</span>
               | <span class="i">varDecl</span>
               | <span class="i">statement</span> ;
</pre></div>
<p>The <code>declaration</code> rule contains the statements that declare names, and also
includes <code>statement</code> so that all statement types are allowed. Since <code>block</code>
itself is in <code>statement</code>, you can put declarations <span
name="parens">inside</span> a control flow construct by nesting them inside a
block.</p>
<aside name="parens">
<p>Blocks work sort of like parentheses do for expressions. A block lets you put
the &ldquo;lower-precedence&rdquo; declaration statements in places where only a
&ldquo;higher-precedence&rdquo; non-declaring statement is allowed.</p>
</aside>
<p>In this chapter, we&rsquo;ll cover only a couple of statements and one
declaration.</p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">printStmt</span> ;

<span class="i">declaration</span>    → <span class="i">varDecl</span>
               | <span class="i">statement</span> ;
</pre></div>
<p>Up to now, our VM considered a &ldquo;program&rdquo; to be a single expression since that&rsquo;s
all we could parse and compile. In a full Lox implementation, a program is a
sequence of declarations. We&rsquo;re ready to support that now.</p>
<div class="codehilite"><pre class="insert-before">  advance();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()<br>
replace 2 lines</div>
<pre class="insert">

  <span class="k">while</span> (!<span class="i">match</span>(<span class="a">TOKEN_EOF</span>)) {
    <span class="i">declaration</span>();
  }

</pre><pre class="insert-after">  endCompiler();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>(), replace 2 lines</div>

<p>We keep compiling declarations until we hit the end of the source file. We
compile a single declaration using this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>expression</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">declaration</span>() {
  <span class="i">statement</span>();
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expression</em>()</div>

<p>We&rsquo;ll get to variable declarations later in the chapter, so for now, we simply
forward to <code>statement()</code>.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>declaration</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">statement</span>() {
  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_PRINT</span>)) {
    <span class="i">printStatement</span>();
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>declaration</em>()</div>

<p>Blocks can contain declarations, and control flow statements can contain other
statements. That means these two functions will eventually be recursive. We may
as well write out the forward declarations now.</p>
<div class="codehilite"><pre class="insert-before">static void expression();
</pre><div class="source-file"><em>compiler.c</em><br>
add after <em>expression</em>()</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">statement</span>();
<span class="k">static</span> <span class="t">void</span> <span class="i">declaration</span>();
</pre><pre class="insert-after">static ParseRule* getRule(TokenType type);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expression</em>()</div>

<h3><a href="#print-statements" id="print-statements"><small>21&#8202;.&#8202;1&#8202;.&#8202;1</small>Print statements</a></h3>
<p>We have two statement types to support in this chapter. Let&rsquo;s start with <code>print</code>
statements, which begin, naturally enough, with a <code>print</code> token. We detect that
using this helper function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>consume</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">match</span>(<span class="t">TokenType</span> <span class="i">type</span>) {
  <span class="k">if</span> (!<span class="i">check</span>(<span class="i">type</span>)) <span class="k">return</span> <span class="k">false</span>;
  <span class="i">advance</span>();
  <span class="k">return</span> <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>consume</em>()</div>

<p>You may recognize it from jlox. If the current token has the given type, we
consume the token and return <code>true</code>. Otherwise we leave the token alone and
return <code>false</code>. This <span name="turtles">helper</span> function is implemented
in terms of this other helper:</p>
<aside name="turtles">
<p>It&rsquo;s helpers all the way down!</p>
</aside>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>consume</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">check</span>(<span class="t">TokenType</span> <span class="i">type</span>) {
  <span class="k">return</span> <span class="i">parser</span>.<span class="i">current</span>.<span class="i">type</span> == <span class="i">type</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>consume</em>()</div>

<p>The <code>check()</code> function returns <code>true</code> if the current token has the given type.
It seems a little <span name="read">silly</span> to wrap this in a function, but
we&rsquo;ll use it more later, and I think short verb-named functions like this make
the parser easier to read.</p>
<aside name="read">
<p>This sounds trivial, but handwritten parsers for non-toy languages get pretty
big. When you have thousands of lines of code, a utility function that turns two
lines into one and makes the result a little more readable easily earns its
keep.</p>
</aside>
<p>If we did match the <code>print</code> token, then we compile the rest of the statement
here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>expression</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">printStatement</span>() {
  <span class="i">expression</span>();
  <span class="i">consume</span>(<span class="a">TOKEN_SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after value.&quot;</span>);
  <span class="i">emitByte</span>(<span class="a">OP_PRINT</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expression</em>()</div>

<p>A <code>print</code> statement evaluates an expression and prints the result, so we first
parse and compile that expression. The grammar expects a semicolon after that,
so we consume it. Finally, we emit a new instruction to print the result.</p>
<div class="codehilite"><pre class="insert-before">  OP_NEGATE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_PRINT</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>At runtime, we execute this instruction like so:</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_PRINT</span>: {
        <span class="i">printValue</span>(<span class="i">pop</span>());
        <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>When the interpreter reaches this instruction, it has already executed the code
for the expression, leaving the result value on top of the stack. Now we simply
pop and print it.</p>
<p>Note that we don&rsquo;t push anything else after that. This is a key difference
between expressions and statements in the VM. Every bytecode instruction has a
<span name="effect"><strong>stack effect</strong></span> that describes how the instruction
modifies the stack. For example, <code>OP_ADD</code> pops two values and pushes one,
leaving the stack one element smaller than before.</p>
<aside name="effect">
<p>The stack is one element shorter after an <code>OP_ADD</code>, so its effect is -1:</p><img src="image/global-variables/stack-effect.png" alt="The stack effect of an OP_ADD instruction." />
</aside>
<p>You can sum the stack effects of a series of instructions to get their total
effect. When you add the stack effects of the series of instructions compiled
from any complete expression, it will total one. Each expression leaves one
result value on the stack.</p>
<p>The bytecode for an entire statement has a total stack effect of zero. Since a
statement produces no values, it ultimately leaves the stack unchanged, though
it of course uses the stack while it&rsquo;s doing its thing. This is important
because when we get to control flow and looping, a program might execute a long
series of statements. If each statement grew or shrank the stack, it might
eventually overflow or underflow.</p>
<p>While we&rsquo;re in the interpreter loop, we should delete a bit of code.</p>
<div class="codehilite"><pre class="insert-before">      case OP_RETURN: {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 2 lines</div>
<pre class="insert">        <span class="c">// Exit interpreter.</span>
</pre><pre class="insert-after">        return INTERPRET_OK;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 2 lines</div>

<p>When the VM only compiled and evaluated a single expression, we had some
temporary code in <code>OP_RETURN</code> to output the value. Now that we have statements
and <code>print</code>, we don&rsquo;t need that anymore. We&rsquo;re one <span
name="return">step</span> closer to the complete implementation of clox.</p>
<aside name="return">
<p>We&rsquo;re only one step closer, though. We will revisit <code>OP_RETURN</code> when we
add functions. Right now, it exits the entire interpreter loop.</p>
</aside>
<p>As usual, a new instruction needs support in the disassembler.</p>
<div class="codehilite"><pre class="insert-before">      return simpleInstruction(&quot;OP_NEGATE&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_PRINT</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_PRINT&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>That&rsquo;s our <code>print</code> statement. If you want, give it a whirl:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="n">1</span> + <span class="n">2</span>;
<span class="k">print</span> <span class="n">3</span> * <span class="n">4</span>;
</pre></div>
<p>Exciting! OK, maybe not thrilling, but we can build scripts that contain as many
statements as we want now, which feels like progress.</p>
<h3><a href="#expression-statements" id="expression-statements"><small>21&#8202;.&#8202;1&#8202;.&#8202;2</small>Expression statements</a></h3>
<p>Wait until you see the next statement. If we <em>don&rsquo;t</em> see a <code>print</code> keyword, then
we must be looking at an expression statement.</p>
<div class="codehilite"><pre class="insert-before">    printStatement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>statement</em>()</div>
<pre class="insert">  } <span class="k">else</span> {
    <span class="i">expressionStatement</span>();
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>statement</em>()</div>

<p>It&rsquo;s parsed like so:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>expression</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">expressionStatement</span>() {
  <span class="i">expression</span>();
  <span class="i">consume</span>(<span class="a">TOKEN_SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after expression.&quot;</span>);
  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expression</em>()</div>

<p>An &ldquo;expression statement&rdquo; is simply an expression followed by a semicolon.
They&rsquo;re how you write an expression in a context where a statement is expected.
Usually, it&rsquo;s so that you can call a function or evaluate an assignment for its
side effect, like this:</p>
<div class="codehilite"><pre><span class="i">brunch</span> = <span class="s">&quot;quiche&quot;</span>;
<span class="i">eat</span>(<span class="i">brunch</span>);
</pre></div>
<p>Semantically, an expression statement evaluates the expression and discards the
result. The compiler directly encodes that behavior. It compiles the expression,
and then emits an <code>OP_POP</code> instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_FALSE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_POP</span>,
</pre><pre class="insert-after">  OP_EQUAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>As the name implies, that instruction pops the top value off the stack and
forgets it.</p>
<div class="codehilite"><pre class="insert-before">      case OP_FALSE: push(BOOL_VAL(false)); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_POP</span>: <span class="i">pop</span>(); <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We can disassemble it too.</p>
<div class="codehilite"><pre class="insert-before">      return simpleInstruction(&quot;OP_FALSE&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_POP</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_POP&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_EQUAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>Expression statements aren&rsquo;t very useful yet since we can&rsquo;t create any
expressions that have side effects, but they&rsquo;ll be essential when we
<a href="calls-and-functions.html">add functions later</a>. The <span name="majority">majority</span> of
statements in real-world code in languages like C are expression statements.</p>
<aside name="majority">
<p>By my count, 80 of the 149 statements in the version of &ldquo;compiler.c&rdquo; that we
have at the end of this chapter are expression statements.</p>
</aside>
<h3><a href="#error-synchronization" id="error-synchronization"><small>21&#8202;.&#8202;1&#8202;.&#8202;3</small>Error synchronization</a></h3>
<p>While we&rsquo;re getting this initial work done in the compiler, we can tie off a
loose end we left <a href="compiling-expressions.html#handling-syntax-errors">several chapters back</a>. Like jlox, clox uses panic
mode error recovery to minimize the number of cascaded compile errors that it
reports. The compiler exits panic mode when it reaches a synchronization point.
For Lox, we chose statement boundaries as that point. Now that we have
statements, we can implement synchronization.</p>
<div class="codehilite"><pre class="insert-before">  statement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>declaration</em>()</div>
<pre class="insert">

  <span class="k">if</span> (<span class="i">parser</span>.<span class="i">panicMode</span>) <span class="i">synchronize</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>declaration</em>()</div>

<p>If we hit a compile error while parsing the previous statement, we enter panic
mode. When that happens, after the statement we start synchronizing.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>printStatement</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">synchronize</span>() {
  <span class="i">parser</span>.<span class="i">panicMode</span> = <span class="k">false</span>;

  <span class="k">while</span> (<span class="i">parser</span>.<span class="i">current</span>.<span class="i">type</span> != <span class="a">TOKEN_EOF</span>) {
    <span class="k">if</span> (<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">type</span> == <span class="a">TOKEN_SEMICOLON</span>) <span class="k">return</span>;
    <span class="k">switch</span> (<span class="i">parser</span>.<span class="i">current</span>.<span class="i">type</span>) {
      <span class="k">case</span> <span class="a">TOKEN_CLASS</span>:
      <span class="k">case</span> <span class="a">TOKEN_FUN</span>:
      <span class="k">case</span> <span class="a">TOKEN_VAR</span>:
      <span class="k">case</span> <span class="a">TOKEN_FOR</span>:
      <span class="k">case</span> <span class="a">TOKEN_IF</span>:
      <span class="k">case</span> <span class="a">TOKEN_WHILE</span>:
      <span class="k">case</span> <span class="a">TOKEN_PRINT</span>:
      <span class="k">case</span> <span class="a">TOKEN_RETURN</span>:
        <span class="k">return</span>;

      <span class="k">default</span>:
        ; <span class="c">// Do nothing.</span>
    }

    <span class="i">advance</span>();
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>printStatement</em>()</div>

<p>We skip tokens indiscriminately until we reach something that looks like a
statement boundary. We recognize the boundary by looking for a preceding token
that can end a statement, like a semicolon. Or we&rsquo;ll look for a subsequent token
that begins a statement, usually one of the control flow or declaration
keywords.</p>
<h2><a href="#variable-declarations" id="variable-declarations"><small>21&#8202;.&#8202;2</small>Variable Declarations</a></h2>
<p>Merely being able to <em>print</em> doesn&rsquo;t win your language any prizes at the
programming language <span name="fair">fair</span>, so let&rsquo;s move on to
something a little more ambitious and get variables going. There are three
operations we need to support:</p>
<aside name="fair">
<p>I can&rsquo;t help but imagine a &ldquo;language fair&rdquo; like some country 4H thing. Rows of
straw-lined stalls full of baby languages <em>moo</em>ing and <em>baa</em>ing at each other.</p>
</aside>
<ul>
<li>Declaring a new variable using a <code>var</code> statement.</li>
<li>Accessing the value of a variable using an identifier expression.</li>
<li>Storing a new value in an existing variable using an assignment expression.</li>
</ul>
<p>We can&rsquo;t do either of the last two until we have some variables, so we start
with declarations.</p>
<div class="codehilite"><pre class="insert-before">static void declaration() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>declaration</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_VAR</span>)) {
    <span class="i">varDeclaration</span>();
  } <span class="k">else</span> {
    <span class="i">statement</span>();
  }
</pre><pre class="insert-after">

  if (parser.panicMode) synchronize();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>declaration</em>(), replace 1 line</div>

<p>The placeholder parsing function we sketched out for the declaration grammar
rule has an actual production now. If we match a <code>var</code> token, we jump here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>expression</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">varDeclaration</span>() {
  <span class="t">uint8_t</span> <span class="i">global</span> = <span class="i">parseVariable</span>(<span class="s">&quot;Expect variable name.&quot;</span>);

  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_EQUAL</span>)) {
    <span class="i">expression</span>();
  } <span class="k">else</span> {
    <span class="i">emitByte</span>(<span class="a">OP_NIL</span>);
  }
  <span class="i">consume</span>(<span class="a">TOKEN_SEMICOLON</span>,
          <span class="s">&quot;Expect &#39;;&#39; after variable declaration.&quot;</span>);

  <span class="i">defineVariable</span>(<span class="i">global</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expression</em>()</div>

<p>The keyword is followed by the variable name. That&rsquo;s compiled by
<code>parseVariable()</code>, which we&rsquo;ll get to in a second. Then we look for an <code>=</code>
followed by an initializer expression. If the user doesn&rsquo;t initialize the
variable, the compiler implicitly initializes it to <span
name="nil"><code>nil</code></span> by emitting an <code>OP_NIL</code> instruction. Either way, we
expect the statement to be terminated with a semicolon.</p>
<aside name="nil" class="bottom">
<p>Essentially, the compiler desugars a variable declaration like:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span>;
</pre></div>
<p>into:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="k">nil</span>;
</pre></div>
<p>The code it generates for the former is identical to what it produces for the
latter.</p>
</aside>
<p>There are two new functions here for working with variables and identifiers.
Here is the first:</p>
<div class="codehilite"><pre class="insert-before">static void parsePrecedence(Precedence precedence);

</pre><div class="source-file"><em>compiler.c</em><br>
add after <em>parsePrecedence</em>()</div>
<pre class="insert"><span class="k">static</span> <span class="t">uint8_t</span> <span class="i">parseVariable</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">errorMessage</span>) {
  <span class="i">consume</span>(<span class="a">TOKEN_IDENTIFIER</span>, <span class="i">errorMessage</span>);
  <span class="k">return</span> <span class="i">identifierConstant</span>(&amp;<span class="i">parser</span>.<span class="i">previous</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>parsePrecedence</em>()</div>

<p>It requires the next token to be an identifier, which it consumes and sends
here:</p>
<div class="codehilite"><pre class="insert-before">static void parsePrecedence(Precedence precedence);

</pre><div class="source-file"><em>compiler.c</em><br>
add after <em>parsePrecedence</em>()</div>
<pre class="insert"><span class="k">static</span> <span class="t">uint8_t</span> <span class="i">identifierConstant</span>(<span class="t">Token</span>* <span class="i">name</span>) {
  <span class="k">return</span> <span class="i">makeConstant</span>(<span class="a">OBJ_VAL</span>(<span class="i">copyString</span>(<span class="i">name</span>-&gt;<span class="i">start</span>,
                                         <span class="i">name</span>-&gt;<span class="i">length</span>)));
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>parsePrecedence</em>()</div>

<p>This function takes the given token and adds its lexeme to the chunk&rsquo;s constant
table as a string. It then returns the index of that constant in the constant
table.</p>
<p>Global variables are looked up <em>by name</em> at runtime. That means the VM<span class="em">&mdash;</span>the
bytecode interpreter loop<span class="em">&mdash;</span>needs access to the name. A whole string is too big
to stuff into the bytecode stream as an operand. Instead, we store the string in
the constant table and the instruction then refers to the name by its index in
the table.</p>
<p>This function returns that index all the way to <code>varDeclaration()</code>, which later
hands it over here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>parseVariable</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">defineVariable</span>(<span class="t">uint8_t</span> <span class="i">global</span>) {
  <span class="i">emitBytes</span>(<span class="a">OP_DEFINE_GLOBAL</span>, <span class="i">global</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>parseVariable</em>()</div>

<p><span name="helper">This</span> outputs the bytecode instruction that defines
the new variable and stores its initial value. The index of the variable&rsquo;s name
in the constant table is the instruction&rsquo;s operand. As usual in a stack-based
VM, we emit this instruction last. At runtime, we execute the code for the
variable&rsquo;s initializer first. That leaves the value on the stack. Then this
instruction takes that value and stores it away for later.</p>
<aside name="helper">
<p>I know some of these functions seem pretty pointless right now. But we&rsquo;ll get
more mileage out of them as we add more language features for working with
names. Function and class declarations both declare new variables, and variable
and assignment expressions access them.</p>
</aside>
<p>Over in the runtime, we begin with this new instruction:</p>
<div class="codehilite"><pre class="insert-before">  OP_POP,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_DEFINE_GLOBAL</span>,
</pre><pre class="insert-after">  OP_EQUAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>Thanks to our handy-dandy hash table, the implementation isn&rsquo;t too hard.</p>
<div class="codehilite"><pre class="insert-before">      case OP_POP: pop(); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_DEFINE_GLOBAL</span>: {
        <span class="t">ObjString</span>* <span class="i">name</span> = <span class="a">READ_STRING</span>();
        <span class="i">tableSet</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>, <span class="i">name</span>, <span class="i">peek</span>(<span class="n">0</span>));
        <span class="i">pop</span>();
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We get the name of the variable from the constant table. Then we <span
name="pop">take</span> the value from the top of the stack and store it in a
hash table with that name as the key.</p>
<aside name="pop">
<p>Note that we don&rsquo;t <em>pop</em> the value until <em>after</em> we add it to the hash table.
That ensures the VM can still find the value if a garbage collection is
triggered right in the middle of adding it to the hash table. That&rsquo;s a distinct
possibility since the hash table requires dynamic allocation when it resizes.</p>
</aside>
<p>This code doesn&rsquo;t check to see if the key is already in the table. Lox is pretty
lax with global variables and lets you redefine them without error. That&rsquo;s
useful in a REPL session, so the VM supports that by simply overwriting the
value if the key happens to already be in the hash table.</p>
<p>There&rsquo;s another little helper macro:</p>
<div class="codehilite"><pre class="insert-before">#define READ_CONSTANT() (vm.chunk-&gt;constants.values[READ_BYTE()])
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#define READ_STRING() AS_STRING(READ_CONSTANT())</span>
</pre><pre class="insert-after">#define BINARY_OP(valueType, op) \
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>It reads a one-byte operand from the bytecode chunk. It treats that as an index
into the chunk&rsquo;s constant table and returns the string at that index. It doesn&rsquo;t
check that the value <em>is</em> a string<span class="em">&mdash;</span>it just indiscriminately casts it. That&rsquo;s
safe because the compiler never emits an instruction that refers to a non-string
constant.</p>
<p>Because we care about lexical hygiene, we also undefine this macro at the end of
the <code>run()</code> function.</p>
<div class="codehilite"><pre class="insert-before">#undef READ_CONSTANT
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#undef READ_STRING</span>
</pre><pre class="insert-after">#undef BINARY_OP
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>I keep saying &ldquo;the hash table&rdquo;, but we don&rsquo;t actually have one yet. We need a
place to store these globals. Since we want them to persist as long as clox is
running, we store them right in the VM.</p>
<div class="codehilite"><pre class="insert-before">  Value* stackTop;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">Table</span> <span class="i">globals</span>;
</pre><pre class="insert-after">  Table strings;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>As we did with the string table, we need to initialize the hash table to a valid
state when the VM boots up.</p>
<div class="codehilite"><pre class="insert-before">  vm.objects = NULL;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">

  <span class="i">initTable</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>);
</pre><pre class="insert-after">  initTable(&amp;vm.strings);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>And we <span name="tear">tear</span> it down when we exit.</p>
<aside name="tear">
<p>The process will free everything on exit, but it feels undignified to require
the operating system to clean up our mess.</p>
</aside>
<div class="codehilite"><pre class="insert-before">void freeVM() {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>freeVM</em>()</div>
<pre class="insert">  <span class="i">freeTable</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>);
</pre><pre class="insert-after">  freeTable(&amp;vm.strings);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>freeVM</em>()</div>

<p>As usual, we want to be able to disassemble the new instruction too.</p>
<div class="codehilite"><pre class="insert-before">      return simpleInstruction(&quot;OP_POP&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_DEFINE_GLOBAL</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_DEFINE_GLOBAL&quot;</span>, <span class="i">chunk</span>,
                                 <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_EQUAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>And with that, we can define global variables. Not that users can <em>tell</em> that
they&rsquo;ve done so, because they can&rsquo;t actually <em>use</em> them. So let&rsquo;s fix that next.</p>
<h2><a href="#reading-variables" id="reading-variables"><small>21&#8202;.&#8202;3</small>Reading Variables</a></h2>
<p>As in every programming language ever, we access a variable&rsquo;s value using its
name. We hook up identifier tokens to the expression parser here:</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_LESS_EQUAL]    = {NULL,     binary, PREC_COMPARISON},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_IDENTIFIER</span>]    = {<span class="i">variable</span>, <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_STRING]        = {string,   NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>That calls this new parser function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>string</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">variable</span>() {
  <span class="i">namedVariable</span>(<span class="i">parser</span>.<span class="i">previous</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>string</em>()</div>

<p>Like with declarations, there are a couple of tiny helper functions that seem
pointless now but will become more useful in later chapters. I promise.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>string</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">namedVariable</span>(<span class="t">Token</span> <span class="i">name</span>) {
  <span class="t">uint8_t</span> <span class="i">arg</span> = <span class="i">identifierConstant</span>(&amp;<span class="i">name</span>);
  <span class="i">emitBytes</span>(<span class="a">OP_GET_GLOBAL</span>, <span class="i">arg</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>string</em>()</div>

<p>This calls the same <code>identifierConstant()</code> function from before to take the
given identifier token and add its lexeme to the chunk&rsquo;s constant table as a
string. All that remains is to emit an instruction that loads the global
variable with that name. Here&rsquo;s the instruction:</p>
<div class="codehilite"><pre class="insert-before">  OP_POP,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_GET_GLOBAL</span>,
</pre><pre class="insert-after">  OP_DEFINE_GLOBAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>Over in the interpreter, the implementation mirrors <code>OP_DEFINE_GLOBAL</code>.</p>
<div class="codehilite"><pre class="insert-before">      case OP_POP: pop(); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_GET_GLOBAL</span>: {
        <span class="t">ObjString</span>* <span class="i">name</span> = <span class="a">READ_STRING</span>();
        <span class="t">Value</span> <span class="i">value</span>;
        <span class="k">if</span> (!<span class="i">tableGet</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>, <span class="i">name</span>, &amp;<span class="i">value</span>)) {
          <span class="i">runtimeError</span>(<span class="s">&quot;Undefined variable &#39;%s&#39;.&quot;</span>, <span class="i">name</span>-&gt;<span class="i">chars</span>);
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="i">push</span>(<span class="i">value</span>);
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_DEFINE_GLOBAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We pull the constant table index from the instruction&rsquo;s operand and get the
variable name. Then we use that as a key to look up the variable&rsquo;s value in the
globals hash table.</p>
<p>If the key isn&rsquo;t present in the hash table, it means that global variable has
never been defined. That&rsquo;s a runtime error in Lox, so we report it and exit the
interpreter loop if that happens. Otherwise, we take the value and push it
onto the stack.</p>
<div class="codehilite"><pre class="insert-before">      return simpleInstruction(&quot;OP_POP&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_GET_GLOBAL</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_GET_GLOBAL&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_DEFINE_GLOBAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>A little bit of disassembling, and we&rsquo;re done. Our interpreter is now able to
run code like this:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">beverage</span> = <span class="s">&quot;cafe au lait&quot;</span>;
<span class="k">var</span> <span class="i">breakfast</span> = <span class="s">&quot;beignets with &quot;</span> + <span class="i">beverage</span>;
<span class="k">print</span> <span class="i">breakfast</span>;
</pre></div>
<p>There&rsquo;s only one operation left.</p>
<h2><a href="#assignment" id="assignment"><small>21&#8202;.&#8202;4</small>Assignment</a></h2>
<p>Throughout this book, I&rsquo;ve tried to keep you on a fairly safe and easy path. I
don&rsquo;t avoid hard <em>problems</em>, but I try to not make the <em>solutions</em> more complex
than they need to be. Alas, other design choices in our <span
name="jlox">bytecode</span> compiler make assignment annoying to implement.</p>
<aside name="jlox">
<p>If you recall, assignment was pretty easy in jlox.</p>
</aside>
<p>Our bytecode VM uses a single-pass compiler. It parses and generates bytecode
on the fly without any intermediate AST. As soon as it recognizes a piece of
syntax, it emits code for it. Assignment doesn&rsquo;t naturally fit that. Consider:</p>
<div class="codehilite"><pre><span class="i">menu</span>.<span class="i">brunch</span>(<span class="i">sunday</span>).<span class="i">beverage</span> = <span class="s">&quot;mimosa&quot;</span>;
</pre></div>
<p>In this code, the parser doesn&rsquo;t realize <code>menu.brunch(sunday).beverage</code> is the
target of an assignment and not a normal expression until it reaches <code>=</code>, many
tokens after the first <code>menu</code>. By then, the compiler has already emitted
bytecode for the whole thing.</p>
<p>The problem is not as dire as it might seem, though. Look at how the parser sees that example:</p><img src="image/global-variables/setter.png" alt="The 'menu.brunch(sunday).beverage = &quot;mimosa&quot;' statement, showing that 'menu.brunch(sunday)' is an expression." />
<p>Even though the <code>.beverage</code> part must not be compiled as a get expression,
everything to the left of the <code>.</code> is an expression, with the normal expression
semantics. The <code>menu.brunch(sunday)</code> part can be compiled and executed as usual.</p>
<p>Fortunately for us, the only semantic differences on the left side of an
assignment appear at the very right-most end of the tokens, immediately
preceding the <code>=</code>. Even though the receiver of a setter may be an arbitrarily
long expression, the part whose behavior differs from a get expression is only
the trailing identifier, which is right before the <code>=</code>. We don&rsquo;t need much
lookahead to realize <code>beverage</code> should be compiled as a set expression and not a
getter.</p>
<p>Variables are even easier since they are just a single bare identifier before an
<code>=</code>. The idea then is that right <em>before</em> compiling an expression that can also
be used as an assignment target, we look for a subsequent <code>=</code> token. If we see
one, we compile it as an assignment or setter instead of a variable access or
getter.</p>
<p>We don&rsquo;t have setters to worry about yet, so all we need to handle are variables.</p>
<div class="codehilite"><pre class="insert-before">  uint8_t arg = identifierConstant(&amp;name);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>namedVariable</em>()<br>
replace 1 line</div>
<pre class="insert">

  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_EQUAL</span>)) {
    <span class="i">expression</span>();
    <span class="i">emitBytes</span>(<span class="a">OP_SET_GLOBAL</span>, <span class="i">arg</span>);
  } <span class="k">else</span> {
    <span class="i">emitBytes</span>(<span class="a">OP_GET_GLOBAL</span>, <span class="i">arg</span>);
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>namedVariable</em>(), replace 1 line</div>

<p>In the parse function for identifier expressions, we look for an equals sign
after the identifier. If we find one, instead of emitting code for a variable
access, we compile the assigned value and then emit an assignment instruction.</p>
<p>That&rsquo;s the last instruction we need to add in this chapter.</p>
<div class="codehilite"><pre class="insert-before">  OP_DEFINE_GLOBAL,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_SET_GLOBAL</span>,
</pre><pre class="insert-after">  OP_EQUAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>As you&rsquo;d expect, its runtime behavior is similar to defining a new variable.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_SET_GLOBAL</span>: {
        <span class="t">ObjString</span>* <span class="i">name</span> = <span class="a">READ_STRING</span>();
        <span class="k">if</span> (<span class="i">tableSet</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>, <span class="i">name</span>, <span class="i">peek</span>(<span class="n">0</span>))) {
          <span class="i">tableDelete</span>(&amp;<span class="i">vm</span>.<span class="i">globals</span>, <span class="i">name</span>);<span name="delete"> </span>
          <span class="i">runtimeError</span>(<span class="s">&quot;Undefined variable &#39;%s&#39;.&quot;</span>, <span class="i">name</span>-&gt;<span class="i">chars</span>);
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>The main difference is what happens when the key doesn&rsquo;t already exist in the
globals hash table. If the variable hasn&rsquo;t been defined yet, it&rsquo;s a runtime
error to try to assign to it. Lox <a href="statements-and-state.html#design-note">doesn&rsquo;t do implicit variable
declaration</a>.</p>
<aside name="delete">
<p>The call to <code>tableSet()</code> stores the value in the global variable table even if
the variable wasn&rsquo;t previously defined. That fact is visible in a REPL session,
since it keeps running even after the runtime error is reported. So we also take
care to delete that zombie value from the table.</p>
</aside>
<p>The other difference is that setting a variable doesn&rsquo;t pop the value off the
stack. Remember, assignment is an expression, so it needs to leave that value
there in case the assignment is nested inside some larger expression.</p>
<p>Add a dash of disassembly:</p>
<div class="codehilite"><pre class="insert-before">      return constantInstruction(&quot;OP_DEFINE_GLOBAL&quot;, chunk,
                                 offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_SET_GLOBAL</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_SET_GLOBAL&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_EQUAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>So we&rsquo;re done, right? Well<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>not quite. We&rsquo;ve made a mistake! Take a gander at:</p>
<div class="codehilite"><pre><span class="i">a</span> * <span class="i">b</span> = <span class="i">c</span> + <span class="i">d</span>;
</pre></div>
<p>According to Lox&rsquo;s grammar, <code>=</code> has the lowest precedence, so this should be
parsed roughly like:</p><img src="image/global-variables/ast-good.png" alt="The expected parse, like '(a * b) = (c + d)'." />
<p>Obviously, <code>a * b</code> isn&rsquo;t a <span name="do">valid</span> assignment target, so
this should be a syntax error. But here&rsquo;s what our parser does:</p>
<aside name="do">
<p>Wouldn&rsquo;t it be wild if <code>a * b</code> <em>was</em> a valid assignment target, though? You
could imagine some algebra-like language that tried to divide the assigned value
up in some reasonable way and distribute it to <code>a</code> and <code>b</code><span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>that&rsquo;s probably
a terrible idea.</p>
</aside>
<ol>
<li>First, <code>parsePrecedence()</code> parses <code>a</code> using the <code>variable()</code> prefix parser.</li>
<li>After that, it enters the infix parsing loop.</li>
<li>It reaches the <code>*</code> and calls <code>binary()</code>.</li>
<li>That recursively calls <code>parsePrecedence()</code> to parse the right-hand operand.</li>
<li>That calls <code>variable()</code> again for parsing <code>b</code>.</li>
<li>Inside that call to <code>variable()</code>, it looks for a trailing <code>=</code>. It sees one
and thus parses the rest of the line as an assignment.</li>
</ol>
<p>In other words, the parser sees the above code like:</p><img src="image/global-variables/ast-bad.png" alt="The actual parse, like 'a * (b = c + d)'." />
<p>We&rsquo;ve messed up the precedence handling because <code>variable()</code> doesn&rsquo;t take into
account the precedence of the surrounding expression that contains the variable.
If the variable happens to be the right-hand side of an infix operator, or the
operand of a unary operator, then that containing expression is too high
precedence to permit the <code>=</code>.</p>
<p>To fix this, <code>variable()</code> should look for and consume the <code>=</code> only if it&rsquo;s in
the context of a low-precedence expression. The code that knows the current
precedence is, logically enough, <code>parsePrecedence()</code>. The <code>variable()</code> function
doesn&rsquo;t need to know the actual level. It just cares that the precedence is low
enough to allow assignment, so we pass that fact in as a Boolean.</p>
<div class="codehilite"><pre class="insert-before">    error(&quot;Expect expression.&quot;);
    return;
  }

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>parsePrecedence</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">bool</span> <span class="i">canAssign</span> = <span class="i">precedence</span> &lt;= <span class="a">PREC_ASSIGNMENT</span>;
  <span class="i">prefixRule</span>(<span class="i">canAssign</span>);
</pre><pre class="insert-after">

  while (precedence &lt;= getRule(parser.current.type)-&gt;precedence) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>parsePrecedence</em>(), replace 1 line</div>

<p>Since assignment is the lowest-precedence expression, the only time we allow an
assignment is when parsing an assignment expression or top-level expression like
in an expression statement. That flag makes its way to the parser function here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>variable</em>()<br>
replace 3 lines</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">variable</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
  <span class="i">namedVariable</span>(<span class="i">parser</span>.<span class="i">previous</span>, <span class="i">canAssign</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>variable</em>(), replace 3 lines</div>

<p>Which passes it through a new parameter:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>namedVariable</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">namedVariable</span>(<span class="t">Token</span> <span class="i">name</span>, <span class="t">bool</span> <span class="i">canAssign</span>) {
</pre><pre class="insert-after">  uint8_t arg = identifierConstant(&amp;name);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>namedVariable</em>(), replace 1 line</div>

<p>And then finally uses it here:</p>
<div class="codehilite"><pre class="insert-before">  uint8_t arg = identifierConstant(&amp;name);

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>namedVariable</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">canAssign</span> &amp;&amp; <span class="i">match</span>(<span class="a">TOKEN_EQUAL</span>)) {
</pre><pre class="insert-after">    expression();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>namedVariable</em>(), replace 1 line</div>

<p>That&rsquo;s a lot of plumbing to get literally one bit of data to the right place in
the compiler, but arrived it has. If the variable is nested inside some
expression with higher precedence, <code>canAssign</code> will be <code>false</code> and this will
ignore the <code>=</code> even if there is one there. Then <code>namedVariable()</code> returns, and
execution eventually makes its way back to <code>parsePrecedence()</code>.</p>
<p>Then what? What does the compiler do with our broken example from before? Right
now, <code>variable()</code> won&rsquo;t consume the <code>=</code>, so that will be the current token. The
compiler returns back to <code>parsePrecedence()</code> from the <code>variable()</code> prefix parser
and then tries to enter the infix parsing loop. There is no parsing function
associated with <code>=</code>, so it skips that loop.</p>
<p>Then <code>parsePrecedence()</code> silently returns back to the caller. That also isn&rsquo;t
right. If the <code>=</code> doesn&rsquo;t get consumed as part of the expression, nothing else
is going to consume it. It&rsquo;s an error and we should report it.</p>
<div class="codehilite"><pre class="insert-before">    infixRule();
  }
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>parsePrecedence</em>()</div>
<pre class="insert">

  <span class="k">if</span> (<span class="i">canAssign</span> &amp;&amp; <span class="i">match</span>(<span class="a">TOKEN_EQUAL</span>)) {
    <span class="i">error</span>(<span class="s">&quot;Invalid assignment target.&quot;</span>);
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>parsePrecedence</em>()</div>

<p>With that, the previous bad program correctly gets an error at compile time. OK,
<em>now</em> are we done? Still not quite. See, we&rsquo;re passing an argument to one of the
parse functions. But those functions are stored in a table of function pointers,
so all of the parse functions need to have the same type. Even though most parse
functions don&rsquo;t support being used as an assignment target<span class="em">&mdash;</span>setters are the
<span name="index">only</span> other one<span class="em">&mdash;</span>our friendly C compiler requires
them <em>all</em> to accept the parameter.</p>
<aside name="index">
<p>If Lox had arrays and subscript operators like <code>array[index]</code> then an infix <code>[</code>
would also allow assignment to support <code>array[index] = value</code>.</p>
</aside>
<p>So we&rsquo;re going to finish off this chapter with some grunt work. First, let&rsquo;s go
ahead and pass the flag to the infix parse functions.</p>
<div class="codehilite"><pre class="insert-before">    ParseFn infixRule = getRule(parser.previous.type)-&gt;infix;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>parsePrecedence</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">infixRule</span>(<span class="i">canAssign</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>parsePrecedence</em>(), replace 1 line</div>

<p>We&rsquo;ll need that for setters eventually. Then we&rsquo;ll fix the typedef for the
function type.</p>
<div class="codehilite"><pre class="insert-before">} Precedence;

</pre><div class="source-file"><em>compiler.c</em><br>
add after enum <em>Precedence</em><br>
replace 1 line</div>
<pre class="insert"><span class="k">typedef</span> <span class="t">void</span> (*<span class="t">ParseFn</span>)(<span class="t">bool</span> <span class="i">canAssign</span>);
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after enum <em>Precedence</em>, replace 1 line</div>

<p>And some completely tedious code to accept this parameter in all of our existing
parse functions. Here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>binary</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">binary</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
</pre><pre class="insert-after">  TokenType operatorType = parser.previous.type;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>binary</em>(), replace 1 line</div>

<p>And here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>literal</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">literal</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
</pre><pre class="insert-after">  switch (parser.previous.type) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>literal</em>(), replace 1 line</div>

<p>And here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>grouping</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">grouping</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
</pre><pre class="insert-after">  expression();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>grouping</em>(), replace 1 line</div>

<p>And here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>number</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">number</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
</pre><pre class="insert-after">  double value = strtod(parser.previous.start, NULL);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>number</em>(), replace 1 line</div>

<p>And here too:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>string</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">string</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
</pre><pre class="insert-after">  emitConstant(OBJ_VAL(copyString(parser.previous.start + 1,
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>string</em>(), replace 1 line</div>

<p>And, finally:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
function <em>unary</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">void</span> <span class="i">unary</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
</pre><pre class="insert-after">  TokenType operatorType = parser.previous.type;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, function <em>unary</em>(), replace 1 line</div>

<p>Phew! We&rsquo;re back to a C program we can compile. Fire it up and now you can run
this:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">breakfast</span> = <span class="s">&quot;beignets&quot;</span>;
<span class="k">var</span> <span class="i">beverage</span> = <span class="s">&quot;cafe au lait&quot;</span>;
<span class="i">breakfast</span> = <span class="s">&quot;beignets with &quot;</span> + <span class="i">beverage</span>;

<span class="k">print</span> <span class="i">breakfast</span>;
</pre></div>
<p>It&rsquo;s starting to look like real code for an actual language!</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>The compiler adds a global variable&rsquo;s name to the constant table as a string
every time an identifier is encountered. It creates a new constant each
time, even if that variable name is already in a previous slot in the
constant table. That&rsquo;s wasteful in cases where the same variable is
referenced multiple times by the same function. That, in turn, increases the
odds of filling up the constant table and running out of slots since we
allow only 256 constants in a single chunk.</p>
<p>Optimize this. How does your optimization affect the performance of the
compiler compared to the runtime? Is this the right trade-off?</p>
</li>
<li>
<p>Looking up a global variable by name in a hash table each time it is used
is pretty slow, even with a good hash table. Can you come up with a more
efficient way to store and access global variables without changing the
semantics?</p>
</li>
<li>
<p>When running in the REPL, a user might write a function that references an
unknown global variable. Then, in the next line, they declare the variable.
Lox should handle this gracefully by not reporting an &ldquo;unknown variable&rdquo;
compile error when the function is first defined.</p>
<p>But when a user runs a Lox <em>script</em>, the compiler has access to the full
text of the entire program before any code is run. Consider this program:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">useVar</span>() {
  <span class="k">print</span> <span class="i">oops</span>;
}

<span class="k">var</span> <span class="i">ooops</span> = <span class="s">&quot;too many o&#39;s!&quot;</span>;
</pre></div>
<p>Here, we can tell statically that <code>oops</code> will not be defined because there
is <em>no</em> declaration of that global anywhere in the program. Note that
<code>useVar()</code> is never called either, so even though the variable isn&rsquo;t
defined, no runtime error will occur because it&rsquo;s never used.</p>
<p>We could report mistakes like this as compile errors, at least when running
from a script. Do you think we should? Justify your answer. What do other
scripting languages you know do?</p>
</li>
</ol>
</div>

<footer>
<a href="local-variables.html" class="next">
  Next Chapter: &ldquo;Local Variables&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/hash-tables.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Hash Tables &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Hash Tables<small>20</small></a></h3>

<ul>
    <li><a href="#an-array-of-buckets"><small>20.1</small> An Array of Buckets</a></li>
    <li><a href="#collision-resolution"><small>20.2</small> Collision Resolution</a></li>
    <li><a href="#hash-functions"><small>20.3</small> Hash Functions</a></li>
    <li><a href="#building-a-hash-table"><small>20.4</small> Building a Hash Table</a></li>
    <li><a href="#string-interning"><small>20.5</small> String Interning</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="strings.html" title="Strings" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="global-variables.html" title="Global Variables" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="strings.html" title="Strings" class="prev">←</a>
<a href="global-variables.html" title="Global Variables" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Hash Tables<small>20</small></a></h3>

<ul>
    <li><a href="#an-array-of-buckets"><small>20.1</small> An Array of Buckets</a></li>
    <li><a href="#collision-resolution"><small>20.2</small> Collision Resolution</a></li>
    <li><a href="#hash-functions"><small>20.3</small> Hash Functions</a></li>
    <li><a href="#building-a-hash-table"><small>20.4</small> Building a Hash Table</a></li>
    <li><a href="#string-interning"><small>20.5</small> String Interning</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="strings.html" title="Strings" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="global-variables.html" title="Global Variables" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">20</div>
  <h1>Hash Tables</h1>

<blockquote>
<p>Hash, x. There is no definition for this word<span class="em">&mdash;</span>nobody knows what hash is.</p>
<p><cite>Ambrose Bierce, <em>The Unabridged Devil&rsquo;s Dictionary</em></cite></p>
</blockquote>
<p>Before we can add variables to our burgeoning virtual machine, we need some way
to look up a value given a variable&rsquo;s name. Later, when we add classes, we&rsquo;ll
also need a way to store fields on instances. The perfect data structure for
these problems and others is a hash table.</p>
<p>You probably already know what a hash table is, even if you don&rsquo;t know it by
that name. If you&rsquo;re a Java programmer, you call them &ldquo;HashMaps&rdquo;. C# and Python
users call them &ldquo;dictionaries&rdquo;. In C++, it&rsquo;s an &ldquo;unordered map&rdquo;. &ldquo;Objects&rdquo; in
JavaScript and &ldquo;tables&rdquo; in Lua are hash tables under the hood, which is what
gives them their flexibility.</p>
<p>A hash table, whatever your language calls it, associates a set of <strong>keys</strong> with
a set of <strong>values</strong>. Each key/value pair is an <strong>entry</strong> in the table. Given a
key, you can look up its corresponding value. You can add new key/value pairs
and remove entries by key. If you add a new value for an existing key, it
replaces the previous entry.</p>
<p>Hash tables appear in so many languages because they are incredibly powerful.
Much of this power comes from one metric: given a key, a hash table returns the
corresponding value in <span name="constant">constant time</span>, <em>regardless
of how many keys are in the hash table</em>.</p>
<aside name="constant">
<p>More specifically, the <em>average-case</em> lookup time is constant. Worst-case
performance can be, well, worse. In practice, it&rsquo;s easy to avoid degenerate
behavior and stay on the happy path.</p>
</aside>
<p>That&rsquo;s pretty remarkable when you think about it. Imagine you&rsquo;ve got a big stack
of business cards and I ask you to find a certain person. The bigger the pile
is, the longer it will take. Even if the pile is nicely sorted and you&rsquo;ve got
the manual dexterity to do a binary search by hand, you&rsquo;re still talking
<em>O(log n)</em>. But with a <span name="rolodex">hash table</span>, it takes the
same time to find that business card when the stack has ten cards as when it has
a million.</p>
<aside name="rolodex">
<p>Stuff all those cards in a Rolodex<span class="em">&mdash;</span>does anyone even remember those things
anymore?<span class="em">&mdash;</span>with dividers for each letter, and you improve your speed
dramatically. As we&rsquo;ll see, that&rsquo;s not too far from the trick a hash table uses.</p>
</aside>
<h2><a href="#an-array-of-buckets" id="an-array-of-buckets"><small>20&#8202;.&#8202;1</small>An Array of Buckets</a></h2>
<p>A complete, fast hash table has a couple of moving parts. I&rsquo;ll introduce them
one at a time by working through a couple of toy problems and their solutions.
Eventually, we&rsquo;ll build up to a data structure that can associate any set of
names with their values.</p>
<p>For now, imagine if Lox was a <em>lot</em> more restricted in variable names. What if a
variable&rsquo;s name could only be a <span name="basic">single</span> lowercase
letter? How could we very efficiently represent a set of variable names and
their values?</p>
<aside name="basic">
<p>This limitation isn&rsquo;t <em>too</em> far-fetched. The initial versions of BASIC out of
Dartmouth allowed variable names to be only a single letter followed by one
optional digit.</p>
</aside>
<p>With only 26 possible variables (27 if you consider underscore a &ldquo;letter&rdquo;, I
guess), the answer is easy. Declare a fixed-size array with 26 elements. We&rsquo;ll
follow tradition and call each element a <strong>bucket</strong>. Each represents a variable
with <code>a</code> starting at index zero. If there&rsquo;s a value in the array at some
letter&rsquo;s index, then that key is present with that value. Otherwise, the bucket
is empty and that key/value pair isn&rsquo;t in the data structure.</p>
<aside name="bucket">
<p><img src="image/hash-tables/bucket-array.png" alt="A row of buckets, each
labeled with a letter of the alphabet." /></p>
</aside>
<p>Memory usage is great<span class="em">&mdash;</span>just a single, reasonably sized <span
name="bucket">array</span>. There&rsquo;s some waste from the empty buckets, but it&rsquo;s
not huge. There&rsquo;s no overhead for node pointers, padding, or other stuff you&rsquo;d
get with something like a linked list or tree.</p>
<p>Performance is even better. Given a variable name<span class="em">&mdash;</span>its character<span class="em">&mdash;</span>you can
subtract the ASCII value of <code>a</code> and use the result to index directly into the
array. Then you can either look up the existing value or store a new value
directly into that slot. It doesn&rsquo;t get much faster than that.</p>
<p>This is sort of our Platonic ideal data structure. Lightning fast, dead simple,
and compact in memory. As we add support for more complex keys, we&rsquo;ll have to
make some concessions, but this is what we&rsquo;re aiming for. Even once you add in
hash functions, dynamic resizing, and collision resolution, this is still the
core of every hash table out there<span class="em">&mdash;</span>a contiguous array of buckets that you
index directly into.</p>
<h3><a href="#load-factor-and-wrapped-keys" id="load-factor-and-wrapped-keys"><small>20&#8202;.&#8202;1&#8202;.&#8202;1</small>Load factor and wrapped keys</a></h3>
<p>Confining Lox to single-letter variables would make our job as implementers
easier, but it&rsquo;s probably no fun programming in a language that gives you only
26 storage locations. What if we loosened it a little and allowed variables up
to <span name="six">eight</span> characters long?</p>
<aside name="six">
<p>Again, this restriction isn&rsquo;t so crazy. Early linkers for C treated only the
first six characters of external identifiers as meaningful. Everything after
that was ignored. If you&rsquo;ve ever wondered why the C standard library is so
enamored of abbreviation<span class="em">&mdash;</span>looking at you, <code>strncmp()</code><span class="em">&mdash;</span>it turns out it
wasn&rsquo;t entirely because of the small screens (or teletypes!) of the day.</p>
</aside>
<p>That&rsquo;s small enough that we can pack all eight characters into a 64-bit integer
and easily turn the string into a number. We can then use it as an array index.
Or, at least, we could if we could somehow allocate a 295,148 <em>petabyte</em> array.
Memory&rsquo;s gotten cheaper over time, but not quite <em>that</em> cheap. Even if we could
make an array that big, it would be heinously wasteful. Almost every bucket
would be empty unless users started writing way bigger Lox programs than we&rsquo;ve
anticipated.</p>
<p>Even though our variable keys cover the full 64-bit numeric range, we clearly
don&rsquo;t need an array that large. Instead, we allocate an array with more than
enough capacity for the entries we need, but not unreasonably large. We map the
full 64-bit keys down to that smaller range by taking the value modulo the size
of the array. Doing that essentially folds the larger numeric range onto itself
until it fits the smaller range of array elements.</p>
<p>For example, say we want to store &ldquo;bagel&rdquo;. We allocate an array with eight
elements, plenty enough to store it and more later. We treat the key string as a
64-bit integer. On a little-endian machine like Intel, packing those characters
into a 64-bit word puts the first letter, &ldquo;b&rdquo; (ASCII value 98), in the
least-significant byte. We take that integer modulo the array size (<span
name="power-of-two">8</span>) to fit it in the bounds and get a bucket index, 2.
Then we store the value there as usual.</p>
<aside name="power-of-two">
<p>I&rsquo;m using powers of two for the array sizes here, but they don&rsquo;t need to be.
Some styles of hash tables work best with powers of two, including the one we&rsquo;ll
build in this book. Others prefer prime number array sizes or have other rules.</p>
</aside>
<p>Using the array size as a modulus lets us map the key&rsquo;s numeric range down to
fit an array of any size. We can thus control the number of buckets
independently of the key range. That solves our waste problem, but introduces a
new one. Any two variables whose key number has the same remainder when divided
by the array size will end up in the same bucket. Keys can <strong>collide</strong>. For
example, if we try to add &ldquo;jam&rdquo;, it also ends up in bucket 2.</p><img src="image/hash-tables/collision.png" alt="'Bagel' and 'jam' both end up in bucket index 2." />
<p>We have some control over this by tuning the array size. The bigger the array,
the fewer the indexes that get mapped to the same bucket and the fewer the
collisions that are likely to occur. Hash table implementers track this
collision likelihood by measuring the table&rsquo;s <strong>load factor</strong>. It&rsquo;s defined as
the number of entries divided by the number of buckets. So a hash table with
five entries and an array of 16 elements has a load factor of 0.3125. The higher
the load factor, the greater the chance of collisions.</p>
<p>One way we mitigate collisions is by resizing the array. Just like the dynamic
arrays we implemented earlier, we reallocate and grow the hash table&rsquo;s array as
it fills up. Unlike a regular dynamic array, though, we won&rsquo;t wait until the
array is <em>full</em>. Instead, we pick a desired load factor and grow the array when
it goes over that.</p>
<h2><a href="#collision-resolution" id="collision-resolution"><small>20&#8202;.&#8202;2</small>Collision Resolution</a></h2>
<p>Even with a very low load factor, collisions can still occur. The <a href="https://en.wikipedia.org/wiki/Birthday_problem"><em>birthday
paradox</em></a> tells us that as the number of entries in the hash table
increases, the chance of collision increases very quickly. We can pick a large
array size to reduce that, but it&rsquo;s a losing game. Say we wanted to store a
hundred items in a hash table. To keep the chance of collision below a
still-pretty-high 10%, we need an array with at least 47,015 elements. To get
the chance below 1% requires an array with 492,555 elements, over 4,000 empty
buckets for each one in use.</p>
<p>A low load factor can make collisions <span name="pigeon">rarer</span>, but the
<a href="https://en.wikipedia.org/wiki/Pigeonhole_principle"><em>pigeonhole principle</em></a> tells us we can never eliminate them entirely.
If you&rsquo;ve got five pet pigeons and four holes to put them in, at least one hole
is going to end up with more than one pigeon. With 18,446,744,073,709,551,616
different variable names, any reasonably sized array can potentially end up with
multiple keys in the same bucket.</p>
<p>Thus we still have to handle collisions gracefully when they occur. Users don&rsquo;t
like it when their programming language can look up variables correctly only
<em>most</em> of the time.</p>
<aside name="pigeon">
<p>Put these two funny-named mathematical rules together and you get this
observation: Take a birdhouse containing 365 pigeonholes, and use each pigeon&rsquo;s
birthday to assign it to a pigeonhole. You&rsquo;ll need only 23 randomly chosen
pigeons before you get a greater than 50% chance of two pigeons in the same box.</p><img src="image/hash-tables/pigeons.png" alt="Two pigeons in the same hole." />
</aside>
<h3><a href="#separate-chaining" id="separate-chaining"><small>20&#8202;.&#8202;2&#8202;.&#8202;1</small>Separate chaining</a></h3>
<p>Techniques for resolving collisions fall into two broad categories. The first is
<strong>separate chaining</strong>. Instead of each bucket containing a single entry, we let
it contain a collection of them. In the classic implementation, each bucket
points to a linked list of entries. To look up an entry, you find its bucket and
then walk the list until you find an entry with the matching key.</p><img src="image/hash-tables/chaining.png" alt="An array with eight buckets. Bucket 2 links to a chain of two nodes. Bucket 5 links to a single node." />
<p>In catastrophically bad cases where every entry collides in the same bucket, the
data structure degrades into a single unsorted linked list with <em>O(n)</em> lookup.
In practice, it&rsquo;s easy to avoid that by controlling the load factor and how
entries get scattered across buckets. In typical separate-chained hash tables,
it&rsquo;s rare for a bucket to have more than one or two entries.</p>
<p>Separate chaining is conceptually simple<span class="em">&mdash;</span>it&rsquo;s literally an array of linked
lists. Most operations are straightforward to implement, even deletion, which, as
we&rsquo;ll see, can be a pain with open addressing. But it&rsquo;s not a great fit for modern CPUs. It has a lot
of overhead from pointers and tends to scatter little linked list <span
name="node">nodes</span> around in memory, which isn&rsquo;t great for cache usage.</p>
<aside name="node">
<p>There are a few tricks to optimize this. Many implementations store the first
entry right in the bucket so that in the common case where there&rsquo;s only one, no
extra pointer indirection is needed. You can also make each linked list node
store a few entries to reduce the pointer overhead.</p>
</aside>
<h3><a href="#open-addressing" id="open-addressing"><small>20&#8202;.&#8202;2&#8202;.&#8202;2</small>Open addressing</a></h3>
<p>The other technique is <span name="open">called</span> <strong>open addressing</strong> or
(confusingly) <strong>closed hashing</strong>. With this technique, all entries live directly
in the bucket array, with one entry per bucket. If two entries collide in the
same bucket, we find a different empty bucket to use instead.</p>
<aside name="open">
<p>It&rsquo;s called &ldquo;open&rdquo; addressing because the entry may end up at an address
(bucket) outside of its preferred one. It&rsquo;s called &ldquo;closed&rdquo; hashing because all
of the entries stay inside the array of buckets.</p>
</aside>
<p>Storing all entries in a single, big, contiguous array is great for keeping the
memory representation simple and fast. But it makes all of the operations on the
hash table more complex. When inserting an entry, its bucket may be full,
sending us to look at another bucket. That bucket itself may be occupied and so
on. This process of finding an available bucket is called <strong>probing</strong>, and the
order that you examine buckets is a <strong>probe sequence</strong>.</p>
<p>There are a <span name="probe">number</span> of algorithms for determining
which buckets to probe and how to decide which entry goes in which bucket.
There&rsquo;s been a ton of research here because even slight tweaks can have a large
performance impact. And, on a data structure as heavily used as hash tables,
that performance impact touches a very large number of real-world programs
across a range of hardware capabilities.</p>
<aside name="probe">
<p>If you&rsquo;d like to learn more (and you should, because some of these are really
cool), look into &ldquo;double hashing&rdquo;, &ldquo;cuckoo hashing&rdquo;, &ldquo;Robin Hood hashing&rdquo;, and
anything those lead you to.</p>
</aside>
<p>As usual in this book, we&rsquo;ll pick the simplest one that gets the job done
efficiently. That&rsquo;s good old <strong>linear probing</strong>. When looking for an entry, we
look in the first bucket its key maps to. If it&rsquo;s not in there, we look in the
very next element in the array, and so on. If we reach the end, we wrap back
around to the beginning.</p>
<p>The good thing about linear probing is that it&rsquo;s cache friendly. Since you walk
the array directly in memory order, it keeps the CPU&rsquo;s cache lines full and
happy. The bad thing is that it&rsquo;s prone to <strong>clustering</strong>. If you have a lot of
entries with numerically similar key values, you can end up with a lot of
colliding, overflowing buckets right next to each other.</p>
<p>Compared to separate chaining, open addressing can be harder to wrap your head
around. I think of open addressing as similar to separate chaining except that
the &ldquo;list&rdquo; of nodes is threaded through the bucket array itself. Instead of
storing the links between them in pointers, the connections are calculated
implicitly by the order that you look through the buckets.</p>
<p>The tricky part is that more than one of these implicit lists may be interleaved
together. Let&rsquo;s walk through an example that covers all the interesting cases.
We&rsquo;ll ignore values for now and just worry about a set of keys. We start with an
empty array of 8 buckets.</p><img src="image/hash-tables/insert-1.png" alt="An array with eight empty buckets." class="wide" />
<p>We decide to insert &ldquo;bagel&rdquo;. The first letter, &ldquo;b&rdquo; (ASCII value 98), modulo the
array size (8) puts it in bucket 2.</p><img src="image/hash-tables/insert-2.png" alt="Bagel goes into bucket 2." class="wide" />
<p>Next, we insert &ldquo;jam&rdquo;. That also wants to go in bucket 2 (106 mod 8 = 2), but
that bucket&rsquo;s taken. We keep probing to the next bucket. It&rsquo;s empty, so we put
it there.</p><img src="image/hash-tables/insert-3.png" alt="Jam goes into bucket 3, since 2 is full." class="wide" />
<p>We insert &ldquo;fruit&rdquo;, which happily lands in bucket 6.</p><img src="image/hash-tables/insert-4.png" alt="Fruit goes into bucket 6." class="wide" />
<p>Likewise, &ldquo;migas&rdquo; can go in its preferred bucket 5.</p><img src="image/hash-tables/insert-5.png" alt="Migas goes into bucket 5." class="wide" />
<p>When we try to insert &ldquo;eggs&rdquo;, it also wants to be in bucket 5. That&rsquo;s full, so we
skip to 6. Bucket 6 is also full. Note that the entry in there is <em>not</em> part of
the same probe sequence. &ldquo;Fruit&rdquo; is in its preferred bucket, 6. So the 5 and 6
sequences have collided and are interleaved. We skip over that and finally put
&ldquo;eggs&rdquo; in bucket 7.</p><img src="image/hash-tables/insert-6.png" alt="Eggs goes into bucket 7 because 5 and 6 are full." class="wide" />
<p>We run into a similar problem with &ldquo;nuts&rdquo;. It can&rsquo;t land in 6 like it wants to.
Nor can it go into 7. So we keep going. But we&rsquo;ve reached the end of the array,
so we wrap back around to 0 and put it there.</p><img src="image/hash-tables/insert-7.png" alt="Nuts wraps around to bucket 0 because 6 and 7 are full." class="wide" />
<p>In practice, the interleaving turns out to not be much of a problem. Even in
separate chaining, we need to walk the list to check each entry&rsquo;s key because
multiple keys can reduce to the same bucket. With open addressing, we need to do
that same check, and that also covers the case where you are stepping over
entries that &ldquo;belong&rdquo; to a different original bucket.</p>
<h2><a href="#hash-functions" id="hash-functions"><small>20&#8202;.&#8202;3</small>Hash Functions</a></h2>
<p>We can now build ourselves a reasonably efficient table for storing variable
names up to eight characters long, but that limitation is still annoying. In
order to relax the last constraint, we need a way to take a string of any length
and convert it to a fixed-size integer.</p>
<p>Finally, we get to the &ldquo;hash&rdquo; part of &ldquo;hash table&rdquo;. A <strong>hash function</strong> takes
some larger blob of data and &ldquo;hashes&rdquo; it to produce a fixed-size integer <strong>hash
code</strong> whose value depends on all of the bits of the original data. A <span
name="crypto">good</span> hash function has three main goals:</p>
<aside name="crypto">
<p>Hash functions are also used for cryptography. In that domain, &ldquo;good&rdquo; has a
<em>much</em> more stringent definition to avoid exposing details about the data being
hashed. We, thankfully, don&rsquo;t need to worry about those concerns for this book.</p>
</aside>
<ul>
<li>
<p><strong>It must be <em>deterministic</em>.</strong> The same input must always hash to the same
number. If the same variable ends up in different buckets at different
points in time, it&rsquo;s gonna get really hard to find it.</p>
</li>
<li>
<p><strong>It must be <em>uniform</em>.</strong> Given a typical set of inputs, it should produce a
wide and evenly distributed range of output numbers, with as few clumps or
patterns as possible. We want it to <span name="scatter">scatter</span>
values across the whole numeric range to minimize collisions and clustering.</p>
</li>
<li>
<p><strong>It must be <em>fast</em>.</strong> Every operation on the hash table requires us to hash
the key first. If hashing is slow, it can potentially cancel out the speed
of the underlying array storage.</p>
</li>
</ul>
<aside name="scatter">
<p>One of the original names for a hash table was &ldquo;scatter table&rdquo; because it takes
the entries and scatters them throughout the array. The word &ldquo;hash&rdquo; came from
the idea that a hash function takes the input data, chops it up, and tosses it
all together into a pile to come up with a single number from all of those bits.</p>
</aside>
<p>There is a veritable pile of hash functions out there. Some are old and
optimized for architectures no one uses anymore. Some are designed to be fast,
others cryptographically secure. Some take advantage of vector instructions and
cache sizes for specific chips, others aim to maximize portability.</p>
<p>There are people out there for whom designing and evaluating hash functions is,
like, their <em>jam</em>. I admire them, but I&rsquo;m not mathematically astute enough to
<em>be</em> one. So for clox, I picked a simple, well-worn hash function called
<a href="http://www.isthe.com/chongo/tech/comp/fnv/">FNV-1a</a> that&rsquo;s served me fine over the years. Consider <span
name="thing">trying</span> out different ones in your code and see if they make
a difference.</p>
<aside name="thing">
<p>Who knows, maybe hash functions could turn out to be your thing too?</p>
</aside>
<p>OK, that&rsquo;s a quick run-through of buckets, load factors, open addressing,
collision resolution, and hash functions. That&rsquo;s an awful lot of text and not a
lot of real code. Don&rsquo;t worry if it still seems vague. Once we&rsquo;re done coding it
up, it will all click into place.</p>
<h2><a href="#building-a-hash-table" id="building-a-hash-table"><small>20&#8202;.&#8202;4</small>Building a Hash Table</a></h2>
<p>The great thing about hash tables compared to other classic techniques like
balanced search trees is that the actual data structure is so simple. Ours goes
into a new module.</p>
<div class="codehilite"><div class="source-file"><em>table.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_table_h</span>
<span class="a">#define clox_table_h</span>

<span class="a">#include &quot;common.h&quot;</span>
<span class="a">#include &quot;value.h&quot;</span>

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">int</span> <span class="i">count</span>;
  <span class="t">int</span> <span class="i">capacity</span>;
  <span class="t">Entry</span>* <span class="i">entries</span>;
} <span class="t">Table</span>;

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, create new file</div>

<p>A hash table is an array of entries. As in our dynamic array earlier, we keep
track of both the allocated size of the array (<code>capacity</code>) and the number of
key/value pairs currently stored in it (<code>count</code>). The ratio of count to capacity
is exactly the load factor of the hash table.</p>
<p>Each entry is one of these:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;value.h&quot;
</pre><div class="source-file"><em>table.h</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">ObjString</span>* <span class="i">key</span>;
  <span class="t">Value</span> <span class="i">value</span>;
} <span class="t">Entry</span>;
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>table.h</em></div>

<p>It&rsquo;s a simple key/value pair. Since the key is always a <span
name="string">string</span>, we store the ObjString pointer directly instead of
wrapping it in a Value. It&rsquo;s a little faster and smaller this way.</p>
<aside name="string">
<p>In clox, we only need to support keys that are strings. Handling other types of
keys doesn&rsquo;t add much complexity. As long as you can compare two objects for
equality and reduce them to sequences of bits, it&rsquo;s easy to use them as hash
keys.</p>
</aside>
<p>To create a new, empty hash table, we declare a constructor-like function.</p>
<div class="codehilite"><pre class="insert-before">} Table;

</pre><div class="source-file"><em>table.h</em><br>
add after struct <em>Table</em></div>
<pre class="insert"><span class="t">void</span> <span class="i">initTable</span>(<span class="t">Table</span>* <span class="i">table</span>);

</pre><pre class="insert-after">#endif
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after struct <em>Table</em></div>

<p>We need a new implementation file to define that. While we&rsquo;re at it, let&rsquo;s get
all of the pesky includes out of the way.</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdlib.h&gt;</span>
<span class="a">#include &lt;string.h&gt;</span>

<span class="a">#include &quot;memory.h&quot;</span>
<span class="a">#include &quot;object.h&quot;</span>
<span class="a">#include &quot;table.h&quot;</span>
<span class="a">#include &quot;value.h&quot;</span>

<span class="t">void</span> <span class="i">initTable</span>(<span class="t">Table</span>* <span class="i">table</span>) {
  <span class="i">table</span>-&gt;<span class="i">count</span> = <span class="n">0</span>;
  <span class="i">table</span>-&gt;<span class="i">capacity</span> = <span class="n">0</span>;
  <span class="i">table</span>-&gt;<span class="i">entries</span> = <span class="a">NULL</span>;
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, create new file</div>

<p>As in our dynamic value array type, a hash table initially starts with zero
capacity and a <code>NULL</code> array. We don&rsquo;t allocate anything until needed. Assuming
we do eventually allocate something, we need to be able to free it too.</p>
<div class="codehilite"><pre class="insert-before">void initTable(Table* table);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>initTable</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">freeTable</span>(<span class="t">Table</span>* <span class="i">table</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>initTable</em>()</div>

<p>And its glorious implementation:</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>initTable</em>()</div>
<pre><span class="t">void</span> <span class="i">freeTable</span>(<span class="t">Table</span>* <span class="i">table</span>) {
  <span class="a">FREE_ARRAY</span>(<span class="t">Entry</span>, <span class="i">table</span>-&gt;<span class="i">entries</span>, <span class="i">table</span>-&gt;<span class="i">capacity</span>);
  <span class="i">initTable</span>(<span class="i">table</span>);
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>initTable</em>()</div>

<p>Again, it looks just like a dynamic array. In fact, you can think of a hash
table as basically a dynamic array with a really strange policy for inserting
items. We don&rsquo;t need to check for <code>NULL</code> here since <code>FREE_ARRAY()</code> already
handles that gracefully.</p>
<h3><a href="#hashing-strings" id="hashing-strings"><small>20&#8202;.&#8202;4&#8202;.&#8202;1</small>Hashing strings</a></h3>
<p>Before we can start putting entries in the table, we need to, well, hash them.
To ensure that the entries get distributed uniformly throughout the array, we
want a good hash function that looks at all of the bits of the key string. If it
looked at, say, only the first few characters, then a series of strings that all
shared the same prefix would end up colliding in the same bucket.</p>
<p>On the other hand, walking the entire string to calculate the hash is kind of
slow. We&rsquo;d lose some of the performance benefit of the hash table if we had to
walk the string every time we looked for a key in the table. So we&rsquo;ll do the
obvious thing: cache it.</p>
<p>Over in the &ldquo;object&rdquo; module in ObjString, we add:</p>
<div class="codehilite"><pre class="insert-before">  char* chars;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>ObjString</em></div>
<pre class="insert">  <span class="t">uint32_t</span> <span class="i">hash</span>;
</pre><pre class="insert-after">};
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>ObjString</em></div>

<p>Each ObjString stores the hash code for its string. Since strings are immutable
in Lox, we can calculate the hash code once up front and be certain that it will
never get invalidated. Caching it eagerly makes a kind of sense: allocating the
string and copying its characters over is already an <em>O(n)</em> operation, so it&rsquo;s a
good time to also do the <em>O(n)</em> calculation of the string&rsquo;s hash.</p>
<p>Whenever we call the internal function to allocate a string, we pass in its
hash code.</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
function <em>allocateString</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="k">static</span> <span class="t">ObjString</span>* <span class="i">allocateString</span>(<span class="t">char</span>* <span class="i">chars</span>, <span class="t">int</span> <span class="i">length</span>,
                                 <span class="t">uint32_t</span> <span class="i">hash</span>) {
</pre><pre class="insert-after">  ObjString* string = ALLOCATE_OBJ(ObjString, OBJ_STRING);
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, function <em>allocateString</em>(), replace 1 line</div>

<p>That function simply stores the hash in the struct.</p>
<div class="codehilite"><pre class="insert-before">  string-&gt;chars = chars;
</pre><div class="source-file"><em>object.c</em><br>
in <em>allocateString</em>()</div>
<pre class="insert">  <span class="i">string</span>-&gt;<span class="i">hash</span> = <span class="i">hash</span>;
</pre><pre class="insert-after">  return string;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>allocateString</em>()</div>

<p>The fun happens over at the callers. <code>allocateString()</code> is called from two
places: the function that copies a string and the one that takes ownership of an
existing dynamically allocated string. We&rsquo;ll start with the first.</p>
<div class="codehilite"><pre class="insert-before">ObjString* copyString(const char* chars, int length) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>copyString</em>()</div>
<pre class="insert">  <span class="t">uint32_t</span> <span class="i">hash</span> = <span class="i">hashString</span>(<span class="i">chars</span>, <span class="i">length</span>);
</pre><pre class="insert-after">  char* heapChars = ALLOCATE(char, length + 1);
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>copyString</em>()</div>

<p>No magic here. We calculate the hash code and then pass it along.</p>
<div class="codehilite"><pre class="insert-before">  memcpy(heapChars, chars, length);
  heapChars[length] = '\0';
</pre><div class="source-file"><em>object.c</em><br>
in <em>copyString</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">return</span> <span class="i">allocateString</span>(<span class="i">heapChars</span>, <span class="i">length</span>, <span class="i">hash</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>copyString</em>(), replace 1 line</div>

<p>The other string function is similar.</p>
<div class="codehilite"><pre class="insert-before">ObjString* takeString(char* chars, int length) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>takeString</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">uint32_t</span> <span class="i">hash</span> = <span class="i">hashString</span>(<span class="i">chars</span>, <span class="i">length</span>);
  <span class="k">return</span> <span class="i">allocateString</span>(<span class="i">chars</span>, <span class="i">length</span>, <span class="i">hash</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>takeString</em>(), replace 1 line</div>

<p>The interesting code is over here:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>allocateString</em>()</div>
<pre><span class="k">static</span> <span class="t">uint32_t</span> <span class="i">hashString</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">key</span>, <span class="t">int</span> <span class="i">length</span>) {
  <span class="t">uint32_t</span> <span class="i">hash</span> = <span class="n">2166136261u</span>;
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">length</span>; <span class="i">i</span>++) {
    <span class="i">hash</span> ^= (<span class="t">uint8_t</span>)<span class="i">key</span>[<span class="i">i</span>];
    <span class="i">hash</span> *= <span class="n">16777619</span>;
  }
  <span class="k">return</span> <span class="i">hash</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>allocateString</em>()</div>

<p>This is the actual bona fide &ldquo;hash function&rdquo; in clox. The algorithm is called
&ldquo;FNV-1a&rdquo;, and is the shortest decent hash function I know. Brevity is certainly
a virtue in a book that aims to show you every line of code.</p>
<p>The basic idea is pretty simple, and many hash functions follow the same
pattern. You start with some initial hash value, usually a constant with certain
carefully chosen mathematical properties. Then you walk the data to be hashed.
For each byte (or sometimes word), you mix the bits into the hash value somehow,
and then scramble the resulting bits around some.</p>
<p>What it means to &ldquo;mix&rdquo; and &ldquo;scramble&rdquo; can get pretty sophisticated. Ultimately,
though, the basic goal is <em>uniformity</em><span class="em">&mdash;</span>we want the resulting hash values to
be as widely scattered around the numeric range as possible to avoid collisions
and clustering.</p>
<h3><a href="#inserting-entries" id="inserting-entries"><small>20&#8202;.&#8202;4&#8202;.&#8202;2</small>Inserting entries</a></h3>
<p>Now that string objects know their hash code, we can start putting them into
hash tables.</p>
<div class="codehilite"><pre class="insert-before">void freeTable(Table* table);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>freeTable</em>()</div>
<pre class="insert"><span class="t">bool</span> <span class="i">tableSet</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="t">ObjString</span>* <span class="i">key</span>, <span class="t">Value</span> <span class="i">value</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>freeTable</em>()</div>

<p>This function adds the given key/value pair to the given hash table. If an entry
for that key is already present, the new value overwrites the old value. The
function returns <code>true</code> if a new entry was added. Here&rsquo;s the implementation:</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>freeTable</em>()</div>
<pre><span class="t">bool</span> <span class="i">tableSet</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="t">ObjString</span>* <span class="i">key</span>, <span class="t">Value</span> <span class="i">value</span>) {
  <span class="t">Entry</span>* <span class="i">entry</span> = <span class="i">findEntry</span>(<span class="i">table</span>-&gt;<span class="i">entries</span>, <span class="i">table</span>-&gt;<span class="i">capacity</span>, <span class="i">key</span>);
  <span class="t">bool</span> <span class="i">isNewKey</span> = <span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>;
  <span class="k">if</span> (<span class="i">isNewKey</span>) <span class="i">table</span>-&gt;<span class="i">count</span>++;

  <span class="i">entry</span>-&gt;<span class="i">key</span> = <span class="i">key</span>;
  <span class="i">entry</span>-&gt;<span class="i">value</span> = <span class="i">value</span>;
  <span class="k">return</span> <span class="i">isNewKey</span>;
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>freeTable</em>()</div>

<p>Most of the interesting logic is in <code>findEntry()</code> which we&rsquo;ll get to soon. That
function&rsquo;s job is to take a key and figure out which bucket in the array it
should go in. It returns a pointer to that bucket<span class="em">&mdash;</span>the address of the Entry in
the array.</p>
<p>Once we have a bucket, inserting is straightforward. We update the hash table&rsquo;s
size, taking care to not increase the count if we overwrote the value for an
already-present key. Then we copy the key and value into the corresponding
fields in the Entry.</p>
<p>We&rsquo;re missing a little something here, though. We haven&rsquo;t actually allocated the
Entry array yet. Oops! Before we can insert anything, we need to make sure we
have an array, and that it&rsquo;s big enough.</p>
<div class="codehilite"><pre class="insert-before">bool tableSet(Table* table, ObjString* key, Value value) {
</pre><div class="source-file"><em>table.c</em><br>
in <em>tableSet</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">table</span>-&gt;<span class="i">count</span> + <span class="n">1</span> &gt; <span class="i">table</span>-&gt;<span class="i">capacity</span> * <span class="a">TABLE_MAX_LOAD</span>) {
    <span class="t">int</span> <span class="i">capacity</span> = <span class="a">GROW_CAPACITY</span>(<span class="i">table</span>-&gt;<span class="i">capacity</span>);
    <span class="i">adjustCapacity</span>(<span class="i">table</span>, <span class="i">capacity</span>);
  }

</pre><pre class="insert-after">  Entry* entry = findEntry(table-&gt;entries, table-&gt;capacity, key);
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>tableSet</em>()</div>

<p>This is similar to the code we wrote a while back for growing a dynamic array.
If we don&rsquo;t have enough capacity to insert an item, we reallocate and grow the
array. The <code>GROW_CAPACITY()</code> macro takes an existing capacity and grows it by
a multiple to ensure that we get amortized constant performance over a series
of inserts.</p>
<p>The interesting difference here is that <code>TABLE_MAX_LOAD</code> constant.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;value.h&quot;

</pre><div class="source-file"><em>table.c</em></div>
<pre class="insert"><span class="a">#define TABLE_MAX_LOAD 0.75</span>

</pre><pre class="insert-after">void initTable(Table* table) {
</pre></div>
<div class="source-file-narrow"><em>table.c</em></div>

<p>This is how we manage the table&rsquo;s <span name="75">load</span> factor. We don&rsquo;t
grow when the capacity is completely full. Instead, we grow the array before
then, when the array becomes at least 75% full.</p>
<aside name="75">
<p>Ideal max load factor varies based on the hash function, collision-handling
strategy, and typical keysets you&rsquo;ll see. Since a toy language like Lox doesn&rsquo;t
have &ldquo;real world&rdquo; data sets, it&rsquo;s hard to optimize this, and I picked 75%
somewhat arbitrarily. When you build your own hash tables, benchmark and tune
this.</p>
</aside>
<p>We&rsquo;ll get to the implementation of <code>adjustCapacity()</code> soon. First, let&rsquo;s look
at that <code>findEntry()</code> function you&rsquo;ve been wondering about.</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>freeTable</em>()</div>
<pre><span class="k">static</span> <span class="t">Entry</span>* <span class="i">findEntry</span>(<span class="t">Entry</span>* <span class="i">entries</span>, <span class="t">int</span> <span class="i">capacity</span>,
                        <span class="t">ObjString</span>* <span class="i">key</span>) {
  <span class="t">uint32_t</span> <span class="i">index</span> = <span class="i">key</span>-&gt;<span class="i">hash</span> % <span class="i">capacity</span>;
  <span class="k">for</span> (;;) {
    <span class="t">Entry</span>* <span class="i">entry</span> = &amp;<span class="i">entries</span>[<span class="i">index</span>];
    <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="i">key</span> || <span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>) {
      <span class="k">return</span> <span class="i">entry</span>;
    }

    <span class="i">index</span> = (<span class="i">index</span> + <span class="n">1</span>) % <span class="i">capacity</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>freeTable</em>()</div>

<p>This function is the real core of the hash table. It&rsquo;s responsible for taking a
key and an array of buckets, and figuring out which bucket the entry belongs in.
This function is also where linear probing and collision handling come into
play. We&rsquo;ll use <code>findEntry()</code> both to look up existing entries in the hash
table and to decide where to insert new ones.</p>
<p>For all that, there isn&rsquo;t much to it. First, we use modulo to map the key&rsquo;s hash
code to an index within the array&rsquo;s bounds. That gives us a bucket index where,
ideally, we&rsquo;ll be able to find or place the entry.</p>
<p>There are a few cases to check for:</p>
<ul>
<li>
<p>If the key for the Entry at that array index is <code>NULL</code>, then the bucket is
empty. If we&rsquo;re using <code>findEntry()</code> to look up something in the hash table,
this means it isn&rsquo;t there. If we&rsquo;re using it to insert, it means we&rsquo;ve found
a place to add the new entry.</p>
</li>
<li>
<p>If the key in the bucket is <span name="equal">equal</span> to the key we&rsquo;re
looking for, then that key is already present in the table. If we&rsquo;re doing a
lookup, that&rsquo;s good<span class="em">&mdash;</span>we&rsquo;ve found the key we seek. If we&rsquo;re doing an insert,
this means we&rsquo;ll be replacing the value for that key instead of adding a new
entry.</p>
</li>
</ul>
<aside name="equal">
<p>It looks like we&rsquo;re using <code>==</code> to see if two strings are equal. That doesn&rsquo;t
work, does it? There could be two copies of the same string at different places
in memory. Fear not, astute reader. We&rsquo;ll solve this further on. And, strangely
enough, it&rsquo;s a hash table that provides the tool we need.</p>
</aside>
<ul>
<li>Otherwise, the bucket has an entry in it, but with a different key. This is
a collision. In that case, we start probing. That&rsquo;s what that <code>for</code> loop
does. We start at the bucket where the entry would ideally go. If that
bucket is empty or has the same key, we&rsquo;re done. Otherwise, we advance to
the next element<span class="em">&mdash;</span>this is the <em>linear</em> part of &ldquo;linear probing&rdquo;<span class="em">&mdash;</span>and
check there. If we go past the end of the array, that second modulo operator
wraps us back around to the beginning.</li>
</ul>
<p>We exit the loop when we find either an empty bucket or a bucket with the same
key as the one we&rsquo;re looking for. You might be wondering about an infinite loop.
What if we collide with <em>every</em> bucket? Fortunately, that can&rsquo;t happen thanks to
our load factor. Because we grow the array as soon as it gets close to being
full, we know there will always be empty buckets.</p>
<p>We return directly from within the loop, yielding a pointer to the found Entry
so the caller can either insert something into it or read from it. Way back in
<code>tableSet()</code>, the function that first kicked this off, we store the new entry in
that returned bucket and we&rsquo;re done.</p>
<h3><a href="#allocating-and-resizing" id="allocating-and-resizing"><small>20&#8202;.&#8202;4&#8202;.&#8202;3</small>Allocating and resizing</a></h3>
<p>Before we can put entries in the hash table, we do need a place to actually
store them. We need to allocate an array of buckets. That happens in this
function:</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>findEntry</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">adjustCapacity</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="t">int</span> <span class="i">capacity</span>) {
  <span class="t">Entry</span>* <span class="i">entries</span> = <span class="a">ALLOCATE</span>(<span class="t">Entry</span>, <span class="i">capacity</span>);
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">capacity</span>; <span class="i">i</span>++) {
    <span class="i">entries</span>[<span class="i">i</span>].<span class="i">key</span> = <span class="a">NULL</span>;
    <span class="i">entries</span>[<span class="i">i</span>].<span class="i">value</span> = <span class="a">NIL_VAL</span>;
  }

  <span class="i">table</span>-&gt;<span class="i">entries</span> = <span class="i">entries</span>;
  <span class="i">table</span>-&gt;<span class="i">capacity</span> = <span class="i">capacity</span>;
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>findEntry</em>()</div>

<p>We create a bucket array with <code>capacity</code> entries. After we allocate the array,
we initialize every element to be an empty bucket and then store the array (and
its capacity) in the hash table&rsquo;s main struct. This code is fine for when we
insert the very first entry into the table, and we require the first allocation
of the array. But what about when we already have one and we need to grow it?</p>
<p>Back when we were doing a dynamic array, we could just use <code>realloc()</code> and let
the C standard library copy everything over. That doesn&rsquo;t work for a hash table.
Remember that to choose the bucket for each entry, we take its key&rsquo;s hash code <em>modulo
the array size</em>. That means that when the array size changes, entries may end up
in different buckets.</p>
<p>Those new buckets may have new collisions that we need to deal with. So the
simplest way to get every entry where it belongs is to rebuild the table from
scratch by re-inserting every entry into the new empty array.</p>
<div class="codehilite"><pre class="insert-before">    entries[i].value = NIL_VAL;
  }
</pre><div class="source-file"><em>table.c</em><br>
in <em>adjustCapacity</em>()</div>
<pre class="insert">

  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">table</span>-&gt;<span class="i">capacity</span>; <span class="i">i</span>++) {
    <span class="t">Entry</span>* <span class="i">entry</span> = &amp;<span class="i">table</span>-&gt;<span class="i">entries</span>[<span class="i">i</span>];
    <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>) <span class="k">continue</span>;

    <span class="t">Entry</span>* <span class="i">dest</span> = <span class="i">findEntry</span>(<span class="i">entries</span>, <span class="i">capacity</span>, <span class="i">entry</span>-&gt;<span class="i">key</span>);
    <span class="i">dest</span>-&gt;<span class="i">key</span> = <span class="i">entry</span>-&gt;<span class="i">key</span>;
    <span class="i">dest</span>-&gt;<span class="i">value</span> = <span class="i">entry</span>-&gt;<span class="i">value</span>;
  }
</pre><pre class="insert-after">

  table-&gt;entries = entries;
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>adjustCapacity</em>()</div>

<p>We walk through the old array front to back. Any time we find a non-empty
bucket, we insert that entry into the new array. We use <code>findEntry()</code>, passing
in the <em>new</em> array instead of the one currently stored in the Table. (This is
why <code>findEntry()</code> takes a pointer directly to an Entry array and not the whole
<code>Table</code> struct. That way, we can pass the new array and capacity before we&rsquo;ve
stored those in the struct.)</p>
<p>After that&rsquo;s done, we can release the memory for the old array.</p>
<div class="codehilite"><pre class="insert-before">    dest-&gt;value = entry-&gt;value;
  }

</pre><div class="source-file"><em>table.c</em><br>
in <em>adjustCapacity</em>()</div>
<pre class="insert">  <span class="a">FREE_ARRAY</span>(<span class="t">Entry</span>, <span class="i">table</span>-&gt;<span class="i">entries</span>, <span class="i">table</span>-&gt;<span class="i">capacity</span>);
</pre><pre class="insert-after">  table-&gt;entries = entries;
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>adjustCapacity</em>()</div>

<p>With that, we have a hash table that we can stuff as many entries into as we
like. It handles overwriting existing keys and growing itself as needed to
maintain the desired load factor.</p>
<p>While we&rsquo;re at it, let&rsquo;s also define a helper function for copying all of the
entries of one hash table into another.</p>
<div class="codehilite"><pre class="insert-before">bool tableSet(Table* table, ObjString* key, Value value);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>tableSet</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">tableAddAll</span>(<span class="t">Table</span>* <span class="i">from</span>, <span class="t">Table</span>* <span class="i">to</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>tableSet</em>()</div>

<p>We won&rsquo;t need this until much later when we support method inheritance, but we
may as well implement it now while we&rsquo;ve got all the hash table stuff fresh in
our minds.</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>tableSet</em>()</div>
<pre><span class="t">void</span> <span class="i">tableAddAll</span>(<span class="t">Table</span>* <span class="i">from</span>, <span class="t">Table</span>* <span class="i">to</span>) {
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">from</span>-&gt;<span class="i">capacity</span>; <span class="i">i</span>++) {
    <span class="t">Entry</span>* <span class="i">entry</span> = &amp;<span class="i">from</span>-&gt;<span class="i">entries</span>[<span class="i">i</span>];
    <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> != <span class="a">NULL</span>) {
      <span class="i">tableSet</span>(<span class="i">to</span>, <span class="i">entry</span>-&gt;<span class="i">key</span>, <span class="i">entry</span>-&gt;<span class="i">value</span>);
    }
  }
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>tableSet</em>()</div>

<p>There&rsquo;s not much to say about this. It walks the bucket array of the source hash
table. Whenever it finds a non-empty bucket, it adds the entry to the
destination hash table using the <code>tableSet()</code> function we recently defined.</p>
<h3><a href="#retrieving-values" id="retrieving-values"><small>20&#8202;.&#8202;4&#8202;.&#8202;4</small>Retrieving values</a></h3>
<p>Now that our hash table contains some stuff, let&rsquo;s start pulling things back
out. Given a key, we can look up the corresponding value, if there is one, with
this function:</p>
<div class="codehilite"><pre class="insert-before">void freeTable(Table* table);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>freeTable</em>()</div>
<pre class="insert"><span class="t">bool</span> <span class="i">tableGet</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="t">ObjString</span>* <span class="i">key</span>, <span class="t">Value</span>* <span class="i">value</span>);
</pre><pre class="insert-after">bool tableSet(Table* table, ObjString* key, Value value);
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>freeTable</em>()</div>

<p>You pass in a table and a key. If it finds an entry with that key, it returns
<code>true</code>, otherwise it returns <code>false</code>. If the entry exists, the <code>value</code> output
parameter points to the resulting value.</p>
<p>Since <code>findEntry()</code> already does the hard work, the implementation isn&rsquo;t bad.</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>findEntry</em>()</div>
<pre><span class="t">bool</span> <span class="i">tableGet</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="t">ObjString</span>* <span class="i">key</span>, <span class="t">Value</span>* <span class="i">value</span>) {
  <span class="k">if</span> (<span class="i">table</span>-&gt;<span class="i">count</span> == <span class="n">0</span>) <span class="k">return</span> <span class="k">false</span>;

  <span class="t">Entry</span>* <span class="i">entry</span> = <span class="i">findEntry</span>(<span class="i">table</span>-&gt;<span class="i">entries</span>, <span class="i">table</span>-&gt;<span class="i">capacity</span>, <span class="i">key</span>);
  <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>) <span class="k">return</span> <span class="k">false</span>;

  *<span class="i">value</span> = <span class="i">entry</span>-&gt;<span class="i">value</span>;
  <span class="k">return</span> <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>findEntry</em>()</div>

<p>If the table is completely empty, we definitely won&rsquo;t find the entry, so we
check for that first. This isn&rsquo;t just an optimization<span class="em">&mdash;</span>it also ensures that we
don&rsquo;t try to access the bucket array when the array is <code>NULL</code>. Otherwise, we let
<code>findEntry()</code> work its magic. That returns a pointer to a bucket. If the bucket
is empty, which we detect by seeing if the key is <code>NULL</code>, then we didn&rsquo;t find an
Entry with our key. If <code>findEntry()</code> does return a non-empty Entry, then that&rsquo;s
our match. We take the Entry&rsquo;s value and copy it to the output parameter so the
caller can get it. Piece of cake.</p>
<h3><a href="#deleting-entries" id="deleting-entries"><small>20&#8202;.&#8202;4&#8202;.&#8202;5</small>Deleting entries</a></h3>
<p>There is one more fundamental operation a full-featured hash table needs to
support: removing an entry. This seems pretty obvious: if you can add things,
you should be able to <em>un</em>-add them, right? But you&rsquo;d be surprised how many
tutorials on hash tables omit this.</p>
<p>I could have taken that route too. In fact, we use deletion in clox only in a
tiny edge case in the VM. But if you want to actually understand how to
completely implement a hash table, this feels important. I can sympathize with
their desire to overlook it. As we&rsquo;ll see, deleting from a hash table that uses
<span name="delete">open</span> addressing is tricky.</p>
<aside name="delete">
<p>With separate chaining, deleting is as easy as removing a node from a linked
list.</p>
</aside>
<p>At least the declaration is simple.</p>
<div class="codehilite"><pre class="insert-before">bool tableSet(Table* table, ObjString* key, Value value);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>tableSet</em>()</div>
<pre class="insert"><span class="t">bool</span> <span class="i">tableDelete</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="t">ObjString</span>* <span class="i">key</span>);
</pre><pre class="insert-after">void tableAddAll(Table* from, Table* to);
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>tableSet</em>()</div>

<p>The obvious approach is to mirror insertion. Use <code>findEntry()</code> to look up the
entry&rsquo;s bucket. Then clear out the bucket. Done!</p>
<p>In cases where there are no collisions, that works fine. But if a collision has
occurred, then the bucket where the entry lives may be part of one or more
implicit probe sequences. For example, here&rsquo;s a hash table containing three keys
all with the same preferred bucket, 2:</p><img src="image/hash-tables/delete-1.png" alt="A hash table containing 'bagel' in bucket 2, 'biscuit' in bucket 3, and 'jam' in bucket 4." />
<p>Remember that when we&rsquo;re walking a probe sequence to find an entry, we know
we&rsquo;ve reached the end of a sequence and that the entry isn&rsquo;t present when we hit
an empty bucket. It&rsquo;s like the probe sequence is a list of entries and an empty
entry terminates that list.</p>
<p>If we delete &ldquo;biscuit&rdquo; by simply clearing the Entry, then we break that probe
sequence in the middle, leaving the trailing entries orphaned and unreachable.
Sort of like removing a node from a linked list without relinking the pointer
from the previous node to the next one.</p>
<p>If we later try to look for &ldquo;jam&rdquo;, we&rsquo;d start at &ldquo;bagel&rdquo;, stop at the next
empty Entry, and never find it.</p><img src="image/hash-tables/delete-2.png" alt="The 'biscuit' entry has been deleted from the hash table, breaking the chain." />
<p>To solve this, most implementations use a trick called <span
name="tombstone"><strong>tombstones</strong></span>. Instead of clearing the entry on
deletion, we replace it with a special sentinel entry called a &ldquo;tombstone&rdquo;. When
we are following a probe sequence during a lookup, and we hit a tombstone, we
<em>don&rsquo;t</em> treat it like an empty slot and stop iterating. Instead, we keep going
so that deleting an entry doesn&rsquo;t break any implicit collision chains and we can
still find entries after it.</p><img src="image/hash-tables/delete-3.png" alt="Instead of deleting 'biscuit', it's replaced with a tombstone." />
<p>The code looks like this:</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>tableSet</em>()</div>
<pre><span class="t">bool</span> <span class="i">tableDelete</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="t">ObjString</span>* <span class="i">key</span>) {
  <span class="k">if</span> (<span class="i">table</span>-&gt;<span class="i">count</span> == <span class="n">0</span>) <span class="k">return</span> <span class="k">false</span>;

  <span class="c">// Find the entry.</span>
  <span class="t">Entry</span>* <span class="i">entry</span> = <span class="i">findEntry</span>(<span class="i">table</span>-&gt;<span class="i">entries</span>, <span class="i">table</span>-&gt;<span class="i">capacity</span>, <span class="i">key</span>);
  <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>) <span class="k">return</span> <span class="k">false</span>;

  <span class="c">// Place a tombstone in the entry.</span>
  <span class="i">entry</span>-&gt;<span class="i">key</span> = <span class="a">NULL</span>;
  <span class="i">entry</span>-&gt;<span class="i">value</span> = <span class="a">BOOL_VAL</span>(<span class="k">true</span>);
  <span class="k">return</span> <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>tableSet</em>()</div>

<p>First, we find the bucket containing the entry we want to delete. (If we don&rsquo;t
find it, there&rsquo;s nothing to delete, so we bail out.) We replace the entry with a
tombstone. In clox, we use a <code>NULL</code> key and a <code>true</code> value to represent that,
but any representation that can&rsquo;t be confused with an empty bucket or a valid
entry works.</p>
<aside name="tombstone"><img src="image/hash-tables/tombstone.png" alt="A tombstone inscribed 'Here lies entry biscuit &rarr; 3.75, gone but not deleted'." />
</aside>
<p>That&rsquo;s all we need to do to delete an entry. Simple and fast. But all of the
other operations need to correctly handle tombstones too. A tombstone is a sort
of &ldquo;half&rdquo; entry. It has some of the characteristics of a present entry, and some
of the characteristics of an empty one.</p>
<p>When we are following a probe sequence during a lookup, and we hit a tombstone,
we note it and keep going.</p>
<div class="codehilite"><pre class="insert-before">  for (;;) {
    Entry* entry = &amp;entries[index];
</pre><div class="source-file"><em>table.c</em><br>
in <em>findEntry</em>()<br>
replace 3 lines</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>) {
      <span class="k">if</span> (<span class="a">IS_NIL</span>(<span class="i">entry</span>-&gt;<span class="i">value</span>)) {
        <span class="c">// Empty entry.</span>
        <span class="k">return</span> <span class="i">tombstone</span> != <span class="a">NULL</span> ? <span class="i">tombstone</span> : <span class="i">entry</span>;
      } <span class="k">else</span> {
        <span class="c">// We found a tombstone.</span>
        <span class="k">if</span> (<span class="i">tombstone</span> == <span class="a">NULL</span>) <span class="i">tombstone</span> = <span class="i">entry</span>;
      }
    } <span class="k">else</span> <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="i">key</span>) {
      <span class="c">// We found the key.</span>
      <span class="k">return</span> <span class="i">entry</span>;
    }
</pre><pre class="insert-after">

    index = (index + 1) % capacity;
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>findEntry</em>(), replace 3 lines</div>

<p>The first time we pass a tombstone, we store it in this local variable:</p>
<div class="codehilite"><pre class="insert-before">  uint32_t index = key-&gt;hash % capacity;
</pre><div class="source-file"><em>table.c</em><br>
in <em>findEntry</em>()</div>
<pre class="insert">  <span class="t">Entry</span>* <span class="i">tombstone</span> = <span class="a">NULL</span>;

</pre><pre class="insert-after">  for (;;) {
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>findEntry</em>()</div>

<p>If we reach a truly empty entry, then the key isn&rsquo;t present. In that case, if we
have passed a tombstone, we return its bucket instead of the later empty one. If
we&rsquo;re calling <code>findEntry()</code> in order to insert an entry, that lets us treat the
tombstone bucket as empty and reuse it for the new entry.</p>
<p>Reusing tombstone slots automatically like this helps reduce the number of
tombstones wasting space in the bucket array. In typical use cases where there
is a mixture of insertions and deletions, the number of tombstones grows for a
while and then tends to stabilize.</p>
<p>Even so, there&rsquo;s no guarantee that a large number of deletes won&rsquo;t cause the
array to be full of tombstones. In the very worst case, we could end up with
<em>no</em> empty buckets. That would be bad because, remember, the only thing
preventing an infinite loop in <code>findEntry()</code> is the assumption that we&rsquo;ll
eventually hit an empty bucket.</p>
<p>So we need to be thoughtful about how tombstones interact with the table&rsquo;s load
factor and resizing. The key question is, when calculating the load factor,
should we treat tombstones like full buckets or empty ones?</p>
<h3><a href="#counting-tombstones" id="counting-tombstones"><small>20&#8202;.&#8202;4&#8202;.&#8202;6</small>Counting tombstones</a></h3>
<p>If we treat tombstones like full buckets, then we may end up with a bigger array
than we probably need because it artificially inflates the load factor. There
are tombstones we could reuse, but they aren&rsquo;t treated as unused so we end up
growing the array prematurely.</p>
<p>But if we treat tombstones like empty buckets and <em>don&rsquo;t</em> include them in the
load factor, then we run the risk of ending up with <em>no</em> actual empty buckets to
terminate a lookup. An infinite loop is a much worse problem than a few extra
array slots, so for load factor, we consider tombstones to be full buckets.</p>
<p>That&rsquo;s why we don&rsquo;t reduce the count when deleting an entry in the previous
code. The count is no longer the number of entries in the hash table, it&rsquo;s the
number of entries plus tombstones. That implies that we increment the count
during insertion only if the new entry goes into an entirely empty bucket.</p>
<div class="codehilite"><pre class="insert-before">  bool isNewKey = entry-&gt;key == NULL;
</pre><div class="source-file"><em>table.c</em><br>
in <em>tableSet</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">isNewKey</span> &amp;&amp; <span class="a">IS_NIL</span>(<span class="i">entry</span>-&gt;<span class="i">value</span>)) <span class="i">table</span>-&gt;<span class="i">count</span>++;
</pre><pre class="insert-after">

  entry-&gt;key = key;
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>tableSet</em>(), replace 1 line</div>

<p>If we are replacing a tombstone with a new entry, the bucket has already been
accounted for and the count doesn&rsquo;t change.</p>
<p>When we resize the array, we allocate a new array and re-insert all of the
existing entries into it. During that process, we <em>don&rsquo;t</em> copy the tombstones
over. They don&rsquo;t add any value since we&rsquo;re rebuilding the probe sequences
anyway, and would just slow down lookups. That means we need to recalculate the
count since it may change during a resize. So we clear it out:</p>
<div class="codehilite"><pre class="insert-before">  }

</pre><div class="source-file"><em>table.c</em><br>
in <em>adjustCapacity</em>()</div>
<pre class="insert">  <span class="i">table</span>-&gt;<span class="i">count</span> = <span class="n">0</span>;
</pre><pre class="insert-after">  for (int i = 0; i &lt; table-&gt;capacity; i++) {
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>adjustCapacity</em>()</div>

<p>Then each time we find a non-tombstone entry, we increment it.</p>
<div class="codehilite"><pre class="insert-before">    dest-&gt;value = entry-&gt;value;
</pre><div class="source-file"><em>table.c</em><br>
in <em>adjustCapacity</em>()</div>
<pre class="insert">    <span class="i">table</span>-&gt;<span class="i">count</span>++;
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>adjustCapacity</em>()</div>

<p>This means that when we grow the capacity, we may end up with <em>fewer</em> entries in
the resulting larger array because all of the tombstones get discarded. That&rsquo;s a
little wasteful, but not a huge practical problem.</p>
<p>I find it interesting that much of the work to support deleting entries is in
<code>findEntry()</code> and <code>adjustCapacity()</code>. The actual delete logic is quite simple
and fast. In practice, deletions tend to be rare, so you&rsquo;d expect a hash table
to do as much work as it can in the delete function and leave the other
functions alone to keep them faster. With our tombstone approach, deletes are
fast, but lookups get penalized.</p>
<p>I did a little benchmarking to test this out in a few different deletion
scenarios. I was surprised to discover that tombstones did end up being faster
overall compared to doing all the work during deletion to reinsert the affected
entries.</p>
<p>But if you think about it, it&rsquo;s not that the tombstone approach pushes the work
of fully deleting an entry to other operations, it&rsquo;s more that it makes deleting
<em>lazy</em>. At first, it does the minimal work to turn the entry into a tombstone.
That can cause a penalty when later lookups have to skip over it. But it also
allows that tombstone bucket to be reused by a later insert too. That reuse is a
very efficient way to avoid the cost of rearranging all of the following
affected entries. You basically recycle a node in the chain of probed entries.
It&rsquo;s a neat trick.</p>
<h2><a href="#string-interning" id="string-interning"><small>20&#8202;.&#8202;5</small>String Interning</a></h2>
<p>We&rsquo;ve got ourselves a hash table that mostly works, though it has a critical
flaw in its center. Also, we aren&rsquo;t using it for anything yet. It&rsquo;s time to
address both of those and, in the process, learn a classic technique used by
interpreters.</p>
<p>The reason the hash table doesn&rsquo;t totally work is that when <code>findEntry()</code> checks
to see if an existing key matches the one it&rsquo;s looking for, it uses <code>==</code> to
compare two strings for equality. That only returns true if the two keys are the
exact same string in memory. Two separate strings with the same characters
should be considered equal, but aren&rsquo;t.</p>
<p>Remember, back when we added strings in the last chapter, we added <a href="strings.html#operations-on-strings">explicit
support to compare the strings character-by-character</a> in order to get
true value equality. We could do that in <code>findEntry()</code>, but that&rsquo;s <span
name="hash-collision">slow</span>.</p>
<aside name="hash-collision">
<p>In practice, we would first compare the hash codes of the two strings. That
quickly detects almost all different strings<span class="em">&mdash;</span>it wouldn&rsquo;t be a very good hash
function if it didn&rsquo;t. But when the two hashes are the same, we still have to
compare characters to make sure we didn&rsquo;t have a hash collision on different
strings.</p>
</aside>
<p>Instead, we&rsquo;ll use a technique called <strong>string interning</strong>. The core problem is
that it&rsquo;s possible to have different strings in memory with the same characters.
Those need to behave like equivalent values even though they are distinct
objects. They&rsquo;re essentially duplicates, and we have to compare all of their
bytes to detect that.</p>
<p><span name="intern">String interning</span> is a process of deduplication. We
create a collection of &ldquo;interned&rdquo; strings. Any string in that collection is
guaranteed to be textually distinct from all others. When you intern a string,
you look for a matching string in the collection. If found, you use that
original one. Otherwise, the string you have is unique, so you add it to the
collection.</p>
<aside name="intern">
<p>I&rsquo;m guessing &ldquo;intern&rdquo; is short for &ldquo;internal&rdquo;. I think the idea is that the
language&rsquo;s runtime keeps its own &ldquo;internal&rdquo; collection of these strings, whereas
other strings could be user created and floating around in memory. When you
intern a string, you ask the runtime to add the string to that internal
collection and return a pointer to it.</p>
<p>Languages vary in how much string interning they do and how it&rsquo;s exposed to the
user. Lua interns <em>all</em> strings, which is what clox will do too. Lisp, Scheme,
Smalltalk, Ruby and others have a separate string-like type called &ldquo;symbol&rdquo; that
is implicitly interned. (This is why they say symbols are &ldquo;faster&rdquo; in Ruby.)
Java interns constant strings by default, and provides an API to let you
explicitly intern any string you give it.</p>
</aside>
<p>In this way, you know that each sequence of characters is represented by only
one string in memory. This makes value equality trivial. If two strings point
to the same address in memory, they are obviously the same string and must be
equal. And, because we know strings are unique, if two strings point to
different addresses, they must be distinct strings.</p>
<p>Thus, pointer equality exactly matches value equality. Which in turn means that
our existing <code>==</code> in <code>findEntry()</code> does the right thing. Or, at least, it will
once we intern all the strings. In order to reliably deduplicate all strings,
the VM needs to be able to find every string that&rsquo;s created. We do that by
giving it a hash table to store them all.</p>
<div class="codehilite"><pre class="insert-before">  Value* stackTop;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">Table</span> <span class="i">strings</span>;
</pre><pre class="insert-after">  Obj* objects;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>As usual, we need an include.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;chunk.h&quot;
</pre><div class="source-file"><em>vm.h</em></div>
<pre class="insert"><span class="a">#include &quot;table.h&quot;</span>
</pre><pre class="insert-after">#include &quot;value.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em></div>

<p>When we spin up a new VM, the string table is empty.</p>
<div class="codehilite"><pre class="insert-before">  vm.objects = NULL;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">  <span class="i">initTable</span>(&amp;<span class="i">vm</span>.<span class="i">strings</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>And when we shut down the VM, we clean up any resources used by the table.</p>
<div class="codehilite"><pre class="insert-before">void freeVM() {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>freeVM</em>()</div>
<pre class="insert">  <span class="i">freeTable</span>(&amp;<span class="i">vm</span>.<span class="i">strings</span>);
</pre><pre class="insert-after">  freeObjects();
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>freeVM</em>()</div>

<p>Some languages have a separate type or an explicit step to intern a string. For
clox, we&rsquo;ll automatically intern every one. That means whenever we create a new
unique string, we add it to the table.</p>
<div class="codehilite"><pre class="insert-before">  string-&gt;hash = hash;
</pre><div class="source-file"><em>object.c</em><br>
in <em>allocateString</em>()</div>
<pre class="insert">  <span class="i">tableSet</span>(&amp;<span class="i">vm</span>.<span class="i">strings</span>, <span class="i">string</span>, <span class="a">NIL_VAL</span>);
</pre><pre class="insert-after">  return string;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>allocateString</em>()</div>

<p>We&rsquo;re using the table more like a hash <em>set</em> than a hash <em>table</em>. The keys are
the strings and those are all we care about, so we just use <code>nil</code> for the
values.</p>
<p>This gets a string into the table assuming that it&rsquo;s unique, but we need to
actually check for duplication before we get here. We do that in the two
higher-level functions that call <code>allocateString()</code>. Here&rsquo;s one:</p>
<div class="codehilite"><pre class="insert-before">  uint32_t hash = hashString(chars, length);
</pre><div class="source-file"><em>object.c</em><br>
in <em>copyString</em>()</div>
<pre class="insert">  <span class="t">ObjString</span>* <span class="i">interned</span> = <span class="i">tableFindString</span>(&amp;<span class="i">vm</span>.<span class="i">strings</span>, <span class="i">chars</span>, <span class="i">length</span>,
                                        <span class="i">hash</span>);
  <span class="k">if</span> (<span class="i">interned</span> != <span class="a">NULL</span>) <span class="k">return</span> <span class="i">interned</span>;

</pre><pre class="insert-after">  char* heapChars = ALLOCATE(char, length + 1);
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>copyString</em>()</div>

<p>When copying a string into a new ObjString, we look it up in the string table
first. If we find it, instead of &ldquo;copying&rdquo;, we just return a reference to that
string. Otherwise, we fall through, allocate a new string, and store it in the
string table.</p>
<p>Taking ownership of a string is a little different.</p>
<div class="codehilite"><pre class="insert-before">  uint32_t hash = hashString(chars, length);
</pre><div class="source-file"><em>object.c</em><br>
in <em>takeString</em>()</div>
<pre class="insert">  <span class="t">ObjString</span>* <span class="i">interned</span> = <span class="i">tableFindString</span>(&amp;<span class="i">vm</span>.<span class="i">strings</span>, <span class="i">chars</span>, <span class="i">length</span>,
                                        <span class="i">hash</span>);
  <span class="k">if</span> (<span class="i">interned</span> != <span class="a">NULL</span>) {
    <span class="a">FREE_ARRAY</span>(<span class="t">char</span>, <span class="i">chars</span>, <span class="i">length</span> + <span class="n">1</span>);
    <span class="k">return</span> <span class="i">interned</span>;
  }

</pre><pre class="insert-after">  return allocateString(chars, length, hash);
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>takeString</em>()</div>

<p>Again, we look up the string in the string table first. If we find it, before we
return it, we free the memory for the string that was passed in. Since ownership
is being passed to this function and we no longer need the duplicate string,
it&rsquo;s up to us to free it.</p>
<p>Before we get to the new function we need to write, there&rsquo;s one more include.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;object.h&quot;
</pre><div class="source-file"><em>object.c</em></div>
<pre class="insert"><span class="a">#include &quot;table.h&quot;</span>
</pre><pre class="insert-after">#include &quot;value.h&quot;
</pre></div>
<div class="source-file-narrow"><em>object.c</em></div>

<p>To look for a string in the table, we can&rsquo;t use the normal <code>tableGet()</code> function
because that calls <code>findEntry()</code>, which has the exact problem with duplicate
strings that we&rsquo;re trying to fix right now. Instead, we use this new function:</p>
<div class="codehilite"><pre class="insert-before">void tableAddAll(Table* from, Table* to);
</pre><div class="source-file"><em>table.h</em><br>
add after <em>tableAddAll</em>()</div>
<pre class="insert"><span class="t">ObjString</span>* <span class="i">tableFindString</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="k">const</span> <span class="t">char</span>* <span class="i">chars</span>,
                           <span class="t">int</span> <span class="i">length</span>, <span class="t">uint32_t</span> <span class="i">hash</span>);
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>table.h</em>, add after <em>tableAddAll</em>()</div>

<p>The implementation looks like so:</p>
<div class="codehilite"><div class="source-file"><em>table.c</em><br>
add after <em>tableAddAll</em>()</div>
<pre><span class="t">ObjString</span>* <span class="i">tableFindString</span>(<span class="t">Table</span>* <span class="i">table</span>, <span class="k">const</span> <span class="t">char</span>* <span class="i">chars</span>,
                           <span class="t">int</span> <span class="i">length</span>, <span class="t">uint32_t</span> <span class="i">hash</span>) {
  <span class="k">if</span> (<span class="i">table</span>-&gt;<span class="i">count</span> == <span class="n">0</span>) <span class="k">return</span> <span class="a">NULL</span>;

  <span class="t">uint32_t</span> <span class="i">index</span> = <span class="i">hash</span> % <span class="i">table</span>-&gt;<span class="i">capacity</span>;
  <span class="k">for</span> (;;) {
    <span class="t">Entry</span>* <span class="i">entry</span> = &amp;<span class="i">table</span>-&gt;<span class="i">entries</span>[<span class="i">index</span>];
    <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>) {
      <span class="c">// Stop if we find an empty non-tombstone entry.</span>
      <span class="k">if</span> (<span class="a">IS_NIL</span>(<span class="i">entry</span>-&gt;<span class="i">value</span>)) <span class="k">return</span> <span class="a">NULL</span>;
    } <span class="k">else</span> <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span>-&gt;<span class="i">length</span> == <span class="i">length</span> &amp;&amp;
        <span class="i">entry</span>-&gt;<span class="i">key</span>-&gt;<span class="i">hash</span> == <span class="i">hash</span> &amp;&amp;
        <span class="i">memcmp</span>(<span class="i">entry</span>-&gt;<span class="i">key</span>-&gt;<span class="i">chars</span>, <span class="i">chars</span>, <span class="i">length</span>) == <span class="n">0</span>) {
      <span class="c">// We found it.</span>
      <span class="k">return</span> <span class="i">entry</span>-&gt;<span class="i">key</span>;
    }

    <span class="i">index</span> = (<span class="i">index</span> + <span class="n">1</span>) % <span class="i">table</span>-&gt;<span class="i">capacity</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, add after <em>tableAddAll</em>()</div>

<p>It appears we have copy-pasted <code>findEntry()</code>. There is a lot of redundancy, but
also a couple of key differences. First, we pass in the raw character array of
the key we&rsquo;re looking for instead of an ObjString. At the point that we call
this, we haven&rsquo;t created an ObjString yet.</p>
<p>Second, when checking to see if we found the key, we look at the actual strings.
We first see if they have matching lengths and hashes. Those are quick to check
and if they aren&rsquo;t equal, the strings definitely aren&rsquo;t the same.</p>
<p>If the lengths and hashes do match, we have either found the string or hit a
hash collision, so we do an actual character-by-character string comparison to
tell which. This is the one place in the VM where we actually test strings for
textual equality. We do it here to deduplicate strings and then the rest of the
VM can take for granted that any two strings at different addresses in memory
must have different contents.</p>
<p>In fact, now that we&rsquo;ve interned all the strings, we can take advantage of it in
the bytecode interpreter. When a user does <code>==</code> on two objects that happen to be
strings, we don&rsquo;t need to test the characters any more.</p>
<div class="codehilite"><pre class="insert-before">    case VAL_NUMBER: return AS_NUMBER(a) == AS_NUMBER(b);
</pre><div class="source-file"><em>value.c</em><br>
in <em>valuesEqual</em>()<br>
replace 7 lines</div>
<pre class="insert">    <span class="k">case</span> <span class="a">VAL_OBJ</span>:    <span class="k">return</span> <span class="a">AS_OBJ</span>(<span class="i">a</span>) == <span class="a">AS_OBJ</span>(<span class="i">b</span>);
</pre><pre class="insert-after">    default:         return false; // Unreachable.
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>valuesEqual</em>(), replace 7 lines</div>

<p>We&rsquo;ve added a little overhead when creating strings to intern them. But in
return, at runtime, the equality operator on strings is much faster. With that,
we have a full-featured hash table ready for us to use for tracking variables,
instances, or any other key-value pairs that might show up.</p>
<p>We also sped up testing strings for equality. This is nice for when the user
does <code>==</code> on strings. But it&rsquo;s even more critical in a dynamically typed
language like Lox where method calls and instance fields are looked up by name
at runtime. If testing a string for equality is slow, then that means looking up
a method by name is slow. And if <em>that&rsquo;s</em> slow in your object-oriented language,
then <em>everything</em> is slow.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>In clox, we happen to only need keys that are strings, so the hash table we
built is hardcoded for that key type. If we exposed hash tables to Lox users
as a first-class collection, it would be useful to support different kinds
of keys.</p>
<p>Add support for keys of the other primitive types: numbers, Booleans, and
<code>nil</code>. Later, clox will support user-defined classes. If we want to support
keys that are instances of those classes, what kind of complexity does that
add?</p>
</li>
<li>
<p>Hash tables have a lot of knobs you can tweak that affect their performance.
You decide whether to use separate chaining or open addressing. Depending on
which fork in that road you take, you can tune how many entries are stored
in each node, or the probing strategy you use. You control the hash
function, load factor, and growth rate.</p>
<p>All of this variety wasn&rsquo;t created just to give CS doctoral candidates
something to <span name="publish">publish</span> theses on: each has its
uses in the many varied domains and hardware scenarios where hashing comes
into play. Look up a few hash table implementations in different open source
systems, research the choices they made, and try to figure out why they did
things that way.</p>
<aside name="publish">
<p>Well, at least that wasn&rsquo;t the <em>only</em> reason they were created. Whether that
was the <em>main</em> reason is up for debate.</p>
</aside></li>
<li>
<p>Benchmarking a hash table is notoriously difficult. A hash table
implementation may perform well with some keysets and poorly with others. It
may work well at small sizes but degrade as it grows, or vice versa. It may
choke when deletions are common, but fly when they aren&rsquo;t. Creating
benchmarks that accurately represent how your users will use the hash table
is a challenge.</p>
<p>Write a handful of different benchmark programs to validate our hash table
implementation. How does the performance vary between them? Why did you
choose the specific test cases you chose?</p>
</li>
</ol>
</div>

<footer>
<a href="global-variables.html" class="next">
  Next Chapter: &ldquo;Global Variables&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/index.css
================================================
/* "Crimson" web font: six @font-face entries, one per weight/style pairing,
   each loading its own WOFF file from font/. */

/* Roman face: no font-weight or font-style descriptor given. */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-roman.woff") format("woff");
}
/* Italic. */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-italic.woff") format("woff");
  font-style: italic;
}
/* Semibold (weight 600). */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-semibold.woff") format("woff");
  font-weight: 600;
}
/* Semibold italic. */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-semibolditalic.woff") format("woff");
  font-style: italic;
  font-weight: 600;
}
/* Bold. */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-bold.woff") format("woff");
  font-weight: bold;
}
/* Bold italic. */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-bolditalic.woff") format("woff");
  font-style: italic;
  font-weight: bold;
}
/* Reset: zero the margin on the listed block and inline elements so every
   margin on the page is set explicitly by the rules below. */
body, h1, h2, h3, h4, p, blockquote, code, ul, ol, dl, dd, img {
  margin: 0;
}

/* No outline on images. */
img {
  outline: none;
}

/* Small inline images: the height is fixed and the width is left to follow
   from it. */
img.arrow {
  width: auto;
  height: 11px;
}

img.dot {
  width: auto;
  height: 18px;
  vertical-align: text-bottom;
}

/* Base text color and font (16px type on a 24px line) for the page. */
body {
  color: #222;
  font: normal 16px/24px "Crimson", Georgia, serif;
}

/* Mailing-list sign-up box: a tinted panel containing a one-row form with an
   email field and a submit button. */
.sign-up {
  padding: 12px;
  margin: 24px 0 24px 0;
  background: #fcf6e8;
  color: #bf9540;
  border-radius: 3px;
}
/* Lay the inputs out side by side. */
.sign-up form {
  display: flex;
}
/* Shared look for every input in the form. */
.sign-up input {
  padding: 4px;
  font: 16px "Source Sans Pro", sans-serif;
  outline: none;
  border-radius: 3px;
  border: solid 2px #ffd580;
  color: #825e17;
  height: 32px;
}
/* Email field: border-box sizing keeps the 100% width inclusive of its padding
   and border. */
.sign-up input.email {
  display: block;
  box-sizing: border-box;
  width: 100%;
}
/* Submit button: borderless, uppercase label; the background color is
   transitioned so the hover change below fades rather than snaps. */
.sign-up input.button {
  margin-left: 8px;
  padding: 4px 8px;
  font: 600 13px "Source Sans Pro", sans-serif;
  text-transform: uppercase;
  letter-spacing: 1px;
  background: #ffbb33;
  border: none;
  transition: background-color 0.2s ease;
}
.sign-up input.button:hover {
  background: #ffd580;
}
/* Focus is signalled by the border color, since the outline is removed above. */
.sign-up input:focus {
  border-color: #ffaa00;
}

/* Page backdrop. The margin reset and the body color/font are already declared
   at the top of this stylesheet, so only the background is set here; repeating
   the other declarations changed nothing in the cascade.
   NOTE(review): this file looks like compiled Sass output. If so, the
   duplicate is probably emitted by the .scss sources and should be removed
   there as well - confirm against asset/index.scss. */
body {
  background: #29313d url("image/background.png") top center/100% auto no-repeat;
}

/* Links: no text underline. The bottom border starts as #dee9ed at zero alpha
   so the hover rule can fade it in through the border-color transition. */
a {
  color: #1481b8;
  text-decoration: none;
  border-bottom: solid 1px rgba(222, 233, 237, 0);
  transition: color 0.2s ease, border-color 0.4s ease;
}

a:hover {
  color: #1481b8;
  border-bottom: solid 1px #dee9ed;
}

/* Centered white page column, capped at 960px wide. */
article {
  margin: 0 auto;
  padding: 0 0 12px 0;
  max-width: 960px;
  background: #fff;
}

/* Banner strip across the top of the column. */
header {
  margin: 0 0 48px 0;
  color: #595959;
  background: #f5f3f0;
  border-bottom: solid 1px #dad8d6;
}

/* Side gutters for the main content. */
main {
  margin: 0 48px;
}

/* Header image fills the banner width. */
img.header {
  display: block;
  width: 100%;
}

/* The small header variant is hidden here; the max-width: 700px media query
   later in this file shows it and hides img.big instead. */
img.small {
  display: none;
}

/* Intro row: pull quote (40%) beside the descriptive text (60%). */
div.intro {
  display: flex;
}
div.intro blockquote {
  flex-basis: 40%;
  margin: 0 48px 0 0;
  font: italic 28px/42px "Crimson", Georgia, serif;
}
div.intro div.text {
  flex-basis: 60%;
  margin: 8px 0 24px 0;
}

/* One line of space between consecutive paragraphs. */
p + p {
  margin-top: 24px;
}

/* Format cards (print / ebook / pdf / web): fixed-height panels with a cover
   background image. Note that background-size and background-position follow
   the `background` shorthand, which would otherwise reset them. */
.format {
  margin: 0 -12px 24px -12px;
  padding: 12px 12px 8px 12px;
  height: 244px;
  box-sizing: border-box;
  background: #eef4f7;
  background-size: cover;
  background-position: left;
  color: #444;
  border-radius: 3px;
  font: normal 16px/24px "Source Sans Pro", sans-serif;
}
/* Card title. */
.format h3 {
  margin: 0;
  padding: 0 0 4px 0;
  font: 600 16px/24px "Source Sans Pro", sans-serif;
  text-transform: uppercase;
  letter-spacing: 1px;
}
.format p {
  margin-bottom: 8px;
}

/* Print and PDF cards are mirrored: image anchored right, content pushed to
   the right edge (the inline-block .format-info follows text-align). */
.format.print, .format.pdf {
  background-position: right;
  text-align: right;
}

/* Fixed-width text column inside a card; resets text-align so its own content
   stays left-aligned even in the mirrored cards. */
.format-info {
  display: inline-block;
  width: 384px;
  text-align: left;
}
/* Tables of store buttons span the column, with a small gap between cells. */
.format-info table {
  width: 100%;
  border-collapse: collapse;
}
.format-info table td + td {
  padding-left: 5px;
}

/* Per-format background images. */
.format.print {
  background-image: url("image/format-print.jpg");
}

.format.ebook {
  background-image: url("image/format-ebook.jpg");
}

.format.pdf {
  background-image: url("image/format-pdf.jpg");
}

.format.web {
  background-image: url("image/format-web.jpg");
}

/* Call-to-action links styled as full-width buttons (store links, "Read Now",
   sample downloads). */
a.action {
  display: block;
  margin: 0 0 4px 0;
  padding: 4px 0;
  text-align: center;
  border-radius: 3px;
  background: #1481b8;
  transition: background-color 0.2s ease, color 0.2s ease;
  font: 400 17px/24px "Source Sans Pro", sans-serif;
  color: white;
}
/* Secondary label inside a button (e.g. ".com", "PDF"): smaller and
   semi-transparent white. */
a.action small {
  font-size: 14px;
  padding: 4px;
  color: rgba(255, 255, 255, 0.7);
  transition: color 0.2s ease;
}

/* Hover: lighter button, and the secondary label becomes fully opaque. */
a.action:hover {
  background-color: #2badee;
}
a.action:hover small {
  color: white;
}

/* Generic h3; the more specific `.format h3` rule overrides this inside the
   format cards. */
h3 {
  font: italic 24px/24px "Crimson", Georgia, serif;
  margin: 12px 0;
}

/* Author photo floated left in a padded, tinted frame. */
img.author {
  float: left;
  width: 240px;
  margin: 0 12px 0 -12px;
  padding: 12px;
  background: #f5f3f0;
  border-radius: 3px;
}

/* Author bio; the left margin presumably clears the floated photo above. */
div.author {
  vertical-align: top;
  margin: 36px 0 0 288px;
}

/* Page footer: centered credit line above a top rule. */
footer {
  position: relative;
  border-top: solid 1px #dee9ed;
  color: #7aa0b8;
  font: 400 15px "Source Sans Pro", sans-serif;
  text-align: center;
  margin: 12px 0 36px 0;
  padding-top: 48px;
}
/* Footer links drop the bottom border that the generic link rules add. */
footer a, footer a:hover {
  border: none;
}

/* Narrow viewports (700px wide or less): tighter gutters, the small header
   image, a stacked intro, auto-height format cards and an un-floated author
   photo. */
@media only screen and (max-width: 700px) {
  main {
    margin: 0 24px;
  }

  header {
    margin-bottom: 24px;
  }

  /* Swap the header images: hide the big one, show the small one. */
  img.big {
    display: none;
  }

  img.small {
    display: block;
  }

  /* Stack the pull quote above the intro text instead of side by side. */
  div.intro {
    display: block;
  }
  div.intro blockquote {
    display: block;
    font: italic 24px/36px "Crimson", Georgia, serif;
  }
  div.intro div.text {
    display: block;
    margin: 24px 0 24px 0;
  }

  /* Cards grow with their content; the background image is blended (lighten)
     with the per-format background-color set below. */
  .format {
    margin-bottom: 12px;
    height: auto;
    background-blend-mode: lighten;
  }

  /* The text column takes the full card width. */
  .format-info {
    display: block;
    width: 100%;
  }

  /* Per-format colors that the background images are blended against. */
  .format.print {
    background-color: #a6a29f;
  }

  .format.ebook {
    background-color: #97a2aa;
  }

  .format.pdf {
    background-color: #cfccca;
  }

  .format.web {
    background-color: #d6dbd3;
  }

  /* Author photo sits above the bio rather than beside it. */
  img.author {
    float: none;
  }

  div.author {
    margin: 0 0 0 0;
  }
}

================================================
FILE: site/index.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Crafting Interpreters</title>
<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="index.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>
<link rel="icon" type="image/png" href="image/favicon.png" />

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body>

<article>

<header>
  <a href="dedication.html"><img class="header big" src="image/header.png" alt="Crafting Interpreters by Robert Nystrom" /><img class="header small" src="image/header-small.png" alt="Crafting Interpreters by Robert Nystrom" /></a>
</header>

<main>

<div class="intro">

<blockquote><p>Ever wanted to make your own programming language or wondered how
they are designed and built?</p><p>If so, this book is for you.</p></blockquote>

<div class="text">

<p><em>Crafting Interpreters</em> contains everything you need to implement a
full-featured, efficient scripting language. You&#8217;ll learn both high-level
concepts around parsing and semantics and gritty details like bytecode
representation and garbage collection. Your brain will light up with new ideas,
and your hands will get dirty and calloused. It&#8217;s a blast.</p>

<p>Starting from <code>main()</code>, you build a language that features rich
syntax, dynamic typing, garbage collection, lexical scope, first-class
functions, closures, classes, and inheritance. All packed into a few thousand
lines of clean, fast code that you thoroughly understand because you write each
one yourself.</p>

<p>The book is available in four delectable formats:</p>

</div>

</div>

<div class="format print">
  <div class="format-info">
    <h3>Print</h3>
    <p>640 pages of beautiful typography and high-resolution hand-drawn
    illustrations. Each page lovingly typeset by the author. The premier reading
    experience.</p>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.amazon.com/dp/0990582930" target="_blank">Amazon<small>.com</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.ca/dp/0990582930" target="_blank"><small>.ca</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.uk/dp/0990582930" target="_blank"><small>.uk</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.au/dp/0990582930" target="_blank"><small>.au</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.de/dp/0990582930" target="_blank"><small>.de</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.fr/dp/0990582930" target="_blank"><small>.fr</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.es/dp/0990582930" target="_blank"><small>.es</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.it/dp/0990582930" target="_blank"><small>.it</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.jp/dp/0990582930" target="_blank"><small>.jp</small></a>
    </td>
    </tr>
    </table>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.barnesandnoble.com/w/crafting-interpreters-robert-nystrom/1139915245?ean=9780990582939" target="_blank">Barnes and Noble</a>
    </td>
    <td>
      <a class="action" href="https://www.bookdepository.com/Crafting-Interpreters-Robert-Nystrom/9780990582939" target="_blank">Book Depository</a>
    </td>
    </tr>
    </table>
    <a class="action" href="/sample.pdf" target="_blank">Download Sample <small>PDF</small></a>
  </div>
</div>
<div class="format ebook">
  <div class="format-info">
    <h3>eBook</h3>
    <p>Carefully tuned CSS fits itself to your ebook reader and screen size.
    Full-color syntax highlighting and live hyperlinks. Like Alan Kay's Dynabook
    but real.</p>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.amazon.com/dp/B09BCCVLCL" target="_blank">Kindle <small class="hide-small"><span class="hide-medium">Amazon</span>.com</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.uk/dp/B09BCCVLCL" target="_blank"><small>.uk</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.ca/dp/B09BCCVLCL" target="_blank"><small>.ca</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.au/dp/B09BCCVLCL" target="_blank"><small>.au</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.de/dp/B09BCCVLCL" target="_blank"><small>.de</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.in/dp/B09BCCVLCL" target="_blank"><small>.in</small></a>
    </td>
    </tr>
    </table>
    <table>
    <tr>
    <td>
      <a class="action" href="https://www.amazon.fr/dp/B09BCCVLCL" target="_blank"><small>.fr</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.es/dp/B09BCCVLCL" target="_blank"><small>.es</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.it/dp/B09BCCVLCL" target="_blank"><small>.it</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.co.jp/dp/B09BCCVLCL" target="_blank"><small>.jp</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.br/dp/B09BCCVLCL" target="_blank"><small>.br</small></a>
    </td>
    <td>
      <a class="action" href="https://www.amazon.com.mx/dp/B09BCCVLCL" target="_blank"><small>.mx</small></a>
    </td>
    <td>
      <a class="action" href="https://books.apple.com/us/book/crafting-interpreters/id1578795812" target="_blank">Apple Books</a>
    </td>
    </tr>
    </table>
    <table>
    <tr>
    <td>
      <a class="action" href="https://play.google.com/store/books/details?id=q0c6EAAAQBAJ" target="_blank">Play Books <small class="hide-small">Google</small></a>
    </td>
    <td>
      <a class="action" href="https://www.barnesandnoble.com/w/crafting-interpreters-robert-nystrom/1139915245?ean=2940164977092" target="_blank">Nook <small class="hide-small">B&amp;N</small></a>
    </td>
    <td>
      <a class="action" href="https://www.smashwords.com/books/view/1096463" target="_blank">EPUB <small class="hide-small">Smashwords</small></a>
    </td>
    </tr>
    </table>
  </div>
</div>
<div class="format pdf">
  <div class="format-info">
    <h3>PDF</h3>
    <p>Perfectly mirrors the hand-crafted typesetting and sharp illustrations of
    the print book, but much easier to carry around.</p>
    <a class="action" href="https://payhip.com/b/F0zkr" target="_blank">Buy from Payhip</a>
    <a class="action" href="/sample.pdf" target="_blank">Download Free Sample</a>
  </div>
</div>
<div class="format web">
  <div class="format-info">
    <h3>Web</h3>
    <p>Meticulous responsive design looks great from your desktop down to your
    phone. Every chapter, aside, and illustration is there. Read the whole book
    for free. Really.</p>
    <a class="action" href="contents.html">Read Now</a>
  </div>
</div>

<img src="image/dogshot.jpg" class="author" alt="Robert Nystrom" />

<div class="author">
<h3>About Robert Nystrom</h3>

<p>I got bitten by the language bug years ago while on paternity leave between
midnight feedings. I cobbled together a <a href="http://wren.io/"
target="_blank">number</a> <a href="http://magpie-lang.org/"
target="_blank">of</a> <a href="http://finch.stuffwithstuff.com/"
target="_blank">hobby</a> <a href="https://github.com/munificent/vigil"
target="_blank">languages</a> before worming my way into an honest-to-God,
full-time programming language job. Today, I work at Google on the <a
href="http://dart.dev/" target="_blank">Dart language</a>.</p>

<p>Before I fell in love with languages, I developed games at Electronic Arts
for eight years. I wrote the best-selling book <em><a
href="http://gameprogrammingpatterns.com/" target="_blank">Game Programming
Patterns</a></em> based on what I learned there. You can read that book for free
too.</p>

<p>If you want more, you can find me on Twitter (<a
href="https://twitter.com/intent/user?screen_name=munificentbob"
target="_blank"><code>@munificentbob</code></a>), email me at <code>bob</code>
at this site's domain (though I am slow to respond), read <a
href="http://journal.stuffwithstuff.com/" target="_blank">my blog</a>, or join
my low-frequency mailing list:</p>

<div class="sign-up">
  <!-- Begin MailChimp Signup Form -->
  <div id="mc_embed_signup">
  <form action="//gameprogrammingpatterns.us7.list-manage.com/subscribe/post?u=0952ca43ed2536d6717766b88&amp;id=6e96334109" method="post" id="mc-embedded-subscribe-form" name="mc-embedded-subscribe-form" class="validate" target="_blank" novalidate>
    <input type="email" value="" name="EMAIL" class="email" id="mce-EMAIL" placeholder="Your email address" required>
    <!-- real people should not fill this in and expect good things - do not remove this or risk form bot signups -->
    <div style="position: absolute; left: -5000px;" aria-hidden="true"><input type="text" name="b_0952ca43ed2536d6717766b88_6e96334109" tabindex="-1" value=""></div>
    <input type="submit" value="Sign me up!" name="subscribe" id="mc-embedded-subscribe" class="button">
  </form>
  </div>
  <!--End mc_embed_signup -->
</div>

</div>

<footer>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</main>
</article>
</body>
</html>


================================================
FILE: site/inheritance.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Inheritance &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Inheritance<small>13</small></a></h3>

<ul>
    <li><a href="#superclasses-and-subclasses"><small>13.1</small> Superclasses and Subclasses</a></li>
    <li><a href="#inheriting-methods"><small>13.2</small> Inheriting Methods</a></li>
    <li><a href="#calling-superclass-methods"><small>13.3</small> Calling Superclass Methods</a></li>
    <li><a href="#conclusion"><small>13.4</small> Conclusion</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="classes.html" title="Classes" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="classes.html" title="Classes" class="prev">←</a>
<a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Inheritance<small>13</small></a></h3>

<ul>
    <li><a href="#superclasses-and-subclasses"><small>13.1</small> Superclasses and Subclasses</a></li>
    <li><a href="#inheriting-methods"><small>13.2</small> Inheriting Methods</a></li>
    <li><a href="#calling-superclass-methods"><small>13.3</small> Calling Superclass Methods</a></li>
    <li><a href="#conclusion"><small>13.4</small> Conclusion</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="classes.html" title="Classes" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">13</div>
  <h1>Inheritance</h1>

<blockquote>
<p>Once we were blobs in the sea, and then fishes, and then lizards and rats and
then monkeys, and hundreds of things in between. This hand was once a fin,
this hand once had claws! In my human mouth I have the pointy teeth of a wolf
and the chisel teeth of a rabbit and the grinding teeth of a cow! Our blood is
as salty as the sea we used to live in! When we&rsquo;re frightened, the hair on our
skin stands up, just like it did when we had fur. We are history! Everything
we&rsquo;ve ever been on the way to becoming us, we still are.</p>
<p><cite>Terry Pratchett, <em>A Hat Full of Sky</em></cite></p>
</blockquote>
<p>Can you believe it? We&rsquo;ve reached the last chapter of <a href="a-tree-walk-interpreter.html">Part II</a>. We&rsquo;re almost
done with our first Lox interpreter. The <a href="classes.html">previous chapter</a> was a big ball of
intertwined object-orientation features. I couldn&rsquo;t separate those from each
other, but I did manage to untangle one piece. In this chapter, we&rsquo;ll finish
off Lox&rsquo;s class support by adding inheritance.</p>
<p>Inheritance appears in object-oriented languages all the way back to the <span
name="inherited">first</span> one, <a href="https://en.wikipedia.org/wiki/Simula">Simula</a>. Early on, Kristen Nygaard and
Ole-Johan Dahl noticed commonalities across classes in the simulation programs
they wrote. Inheritance gave them a way to reuse the code for those similar
parts.</p>
<aside name="inherited">
<p>You could say all those other languages <em>inherited</em> it from Simula. Hey-ooo!
I&rsquo;ll, uh, see myself out.</p>
</aside>
<h2><a href="#superclasses-and-subclasses" id="superclasses-and-subclasses"><small>13&#8202;.&#8202;1</small>Superclasses and Subclasses</a></h2>
<p>Given that the concept is &ldquo;inheritance&rdquo;, you would hope they would pick a
consistent metaphor and call them &ldquo;parent&rdquo; and &ldquo;child&rdquo; classes, but that would
be too easy. Way back when, C. A. R. Hoare coined the term &ldquo;<span
name="subclass">subclass</span>&rdquo; to refer to a record type that refines another
type. Simula borrowed that term to refer to a <em>class</em> that inherits from
another. I don&rsquo;t think it was until Smalltalk came along that someone flipped
the Latin prefix to get &ldquo;superclass&rdquo; to refer to the other side of the
relationship. From C++, you also hear &ldquo;base&rdquo; and &ldquo;derived&rdquo; classes. I&rsquo;ll mostly
stick with &ldquo;superclass&rdquo; and &ldquo;subclass&rdquo;.</p>
<aside name="subclass">
<p>&ldquo;Super-&rdquo; and &ldquo;sub-&rdquo; mean &ldquo;above&rdquo; and &ldquo;below&rdquo; in Latin, respectively. Picture an
inheritance tree like a family tree with the root at the top<span class="em">&mdash;</span>subclasses are
below their superclasses on the diagram. More generally, &ldquo;sub-&rdquo; refers to things
that refine or are contained by some more general concept. In zoology, a
subclass is a finer categorization of a larger class of living things.</p>
<p>In set theory, a subset is contained by a larger superset which has all of the
elements of the subset and possibly more. Set theory and programming languages
meet each other in type theory. There, you have &ldquo;supertypes&rdquo; and &ldquo;subtypes&rdquo;.</p>
<p>In statically typed object-oriented languages, a subclass is also often a
subtype of its superclass. Say we have a Doughnut superclass and a BostonCream
subclass. Every BostonCream is also an instance of Doughnut, but there may be
doughnut objects that are not BostonCreams (like Crullers).</p>
<p>Think of a type as the set of all values of that type. The set of all Doughnut
instances contains the set of all BostonCream instances since every BostonCream
is also a Doughnut. So BostonCream is a subclass, and a subtype, and its
instances are a subset. It all lines up.</p><img src="image/inheritance/doughnuts.png" alt="Boston cream &lt;: doughnut." />
</aside>
<p>Our first step towards supporting inheritance in Lox is a way to specify a
superclass when declaring a class. There&rsquo;s a lot of variety in syntax for this.
C++ and C# place a <code>:</code> after the subclass&rsquo;s name, followed by the superclass
name. Java uses <code>extends</code> instead of the colon. Python puts the superclass(es)
in parentheses after the class name. Simula puts the superclass&rsquo;s name <em>before</em>
the <code>class</code> keyword.</p>
<p>This late in the game, I&rsquo;d rather not add a new reserved word or token to the
lexer. We don&rsquo;t have <code>extends</code> or even <code>:</code>, so we&rsquo;ll follow Ruby and use a
less-than sign (<code>&lt;</code>).</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Doughnut</span> {
  <span class="c">// General doughnut stuff...</span>
}

<span class="k">class</span> <span class="t">BostonCream</span> &lt; <span class="t">Doughnut</span> {
  <span class="c">// Boston Cream-specific stuff...</span>
}
</pre></div>
<p>To work this into the grammar, we add a new optional clause in our existing
<code>classDecl</code> rule.</p>
<div class="codehilite"><pre><span class="i">classDecl</span>      → <span class="s">&quot;class&quot;</span> <span class="t">IDENTIFIER</span> ( <span class="s">&quot;&lt;&quot;</span> <span class="t">IDENTIFIER</span> )?
                 <span class="s">&quot;{&quot;</span> <span class="i">function</span>* <span class="s">&quot;}&quot;</span> ;
</pre></div>
<p>After the class name, you can have a <code>&lt;</code> followed by the superclass&rsquo;s name. The
superclass clause is optional because you don&rsquo;t <em>have</em> to have a superclass.
Unlike some other object-oriented languages like Java, Lox has no root &ldquo;Object&rdquo;
class that everything inherits from, so when you omit the superclass clause, the
class has <em>no</em> superclass, not even an implicit one.</p>
<p>We want to capture this new syntax in the class declaration&rsquo;s AST node.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Block      : List&lt;Stmt&gt; statements&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()<br>
replace 1 line</div>
<pre class="insert">      <span class="s">&quot;Class      : Token name, Expr.Variable superclass,&quot;</span> +
                  <span class="s">&quot; List&lt;Stmt.Function&gt; methods&quot;</span>,
</pre><pre class="insert-after">      &quot;Expression : Expr expression&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>(), replace 1 line</div>

<p>You might be surprised that we store the superclass name as an Expr.Variable,
not a Token. The grammar restricts the superclass clause to a single identifier,
but at runtime, that identifier is evaluated as a variable access. Wrapping the
name in an Expr.Variable early on in the parser gives us an object that the
resolver can hang the resolution information off of.</p>
<p>The new parser code follows the grammar directly.</p>
<div class="codehilite"><pre class="insert-before">    Token name = consume(IDENTIFIER, &quot;Expect class name.&quot;);
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">

    <span class="t">Expr</span>.<span class="t">Variable</span> <span class="i">superclass</span> = <span class="k">null</span>;
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">LESS</span>)) {
      <span class="i">consume</span>(<span class="i">IDENTIFIER</span>, <span class="s">&quot;Expect superclass name.&quot;</span>);
      <span class="i">superclass</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Variable</span>(<span class="i">previous</span>());
    }

</pre><pre class="insert-after">    consume(LEFT_BRACE, &quot;Expect '{' before class body.&quot;);
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>classDeclaration</em>()</div>

<p>Once we&rsquo;ve (possibly) parsed a superclass declaration, we store it in the AST.</p>
<div class="codehilite"><pre class="insert-before">    consume(RIGHT_BRACE, &quot;Expect '}' after class body.&quot;);

</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>classDeclaration</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Class</span>(<span class="i">name</span>, <span class="i">superclass</span>, <span class="i">methods</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>classDeclaration</em>(), replace 1 line</div>

<p>If we didn&rsquo;t parse a superclass clause, the superclass expression will be
<code>null</code>. We&rsquo;ll have to make sure the later passes check for that. The first of
those is the resolver.</p>
<div class="codehilite"><pre class="insert-before">    define(stmt.name);
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">superclass</span> != <span class="k">null</span>) {
      <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">superclass</span>);
    }
</pre><pre class="insert-after">

    beginScope();
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>The class declaration AST node has a new subexpression, so we traverse into and
resolve that. Since classes are usually declared at the top level, the
superclass name will most likely be a global variable, so this doesn&rsquo;t usually
do anything useful. However, Lox allows class declarations even inside blocks,
so it&rsquo;s possible the superclass name refers to a local variable. In that case,
we need to make sure it&rsquo;s resolved.</p>
<p>Because even well-intentioned programmers sometimes write weird code, there&rsquo;s a
silly edge case we need to worry about while we&rsquo;re in here. Take a look at this:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Oops</span> &lt; <span class="t">Oops</span> {}
</pre></div>
<p>There&rsquo;s no way this will do anything useful, and if we let the runtime try to
run this, it will break the interpreter&rsquo;s expectation that there are no cycles
in the inheritance chain. The safest thing is to detect this case statically
and report it as an error.</p>
<div class="codehilite"><pre class="insert-before">    define(stmt.name);

</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">superclass</span> != <span class="k">null</span> &amp;&amp;
        <span class="i">stmt</span>.<span class="i">name</span>.<span class="i">lexeme</span>.<span class="i">equals</span>(<span class="i">stmt</span>.<span class="i">superclass</span>.<span class="i">name</span>.<span class="i">lexeme</span>)) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">stmt</span>.<span class="i">superclass</span>.<span class="i">name</span>,
          <span class="s">&quot;A class can&#39;t inherit from itself.&quot;</span>);
    }

</pre><pre class="insert-after">    if (stmt.superclass != null) {
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>Assuming the code resolves without error, the AST travels to the interpreter.</p>
<div class="codehilite"><pre class="insert-before">  public Void visitClassStmt(Stmt.Class stmt) {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">    <span class="t">Object</span> <span class="i">superclass</span> = <span class="k">null</span>;
    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">superclass</span> != <span class="k">null</span>) {
      <span class="i">superclass</span> = <span class="i">evaluate</span>(<span class="i">stmt</span>.<span class="i">superclass</span>);
      <span class="k">if</span> (!(<span class="i">superclass</span> <span class="k">instanceof</span> <span class="t">LoxClass</span>)) {
        <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">stmt</span>.<span class="i">superclass</span>.<span class="i">name</span>,
            <span class="s">&quot;Superclass must be a class.&quot;</span>);
      }
    }

</pre><pre class="insert-after">    environment.define(stmt.name.lexeme, null);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitClassStmt</em>()</div>

<p>If the class has a superclass expression, we evaluate it. Since that could
potentially evaluate to some other kind of object, we have to check at runtime
that the thing we want to be the superclass is actually a class. Bad things
would happen if we allowed code like:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="t">NotAClass</span> = <span class="s">&quot;I am totally not a class&quot;</span>;

<span class="k">class</span> <span class="t">Subclass</span> &lt; <span class="t">NotAClass</span> {} <span class="c">// ?!</span>
</pre></div>
<p>Assuming that check passes, we continue on. Executing a class declaration turns
the syntactic representation of a class<span class="em">&mdash;</span>its AST node<span class="em">&mdash;</span>into its runtime
representation, a LoxClass object. We need to plumb the superclass through to
that too. We pass the superclass to the constructor.</p>
<div class="codehilite"><pre class="insert-before">      methods.put(method.name.lexeme, function);
    }

</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitClassStmt</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">LoxClass</span> <span class="i">klass</span> = <span class="k">new</span> <span class="t">LoxClass</span>(<span class="i">stmt</span>.<span class="i">name</span>.<span class="i">lexeme</span>,
        (<span class="t">LoxClass</span>)<span class="i">superclass</span>, <span class="i">methods</span>);

</pre><pre class="insert-after">    environment.assign(stmt.name, klass);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitClassStmt</em>(), replace 1 line</div>

<p>The constructor stores it in a field.</p>
<div class="codehilite"><div class="source-file"><em>lox/LoxClass.java</em><br>
constructor <em>LoxClass</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">LoxClass</span>(<span class="t">String</span> <span class="i">name</span>, <span class="t">LoxClass</span> <span class="i">superclass</span>,
           <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">LoxFunction</span>&gt; <span class="i">methods</span>) {
    <span class="k">this</span>.<span class="i">superclass</span> = <span class="i">superclass</span>;
</pre><pre class="insert-after">    this.name = name;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, constructor <em>LoxClass</em>(), replace 1 line</div>

<p>Which we declare here:</p>
<div class="codehilite"><pre class="insert-before">  final String name;
</pre><div class="source-file"><em>lox/LoxClass.java</em><br>
in class <em>LoxClass</em></div>
<pre class="insert">  <span class="k">final</span> <span class="t">LoxClass</span> <span class="i">superclass</span>;
</pre><pre class="insert-after">  private final Map&lt;String, LoxFunction&gt; methods;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, in class <em>LoxClass</em></div>

<p>With that, we can define classes that are subclasses of other classes. Now, what
does having a superclass actually <em>do?</em></p>
<h2><a href="#inheriting-methods" id="inheriting-methods"><small>13&#8202;.&#8202;2</small>Inheriting Methods</a></h2>
<p>Inheriting from another class means that everything that&rsquo;s <span
name="liskov">true</span> of the superclass should be true, more or less, of the
subclass. In statically typed languages, that carries a lot of implications. The
sub<em>class</em> must also be a sub<em>type</em>, and the memory layout is controlled so that
you can pass an instance of a subclass to a function expecting a superclass and
it can still access the inherited fields correctly.</p>
<aside name="liskov">
<p>A fancier name for this hand-wavey guideline is the <a href="https://en.wikipedia.org/wiki/Liskov_substitution_principle"><em>Liskov substitution
principle</em></a>. Barbara Liskov introduced it in a keynote during the
formative period of object-oriented programming.</p>
</aside>
<p>Lox is a dynamically typed language, so our requirements are much simpler.
Basically, it means that if you can call some method on an instance of the
superclass, you should be able to call that method when given an instance of the
subclass. In other words, methods are inherited from the superclass.</p>
<p>This lines up with one of the goals of inheritance<span class="em">&mdash;</span>to give users a way to
reuse code across classes. Implementing this in our interpreter is
astonishingly easy.</p>
<div class="codehilite"><pre class="insert-before">      return methods.get(name);
    }

</pre><div class="source-file"><em>lox/LoxClass.java</em><br>
in <em>findMethod</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">superclass</span> != <span class="k">null</span>) {
      <span class="k">return</span> <span class="i">superclass</span>.<span class="i">findMethod</span>(<span class="i">name</span>);
    }

</pre><pre class="insert-after">    return null;
</pre></div>
<div class="source-file-narrow"><em>lox/LoxClass.java</em>, in <em>findMethod</em>()</div>

<p>That&rsquo;s literally all there is to it. When we are looking up a method on an
instance, if we don&rsquo;t find it on the instance&rsquo;s class, we recurse up through the
superclass chain and look there. Give it a try:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Fry until golden brown.&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">BostonCream</span> &lt; <span class="t">Doughnut</span> {}

<span class="t">BostonCream</span>().<span class="i">cook</span>();
</pre></div>
<p>There we go, half of our inheritance features are complete with only three lines
of Java code.</p>
<h2><a href="#calling-superclass-methods" id="calling-superclass-methods"><small>13&#8202;.&#8202;3</small>Calling Superclass Methods</a></h2>
<p>In <code>findMethod()</code> we look for a method on the current class <em>before</em> walking up
the superclass chain. If a method with the same name exists in both the subclass
and the superclass, the subclass one takes precedence or <strong>overrides</strong> the
superclass method. Sort of like how variables in inner scopes shadow outer ones.</p>
<p>That&rsquo;s great if the subclass wants to <em>replace</em> some superclass behavior
completely. But, in practice, subclasses often want to <em>refine</em> the superclass&rsquo;s
behavior. They want to do a little work specific to the subclass, but also
execute the original superclass behavior.</p>
<p>However, since the subclass has overridden the method, there&rsquo;s no way to refer
to the original one. If the subclass method tries to call it by name, it will
just recursively hit its own override. We need a way to say &ldquo;Call this method,
but look for it directly on my superclass and ignore my override&rdquo;. Java uses
<code>super</code> for this, and we&rsquo;ll use that same syntax in Lox. Here is an example:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Fry until golden brown.&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">BostonCream</span> &lt; <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">super</span>.<span class="i">cook</span>();
    <span class="k">print</span> <span class="s">&quot;Pipe full of custard and coat with chocolate.&quot;</span>;
  }
}

<span class="t">BostonCream</span>().<span class="i">cook</span>();
</pre></div>
<p>If you run this, it should print:</p>
<div class="codehilite"><pre>Fry until golden brown.
Pipe full of custard and coat with chocolate.
</pre></div>
<p>We have a new expression form. The <code>super</code> keyword, followed by a dot and an
identifier, looks for a method with that name. Unlike calls on <code>this</code>, the search
starts at the superclass.</p>
<h3><a href="#syntax" id="syntax"><small>13&#8202;.&#8202;3&#8202;.&#8202;1</small>Syntax</a></h3>
<p>With <code>this</code>, the keyword works sort of like a magic variable, and the expression
is that one lone token. But with <code>super</code>, the subsequent <code>.</code> and property name
are inseparable parts of the <code>super</code> expression. You can&rsquo;t have a bare <code>super</code>
token all by itself.</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="k">super</span>; <span class="c">// Syntax error.</span>
</pre></div>
<p>So the new clause we add to the <code>primary</code> rule in our grammar includes the
property access as well.</p>
<div class="codehilite"><pre><span class="i">primary</span>        → <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span> | <span class="s">&quot;this&quot;</span>
               | <span class="t">NUMBER</span> | <span class="t">STRING</span> | <span class="t">IDENTIFIER</span> | <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span>
               | <span class="s">&quot;super&quot;</span> <span class="s">&quot;.&quot;</span> <span class="t">IDENTIFIER</span> ;
</pre></div>
<p>Typically, a <code>super</code> expression is used for a method call, but, as with regular
methods, the argument list is <em>not</em> part of the expression. Instead, a super
<em>call</em> is a super <em>access</em> followed by a function call. Like other method calls,
you can get a handle to a superclass method and invoke it separately.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">method</span> = <span class="k">super</span>.<span class="i">cook</span>;
<span class="i">method</span>();
</pre></div>
<p>So the <code>super</code> expression itself contains only the token for the <code>super</code> keyword
and the name of the method being looked up. The corresponding <span
name="super-ast">syntax tree node</span> is thus:</p>
<div class="codehilite"><pre class="insert-before">      &quot;Set      : Expr object, Token name, Expr value&quot;,
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Super    : Token keyword, Token method&quot;</span>,
</pre><pre class="insert-after">      &quot;This     : Token keyword&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="super-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#super-expression">Appendix II</a>.</p>
</aside>
<p>Following the grammar, the new parsing code goes inside our existing <code>primary()</code>
method.</p>
<div class="codehilite"><pre class="insert-before">      return new Expr.Literal(previous().literal);
    }
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>primary</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">match</span>(<span class="i">SUPER</span>)) {
      <span class="t">Token</span> <span class="i">keyword</span> = <span class="i">previous</span>();
      <span class="i">consume</span>(<span class="i">DOT</span>, <span class="s">&quot;Expect &#39;.&#39; after &#39;super&#39;.&quot;</span>);
      <span class="t">Token</span> <span class="i">method</span> = <span class="i">consume</span>(<span class="i">IDENTIFIER</span>,
          <span class="s">&quot;Expect superclass method name.&quot;</span>);
      <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Super</span>(<span class="i">keyword</span>, <span class="i">method</span>);
    }
</pre><pre class="insert-after">

    if (match(THIS)) return new Expr.This(previous());
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>primary</em>()</div>

<p>A leading <code>super</code> keyword tells us we&rsquo;ve hit a <code>super</code> expression. After that we
consume the expected <code>.</code> and method name.</p>
<h3><a href="#semantics" id="semantics"><small>13&#8202;.&#8202;3&#8202;.&#8202;2</small>Semantics</a></h3>
<p>Earlier, I said a <code>super</code> expression starts the method lookup from &ldquo;the
superclass&rdquo;, but <em>which</em> superclass? The naïve answer is the superclass of
<code>this</code>, the object the surrounding method was called on. That coincidentally
produces the right behavior in a lot of cases, but that&rsquo;s not actually correct.
Gaze upon:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">print</span> <span class="s">&quot;A method&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">B</span> &lt; <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">print</span> <span class="s">&quot;B method&quot;</span>;
  }

  <span class="i">test</span>() {
    <span class="k">super</span>.<span class="i">method</span>();
  }
}

<span class="k">class</span> <span class="t">C</span> &lt; <span class="t">B</span> {}

<span class="t">C</span>().<span class="i">test</span>();
</pre></div>
<p>Translate this program to Java, C#, or C++ and it will print &ldquo;A method&rdquo;, which
is what we want Lox to do too. When this program runs, inside the body of
<code>test()</code>, <code>this</code> is an instance of C. The superclass of C is B, but that is
<em>not</em> where the lookup should start. If it did, we would hit B&rsquo;s <code>method()</code>.</p>
<p>Instead, lookup should start on the superclass of <em>the class containing the
<code>super</code> expression</em>. In this case, since <code>test()</code> is defined inside B, the
<code>super</code> expression inside it should start the lookup on <em>B</em>&rsquo;s superclass<span class="em">&mdash;</span>A.</p>
<p><span name="flow"></span></p><img src="image/inheritance/classes.png" alt="The call chain flowing through the classes." />
<aside name="flow">
<p>The execution flow looks something like this:</p>
<ol>
<li>
<p>We call <code>test()</code> on an instance of C.</p>
</li>
<li>
<p>That enters the <code>test()</code> method inherited from B. That calls
<code>super.method()</code>.</p>
</li>
<li>
<p>The superclass of B is A, so that chains to <code>method()</code> on A, and the program
prints &ldquo;A method&rdquo;.</p>
</li>
</ol>
</aside>
<p>Thus, in order to evaluate a <code>super</code> expression, we need access to the
superclass of the class definition surrounding the call. Alack and alas, at the
point in the interpreter where we are executing a <code>super</code> expression, we don&rsquo;t
have that easily available.</p>
<p>We <em>could</em> add a field to LoxFunction to store a reference to the LoxClass that
owns that method. The interpreter would keep a reference to the
currently executing LoxFunction so that we could look it up later when we hit a
<code>super</code> expression. From there, we&rsquo;d get the LoxClass of the method, then its
superclass.</p>
<p>That&rsquo;s a lot of plumbing. In the <a href="classes.html">last chapter</a>, we had a similar problem when
we needed to add support for <code>this</code>. In that case, we used our existing
environment and closure mechanism to store a reference to the current object.
Could we do something similar for storing the superclass<span
name="rhetorical">?</span> Well, I probably wouldn&rsquo;t be talking about it if the
answer was no, so<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>yes.</p>
<aside name="rhetorical">
<p>Does anyone even like rhetorical questions?</p>
</aside>
<p>One important difference is that we bound <code>this</code> when the method was <em>accessed</em>.
The same method can be called on different instances and each needs its own
<code>this</code>. With <code>super</code> expressions, the superclass is a fixed property of the
<em>class declaration itself</em>. Every time you evaluate some <code>super</code> expression, the
superclass is always the same.</p>
<p>That means we can create the environment for the superclass once, when the class
definition is executed. Immediately before we define the methods, we make a new
environment to bind the class&rsquo;s superclass to the name <code>super</code>.</p><img src="image/inheritance/superclass.png" alt="The superclass environment." />
<p>When we create the LoxFunction runtime representation for each method, that is
the environment they will capture in their closure. Later, when a method is
invoked and <code>this</code> is bound, the superclass environment becomes the parent for
the method&rsquo;s environment, like so:</p><img src="image/inheritance/environments.png" alt="The environment chain including the superclass environment." />
<p>That&rsquo;s a lot of machinery, but we&rsquo;ll get through it a step at a time. Before we
can get to creating the environment at runtime, we need to handle the
corresponding scope chain in the resolver.</p>
<div class="codehilite"><pre class="insert-before">      resolve(stmt.superclass);
    }
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">superclass</span> != <span class="k">null</span>) {
      <span class="i">beginScope</span>();
      <span class="i">scopes</span>.<span class="i">peek</span>().<span class="i">put</span>(<span class="s">&quot;super&quot;</span>, <span class="k">true</span>);
    }
</pre><pre class="insert-after">

    beginScope();
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>If the class declaration has a superclass, then we create a new scope
surrounding all of its methods. In that scope, we define the name &ldquo;super&rdquo;. Once
we&rsquo;re done resolving the class&rsquo;s methods, we discard that scope.</p>
<div class="codehilite"><pre class="insert-before">    endScope();

</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">superclass</span> != <span class="k">null</span>) <span class="i">endScope</span>();

</pre><pre class="insert-after">    currentClass = enclosingClass;
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>It&rsquo;s a minor optimization, but we only create the superclass environment if the
class actually <em>has</em> a superclass. There&rsquo;s no point creating it when there isn&rsquo;t
a superclass since there&rsquo;d be no superclass to store in it anyway.</p>
<p>With &ldquo;super&rdquo; defined in a scope chain, we are able to resolve the <code>super</code>
expression itself.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitSetExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitSuperExpr</span>(<span class="t">Expr</span>.<span class="t">Super</span> <span class="i">expr</span>) {
    <span class="i">resolveLocal</span>(<span class="i">expr</span>, <span class="i">expr</span>.<span class="i">keyword</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitSetExpr</em>()</div>

<p>We resolve the <code>super</code> token exactly as if it were a variable. The resolution
stores the number of hops along the environment chain that the interpreter needs
to walk to find the environment where the superclass is stored.</p>
<p>This code is mirrored in the interpreter. When we evaluate a subclass
definition, we create a new environment.</p>
<div class="codehilite"><pre class="insert-before">        throw new RuntimeError(stmt.superclass.name,
            &quot;Superclass must be a class.&quot;);
      }
    }

    environment.define(stmt.name.lexeme, null);
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">superclass</span> != <span class="k">null</span>) {
      <span class="i">environment</span> = <span class="k">new</span> <span class="t">Environment</span>(<span class="i">environment</span>);
      <span class="i">environment</span>.<span class="i">define</span>(<span class="s">&quot;super&quot;</span>, <span class="i">superclass</span>);
    }
</pre><pre class="insert-after">

    Map&lt;String, LoxFunction&gt; methods = new HashMap&lt;&gt;();
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitClassStmt</em>()</div>

<p>Inside that environment, we store a reference to the superclass<span class="em">&mdash;</span>the actual
LoxClass object for the superclass which we have now that we are in the runtime.
Then we create the LoxFunctions for each method. Those will capture the current
environment<span class="em">&mdash;</span>the one where we just bound &ldquo;super&rdquo;<span class="em">&mdash;</span>as their closure, holding
on to the superclass like we need. Once that&rsquo;s done, we pop the environment.</p>
<div class="codehilite"><pre class="insert-before">    LoxClass klass = new LoxClass(stmt.name.lexeme,
        (LoxClass)superclass, methods);
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">superclass</span> != <span class="k">null</span>) {
      <span class="i">environment</span> = <span class="i">environment</span>.<span class="i">enclosing</span>;
    }
</pre><pre class="insert-after">

    environment.assign(stmt.name, klass);
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitClassStmt</em>()</div>

<p>We&rsquo;re ready to interpret <code>super</code> expressions themselves. There are a few moving
parts, so we&rsquo;ll build this method up in pieces.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitSetExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitSuperExpr</span>(<span class="t">Expr</span>.<span class="t">Super</span> <span class="i">expr</span>) {
    <span class="t">int</span> <span class="i">distance</span> = <span class="i">locals</span>.<span class="i">get</span>(<span class="i">expr</span>);
    <span class="t">LoxClass</span> <span class="i">superclass</span> = (<span class="t">LoxClass</span>)<span class="i">environment</span>.<span class="i">getAt</span>(
        <span class="i">distance</span>, <span class="s">&quot;super&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitSetExpr</em>()</div>

<p>First, the work we&rsquo;ve been leading up to. We look up the surrounding class&rsquo;s
superclass by looking up &ldquo;super&rdquo; in the proper environment.</p>
<p>When we access a method, we also need to bind <code>this</code> to the object the method is
accessed from. In an expression like <code>doughnut.cook</code>, the object is whatever we
get from evaluating <code>doughnut</code>. In a <code>super</code> expression like <code>super.cook</code>, the
current object is implicitly the <em>same</em> current object that we&rsquo;re using. In
other words, <code>this</code>. Even though we are looking up the <em>method</em> on the
superclass, the <em>instance</em> is still <code>this</code>.</p>
<p>Unfortunately, inside the <code>super</code> expression, we don&rsquo;t have a convenient node
for the resolver to hang the number of hops to <code>this</code> on. Fortunately, we do
control the layout of the environment chains. The environment where &ldquo;this&rdquo; is
bound is always right inside the environment where we store &ldquo;super&rdquo;.</p>
<div class="codehilite"><pre class="insert-before">    LoxClass superclass = (LoxClass)environment.getAt(
        distance, &quot;super&quot;);
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitSuperExpr</em>()</div>
<pre class="insert">

    <span class="t">LoxInstance</span> <span class="i">object</span> = (<span class="t">LoxInstance</span>)<span class="i">environment</span>.<span class="i">getAt</span>(
        <span class="i">distance</span> - <span class="n">1</span>, <span class="s">&quot;this&quot;</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitSuperExpr</em>()</div>

<p>Offsetting the distance by one looks up &ldquo;this&rdquo; in that inner environment. I
admit this isn&rsquo;t the most <span name="elegant">elegant</span> code, but it
works.</p>
<aside name="elegant">
<p>Writing a book that includes every single line of code for a program means I
can&rsquo;t hide the hacks by leaving them as an &ldquo;exercise for the reader&rdquo;.</p>
</aside>
<p>Now we&rsquo;re ready to look up and bind the method, starting at the superclass.</p>
<div class="codehilite"><pre class="insert-before">    LoxInstance object = (LoxInstance)environment.getAt(
        distance - 1, &quot;this&quot;);
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitSuperExpr</em>()</div>
<pre class="insert">

    <span class="t">LoxFunction</span> <span class="i">method</span> = <span class="i">superclass</span>.<span class="i">findMethod</span>(<span class="i">expr</span>.<span class="i">method</span>.<span class="i">lexeme</span>);
    <span class="k">return</span> <span class="i">method</span>.<span class="i">bind</span>(<span class="i">object</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitSuperExpr</em>()</div>

<p>This is almost exactly like the code for looking up a method of a get
expression, except that we call <code>findMethod()</code> on the superclass instead of on
the class of the current object.</p>
<p>That&rsquo;s basically it. Except, of course, that we might <em>fail</em> to find the method.
So we check for that too.</p>
<div class="codehilite"><pre class="insert-before">

    LoxFunction method = superclass.findMethod(expr.method.lexeme);
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitSuperExpr</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">method</span> == <span class="k">null</span>) {
      <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">expr</span>.<span class="i">method</span>,
          <span class="s">&quot;Undefined property &#39;&quot;</span> + <span class="i">expr</span>.<span class="i">method</span>.<span class="i">lexeme</span> + <span class="s">&quot;&#39;.&quot;</span>);
    }

</pre><pre class="insert-after">    return method.bind(object);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitSuperExpr</em>()</div>

<p>There you have it! Take that BostonCream example from earlier and give it a try.
Assuming you and I did everything right, it should fry it first, then stuff it
with cream.</p>
<h3><a href="#invalid-uses-of-super" id="invalid-uses-of-super"><small>13&#8202;.&#8202;3&#8202;.&#8202;3</small>Invalid uses of super</a></h3>
<p>As with previous language features, our implementation does the right thing when
the user writes correct code, but we haven&rsquo;t bulletproofed the interpreter
against bad code. In particular, consider:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Eclair</span> {
  <span class="i">cook</span>() {
    <span class="k">super</span>.<span class="i">cook</span>();
    <span class="k">print</span> <span class="s">&quot;Pipe full of crème pâtissière.&quot;</span>;
  }
}
</pre></div>
<p>This class has a <code>super</code> expression, but no superclass. At runtime, the code for
evaluating <code>super</code> expressions assumes that &ldquo;super&rdquo; was successfully resolved
and will be found in the environment. That&rsquo;s going to fail here because there is
no surrounding environment for the superclass since there is no superclass. The
JVM will throw an exception and bring our interpreter to its knees.</p>
<p>Heck, there are even simpler broken uses of super:</p>
<div class="codehilite"><pre><span class="k">super</span>.<span class="i">notEvenInAClass</span>();
</pre></div>
<p>We could handle errors like these at runtime by checking to see if the lookup
of &ldquo;super&rdquo; succeeded. But we can tell statically<span class="em">&mdash;</span>just by looking at the
source code<span class="em">&mdash;</span>that Eclair has no superclass and thus no <code>super</code> expression will
work inside it. Likewise, in the second example, we know that the <code>super</code>
expression is not even inside a method body.</p>
<p>Even though Lox is dynamically typed, that doesn&rsquo;t mean we want to defer
<em>everything</em> to runtime. If the user made a mistake, we&rsquo;d like to help them find
it sooner rather than later. So we&rsquo;ll report these errors statically, in the
resolver.</p>
<p>First, we add a new case to the enum we use to keep track of what kind of class
is surrounding the current code being visited.</p>
<div class="codehilite"><pre class="insert-before">    NONE,
</pre><pre class="insert-before">    <span class="i">CLASS</span><span class="insert-comma">,</span>
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in enum <em>ClassType</em><br>
add <em>&ldquo;,&rdquo;</em> to previous line</div>
<pre class="insert">    <span class="i">SUBCLASS</span>
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in enum <em>ClassType</em>, add <em>&ldquo;,&rdquo;</em> to previous line</div>

<p>We&rsquo;ll use that to distinguish when we&rsquo;re inside a class that has a superclass
versus one that doesn&rsquo;t. When we resolve a class declaration, we set that if the
class is a subclass.</p>
<div class="codehilite"><pre class="insert-before">    if (stmt.superclass != null) {
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitClassStmt</em>()</div>
<pre class="insert">      <span class="i">currentClass</span> = <span class="t">ClassType</span>.<span class="i">SUBCLASS</span>;
</pre><pre class="insert-after">      resolve(stmt.superclass);
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitClassStmt</em>()</div>

<p>Then, when we resolve a <code>super</code> expression, we check to see that we are
currently inside a scope where that&rsquo;s allowed.</p>
<div class="codehilite"><pre class="insert-before">  public Void visitSuperExpr(Expr.Super expr) {
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitSuperExpr</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">currentClass</span> == <span class="t">ClassType</span>.<span class="i">NONE</span>) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">expr</span>.<span class="i">keyword</span>,
          <span class="s">&quot;Can&#39;t use &#39;super&#39; outside of a class.&quot;</span>);
    } <span class="k">else</span> <span class="k">if</span> (<span class="i">currentClass</span> != <span class="t">ClassType</span>.<span class="i">SUBCLASS</span>) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">expr</span>.<span class="i">keyword</span>,
          <span class="s">&quot;Can&#39;t use &#39;super&#39; in a class with no superclass.&quot;</span>);
    }

</pre><pre class="insert-after">    resolveLocal(expr, expr.keyword);
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitSuperExpr</em>()</div>

<p>If not<span class="em">&mdash;</span>oopsie!<span class="em">&mdash;</span>the user made a mistake.</p>
<h2><a href="#conclusion" id="conclusion"><small>13&#8202;.&#8202;4</small>Conclusion</a></h2>
<p>We made it! That final bit of error handling is the last chunk of code needed to
complete our Java implementation of Lox. This is a real <span
name="superhero">accomplishment</span> and one you should be proud of. In the
past dozen chapters and a thousand or so lines of code, we have learned and
implemented<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<ul>
<li><a href="scanning.html">tokens and lexing</a>,</li>
<li><a href="representing-code.html">abstract syntax trees</a>,</li>
<li><a href="parsing-expressions.html">recursive descent parsing</a>,</li>
<li>prefix and infix expressions,</li>
<li>runtime representation of objects,</li>
<li><a href="evaluating-expressions.html">interpreting code using the Visitor pattern</a>,</li>
<li><a href="statements-and-state.html">lexical scope</a>,</li>
<li>environment chains for storing variables,</li>
<li><a href="control-flow.html">control flow</a>,</li>
<li><a href="functions.html">functions with parameters</a>,</li>
<li>closures,</li>
<li><a href="resolving-and-binding.html">static variable resolution and error detection</a>,</li>
<li><a href="classes.html">classes</a>,</li>
<li>constructors,</li>
<li>fields,</li>
<li>methods, and finally,</li>
<li>inheritance.</li>
</ul>
<aside name="superhero"><img src="image/inheritance/superhero.png" alt="You, being your bad self." />
</aside>
<p>We did all of that from scratch, with no external dependencies or magic tools.
Just you and I, our respective text editors, a couple of collection classes in
the Java standard library, and the JVM runtime.</p>
<p>This marks the end of Part II, but not the end of the book. Take a break. Maybe
write a few fun Lox programs and run them in your interpreter. (You may want to
add a few more native methods for things like reading user input.) When you&rsquo;re
refreshed and ready, we&rsquo;ll embark on our <a href="a-bytecode-virtual-machine.html">next adventure</a>.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Lox supports only <em>single inheritance</em><span class="em">&mdash;</span>a class may have a single
superclass and that&rsquo;s the only way to reuse methods across classes. Other
languages have explored a variety of ways to more freely reuse and share
capabilities across classes: mixins, traits, multiple inheritance, virtual
inheritance, extension methods, etc.</p>
<p>If you were to add some feature along these lines to Lox, which would you
pick and why? If you&rsquo;re feeling courageous (and you should be at this
point), go ahead and add it.</p>
</li>
<li>
<p>In Lox, as in most other object-oriented languages, when looking up a
method, we start at the bottom of the class hierarchy and work our way up<span class="em">&mdash;</span>a subclass&rsquo;s method is preferred over a superclass&rsquo;s. In order to get to the
superclass method from within an overriding method, you use <code>super</code>.</p>
<p>The language <a href="https://beta.cs.au.dk/">BETA</a> takes the <a href="http://journal.stuffwithstuff.com/2012/12/19/the-impoliteness-of-overriding-methods/">opposite approach</a>. When you call a
method, it starts at the <em>top</em> of the class hierarchy and works <em>down</em>. A
superclass method wins over a subclass method. In order to get to the
subclass method, the superclass method can call <code>inner</code>, which is sort of
like the inverse of <code>super</code>. It chains to the next method down the
hierarchy.</p>
<p>The superclass method controls when and where the subclass is allowed to
refine its behavior. If the superclass method doesn&rsquo;t call <code>inner</code> at all,
then the subclass has no way of overriding or modifying the superclass&rsquo;s
behavior.</p>
<p>Take out Lox&rsquo;s current overriding and <code>super</code> behavior and replace it with
BETA&rsquo;s semantics. In short:</p>
<ul>
<li>
<p>When calling a method on a class, prefer the method <em>highest</em> on the
class&rsquo;s inheritance chain.</p>
</li>
<li>
<p>Inside the body of a method, a call to <code>inner</code> looks for a method with
the same name in the nearest subclass along the inheritance chain
between the class containing the <code>inner</code> and the class of <code>this</code>. If
there is no matching method, the <code>inner</code> call does nothing.</p>
</li>
</ul>
<p>For example:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Fry until golden brown.&quot;</span>;
    <span class="i">inner</span>();
    <span class="k">print</span> <span class="s">&quot;Place in a nice box.&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">BostonCream</span> &lt; <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Pipe full of custard and coat with chocolate.&quot;</span>;
  }
}

<span class="t">BostonCream</span>().<span class="i">cook</span>();
</pre></div>
<p>This should print:</p>
<div class="codehilite"><pre>Fry until golden brown.
Pipe full of custard and coat with chocolate.
Place in a nice box.
</pre></div>
</li>
<li>
<p>In the chapter where I introduced Lox, <a href="the-lox-language.html#challenges">I challenged you</a> to
come up with a couple of features you think the language is missing. Now
that you know how to build an interpreter, implement one of those features.</p>
</li>
</ol>
</div>

<footer>
<a href="a-bytecode-virtual-machine.html" class="next">
  Next Part: &ldquo;A Bytecode Virtual Machine&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/introduction.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Introduction &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Introduction<small>1</small></a></h3>

<ul>
    <li><a href="#why-learn-this-stuff"><small>1.1</small> Why Learn This Stuff?</a></li>
    <li><a href="#how-the-book-is-organized"><small>1.2</small> How the Book Is Organized</a></li>
    <li><a href="#the-first-interpreter"><small>1.3</small> The First Interpreter</a></li>
    <li><a href="#the-second-interpreter"><small>1.4</small> The Second Interpreter</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>What&#x27;s in a Name?</a></li>
</ul>


<div class="prev-next">
    <a href="welcome.html" title="Welcome" class="left">&larr;&nbsp;Previous</a>
    <a href="welcome.html" title="Welcome">&uarr;&nbsp;Up</a>
    <a href="a-map-of-the-territory.html" title="A Map of the Territory" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="welcome.html" title="Welcome" class="prev">←</a>
<a href="a-map-of-the-territory.html" title="A Map of the Territory" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Introduction<small>1</small></a></h3>

<ul>
    <li><a href="#why-learn-this-stuff"><small>1.1</small> Why Learn This Stuff?</a></li>
    <li><a href="#how-the-book-is-organized"><small>1.2</small> How the Book Is Organized</a></li>
    <li><a href="#the-first-interpreter"><small>1.3</small> The First Interpreter</a></li>
    <li><a href="#the-second-interpreter"><small>1.4</small> The Second Interpreter</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>What&#x27;s in a Name?</a></li>
</ul>


<div class="prev-next">
    <a href="welcome.html" title="Welcome" class="left">&larr;&nbsp;Previous</a>
    <a href="welcome.html" title="Welcome">&uarr;&nbsp;Up</a>
    <a href="a-map-of-the-territory.html" title="A Map of the Territory" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">1</div>
  <h1>Introduction</h1>

<blockquote>
<p>Fairy tales are more than true: not because they tell us that dragons exist,
but because they tell us that dragons can be beaten.</p>
<p><cite>G.K. Chesterton by way of Neil Gaiman, <em>Coraline</em></cite></p>
</blockquote>
<p>I&rsquo;m really excited we&rsquo;re going on this journey together. This is a book on
implementing interpreters for programming languages. It&rsquo;s also a book on how to
design a language worth implementing. It&rsquo;s the book I wish I&rsquo;d had when I first
started getting into languages, and it&rsquo;s the book I&rsquo;ve been writing in my <span
name="head">head</span> for nearly a decade.</p>
<aside name="head">
<p>To my friends and family, sorry I&rsquo;ve been so absentminded!</p>
</aside>
<p>In these pages, we will walk step-by-step through two complete interpreters for
a full-featured language. I assume this is your first foray into languages, so
I&rsquo;ll cover each concept and line of code you need to build a complete, usable,
fast language implementation.</p>
<p>In order to cram two full implementations inside one book without it turning
into a doorstop, this text is lighter on theory than others. As we build each
piece of the system, I will introduce the history and concepts behind it. I&rsquo;ll
try to get you familiar with the lingo so that if you ever find yourself at a
<span name="party">cocktail party</span> full of PL (programming language)
researchers, you&rsquo;ll fit in.</p>
<aside name="party">
<p>Strangely enough, a situation I have found myself in multiple times. You
wouldn&rsquo;t believe how much some of them can drink.</p>
</aside>
<p>But we&rsquo;re mostly going to spend our brain juice getting the language up and
running. This is not to say theory isn&rsquo;t important. Being able to reason
precisely and <span name="formal">formally</span> about syntax and semantics is
a vital skill when working on a language. But, personally, I learn best by
doing. It&rsquo;s hard for me to wade through paragraphs full of abstract concepts and
really absorb them. But if I&rsquo;ve coded something, run it, and debugged it, then I
<em>get</em> it.</p>
<aside name="formal">
<p>Static type systems in particular require rigorous formal reasoning. Hacking on
a type system has the same feel as proving a theorem in mathematics.</p>
<p>It turns out this is no coincidence. In the early half of last century, Haskell
Curry and William Alvin Howard showed that they are two sides of the same coin:
<a href="https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence">the Curry-Howard isomorphism</a>.</p>
</aside>
<p>That&rsquo;s my goal for you. I want you to come away with a solid intuition of how a
real language lives and breathes. My hope is that when you read other, more
theoretical books later, the concepts there will firmly stick in your mind,
adhered to this tangible substrate.</p>
<h2><a href="#why-learn-this-stuff" id="why-learn-this-stuff"><small>1&#8202;.&#8202;1</small>Why Learn This Stuff?</a></h2>
<p>Every introduction to every compiler book seems to have this section. I don&rsquo;t
know what it is about programming languages that causes such existential doubt.
I don&rsquo;t think ornithology books worry about justifying their existence. They
assume the reader loves birds and start teaching.</p>
<p>But programming languages are a little different. I suppose it is true that the
odds of any of us creating a broadly successful, general-purpose programming
language are slim. The designers of the world&rsquo;s widely used languages could fit
in a Volkswagen bus, even without putting the pop-top camper up. If joining that
elite group was the <em>only</em> reason to learn languages, it would be hard to
justify. Fortunately, it isn&rsquo;t.</p>
<h3><a href="#little-languages-are-everywhere" id="little-languages-are-everywhere"><small>1&#8202;.&#8202;1&#8202;.&#8202;1</small>Little languages are everywhere</a></h3>
<p>For every successful general-purpose language, there are a thousand successful
niche ones. We used to call them &ldquo;little languages&rdquo;, but inflation in the jargon
economy led to the name &ldquo;domain-specific languages&rdquo;. These are pidgins
tailor-built to a specific task. Think application scripting languages, template
engines, markup formats, and configuration files.</p>
<p><span name="little"></span><img src="image/introduction/little-languages.png" alt="A random selection of little languages." /></p>
<aside name="little">
<p>A random selection of some little languages you might run into.</p>
</aside>
<p>Almost every large software project needs a handful of these. When you can, it&rsquo;s
good to reuse an existing one instead of rolling your own. Once you factor in
documentation, debuggers, editor support, syntax highlighting, and all of the
other trappings, doing it yourself becomes a tall order.</p>
<p>But there&rsquo;s still a good chance you&rsquo;ll find yourself needing to whip up a parser
or other tool when there isn&rsquo;t an existing library that fits your needs. Even
when you are reusing some existing implementation, you&rsquo;ll inevitably end up
needing to debug and maintain it and poke around in its guts.</p>
<h3><a href="#languages-are-great-exercise" id="languages-are-great-exercise"><small>1&#8202;.&#8202;1&#8202;.&#8202;2</small>Languages are great exercise</a></h3>
<p>Long-distance runners sometimes train with weights strapped to their ankles or
at high altitudes where the atmosphere is thin. When they later unburden
themselves, the new relative ease of light limbs and oxygen-rich air enables
them to run farther and faster.</p>
<p>Implementing a language is a real test of programming skill. The code is complex
and performance critical. You must master recursion, dynamic arrays, trees,
graphs, and hash tables. You probably use hash tables at least in your
day-to-day programming, but do you <em>really</em> understand them? Well, after we&rsquo;ve
crafted our own from scratch, I guarantee you will.</p>
<p>While I intend to show you that an interpreter isn&rsquo;t as daunting as you might
believe, implementing one well is still a challenge. Rise to it, and you&rsquo;ll come
away a stronger programmer, and smarter about how you use data structures and
algorithms in your day job.</p>
<h3><a href="#one-more-reason" id="one-more-reason"><small>1&#8202;.&#8202;1&#8202;.&#8202;3</small>One more reason</a></h3>
<p>This last reason is hard for me to admit, because it&rsquo;s so close to my heart.
Ever since I learned to program as a kid, I felt there was something magical
about languages. When I first tapped out BASIC programs one key at a time, I
couldn&rsquo;t conceive how BASIC <em>itself</em> was made.</p>
<p>Later, the mixture of awe and terror on my college friends&rsquo; faces when talking
about their compilers class was enough to convince me language hackers were a
different breed of human<span class="em">&mdash;</span>some sort of wizards granted privileged access to
arcane arts.</p>
<p>It&rsquo;s a charming <span name="image">image</span>, but it has a darker side. <em>I</em>
didn&rsquo;t feel like a wizard, so I was left thinking I lacked some inborn quality
necessary to join the cabal. Though I&rsquo;ve been fascinated by languages ever since
I doodled made-up keywords in my school notebook, it took me decades to muster
the courage to try to really learn them. That &ldquo;magical&rdquo; quality, that sense of
exclusivity, excluded <em>me</em>.</p>
<aside name="image">
<p>And its practitioners don&rsquo;t hesitate to play up this image. Two of the seminal
texts on programming languages feature a <a href="https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools">dragon</a> and a <a href="https://mitpress.mit.edu/sites/default/files/sicp/index.html">wizard</a> on their
covers.</p>
</aside>
<p>When I did finally start cobbling together my own little interpreters, I quickly
learned that, of course, there is no magic at all. It&rsquo;s just code, and the
people who hack on languages are just people.</p>
<p>There <em>are</em> a few techniques you don&rsquo;t often encounter outside of languages, and
some parts are a little difficult. But not more difficult than other obstacles
you&rsquo;ve overcome. My hope is that if you&rsquo;ve felt intimidated by languages and
this book helps you overcome that fear, maybe I&rsquo;ll leave you just a tiny bit
braver than you were before.</p>
<p>And, who knows, maybe you <em>will</em> make the next great language. Someone has to.</p>
<h2><a href="#how-the-book-is-organized" id="how-the-book-is-organized"><small>1&#8202;.&#8202;2</small>How the Book Is Organized</a></h2>
<p>This book is broken into three parts. You&rsquo;re reading the first one now. It&rsquo;s a
couple of chapters to get you oriented, teach you some of the lingo that
language hackers use, and introduce you to Lox, the language we&rsquo;ll be
implementing.</p>
<p>Each of the other two parts builds one complete Lox interpreter. Within those
parts, each chapter is structured the same way. The chapter takes a single
language feature, teaches you the concepts behind it, and walks you through an
implementation.</p>
<p>It took a good bit of trial and error on my part, but I managed to carve up the
two interpreters into chapter-sized chunks that build on the previous chapters
but require nothing from later ones. From the very first chapter, you&rsquo;ll have a
working program you can run and play with. With each passing chapter, it grows
increasingly full-featured until you eventually have a complete language.</p>
<p>Aside from copious, scintillating English prose, chapters have a few other
delightful facets:</p>
<h3><a href="#the-code" id="the-code"><small>1&#8202;.&#8202;2&#8202;.&#8202;1</small>The code</a></h3>
<p>We&rsquo;re about <em>crafting</em> interpreters, so this book contains real code. Every
single line of code needed is included, and each snippet tells you where to
insert it in your ever-growing implementation.</p>
<p>Many other language books and language implementations use tools like <a href="https://en.wikipedia.org/wiki/Lex_(software)">Lex</a>
and <span name="yacc"><a href="https://en.wikipedia.org/wiki/Yacc">Yacc</a></span>, so-called <strong>compiler-compilers</strong>, that
automatically generate some of the source files for an implementation from some
higher-level description. There are pros and cons to tools like those, and
strong opinions<span class="em">&mdash;</span>some might say religious convictions<span class="em">&mdash;</span>on both sides.</p>
<aside name="yacc">
<p>Yacc is a tool that takes in a grammar file and produces a source file for a
compiler, so it&rsquo;s sort of like a &ldquo;compiler&rdquo; that outputs a compiler, which is
where we get the term &ldquo;compiler-compiler&rdquo;.</p>
<p>Yacc wasn&rsquo;t the first of its ilk, which is why it&rsquo;s named &ldquo;Yacc&rdquo;<span class="em">&mdash;</span><em>Yet
Another</em> Compiler-Compiler. A later similar tool is <a href="https://en.wikipedia.org/wiki/GNU_bison">Bison</a>, named as a pun on
the pronunciation of Yacc like &ldquo;yak&rdquo;.</p><img src="image/introduction/yak.png" alt="A yak." />
<p>If you find all of these little self-references and puns charming and fun,
you&rsquo;ll fit right in here. If not, well, maybe the language nerd sense of humor
is an acquired taste.</p>
</aside>
<p>We will abstain from using them here. I want to ensure there are no dark corners
where magic and confusion can hide, so we&rsquo;ll write everything by hand. As you&rsquo;ll
see, it&rsquo;s not as bad as it sounds, and it means you really will understand each
line of code and how both interpreters work.</p>
<p>A book has different constraints from the &ldquo;real world&rdquo; and so the coding style
here might not always reflect the best way to write maintainable production
software. If I seem a little cavalier about, say, omitting <code>private</code> or
declaring a global variable, understand I do so to keep the code easier on your
eyes. The pages here aren&rsquo;t as wide as your IDE and every character counts.</p>
<p>Also, the code doesn&rsquo;t have many comments. That&rsquo;s because each handful of lines
is surrounded by several paragraphs of honest-to-God prose explaining it. When
you write a book to accompany your program, you are welcome to omit comments
too. Otherwise, you should probably use <code>//</code> a little more than I do.</p>
<p>While the book contains every line of code and teaches what each means, it does
not describe the machinery needed to compile and run the interpreter. I assume
you can slap together a makefile or a project in your IDE of choice in order to
get the code to run. Those kinds of instructions get out of date quickly, and
I want this book to age like XO brandy, not backyard hooch.</p>
<h3><a href="#snippets" id="snippets"><small>1&#8202;.&#8202;2&#8202;.&#8202;2</small>Snippets</a></h3>
<p>Since the book contains literally every line of code needed for the
implementations, the snippets are quite precise. Also, because I try to keep the
program in a runnable state even when major features are missing, sometimes we
add temporary code that gets replaced in later snippets.</p>
<p>A snippet with all the bells and whistles looks like this:</p>
<div class="codehilite"><pre class="insert-before">
      default:
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()<br>
replace 1 line</div>
<pre class="insert">
        <span class="k">if</span> (<span class="i">isDigit</span>(<span class="i">c</span>)) {
          <span class="i">number</span>();
        } <span class="k">else</span> {
          <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">line</span>, <span class="s">&quot;Unexpected character.&quot;</span>);
        }
</pre><pre class="insert-after">
        break;
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>(), replace 1 line</div>
<p>In the center, you have the new code to add. It may have a few faded out lines
above or below to show where it goes in the existing surrounding code. There is
also a little blurb telling you in which file and where to place the snippet. If
that blurb says &ldquo;replace _ lines&rdquo;, there is some existing code between the faded
lines that you need to remove and replace with the new snippet.</p>
<h3><a href="#asides" id="asides"><small>1&#8202;.&#8202;2&#8202;.&#8202;3</small>Asides</a></h3>
<p><span name="joke">Asides</span> contain biographical sketches, historical
background, references to related topics, and suggestions of other areas to
explore. There&rsquo;s nothing that you <em>need</em> to know in them to understand later
parts of the book, so you can skip them if you want. I won&rsquo;t judge you, but I
might be a little sad.</p>
<aside name="joke">
<p>Well, some asides do, at least. Most of them are just dumb jokes and amateurish
drawings.</p>
</aside>
<h3><a href="#challenges_" id="challenges_"><small>1&#8202;.&#8202;2&#8202;.&#8202;4</small>Challenges</a></h3>
<p>Each chapter ends with a few exercises. Unlike textbook problem sets, which tend
to review material you already covered, these are to help you learn <em>more</em> than
what&rsquo;s in the chapter. They force you to step off the guided path and explore on
your own. They will make you research other languages, figure out how to
implement features, or otherwise get you out of your comfort zone.</p>
<p><span name="warning">Vanquish</span> the challenges and you&rsquo;ll come away with a
broader understanding and possibly a few bumps and scrapes. Or skip them if you
want to stay inside the comfy confines of the tour bus. It&rsquo;s your book.</p>
<aside name="warning">
<p>A word of warning: the challenges often ask you to make changes to the
interpreter you&rsquo;re building. You&rsquo;ll want to implement those in a copy of your
code. The later chapters assume your interpreter is in a pristine
(&ldquo;unchallenged&rdquo;?) state.</p>
</aside>
<h3><a href="#design-notes" id="design-notes"><small>1&#8202;.&#8202;2&#8202;.&#8202;5</small>Design notes</a></h3>
<p>Most &ldquo;programming language&rdquo; books are strictly programming language
<em>implementation</em> books. They rarely discuss how one might happen to <em>design</em> the
language being implemented. Implementation is fun because it is so <span
name="benchmark">precisely defined</span>. We programmers seem to have an
affinity for things that are black and white, ones and zeroes.</p>
<aside name="benchmark">
<p>I know a lot of language hackers whose careers are based on this. You slide a
language spec under their door, wait a few months, and code and benchmark
results come out.</p>
</aside>
<p>Personally, I think the world needs only so many implementations of <span
name="fortran">FORTRAN 77</span>. At some point, you find yourself designing a
<em>new</em> language. Once you start playing <em>that</em> game, then the softer, human side
of the equation becomes paramount. Things like which features are easy to learn,
how to balance innovation and familiarity, what syntax is more readable and to
whom.</p>
<aside name="fortran">
<p>Hopefully your new language doesn&rsquo;t hardcode assumptions about the width of a
punched card into its grammar.</p>
</aside>
<p>All of that stuff profoundly affects the success of your new language. I want
your language to succeed, so in some chapters I end with a &ldquo;design note&rdquo;, a
little essay on some corner of the human aspect of programming languages. I&rsquo;m no
expert on this<span class="em">&mdash;</span>I don&rsquo;t know if anyone really is<span class="em">&mdash;</span>so take these with a large
pinch of salt. That should make them tastier food for thought, which is my main
aim.</p>
<h2><a href="#the-first-interpreter" id="the-first-interpreter"><small>1&#8202;.&#8202;3</small>The First Interpreter</a></h2>
<p>We&rsquo;ll write our first interpreter, jlox, in <span name="lang">Java</span>. The
focus is on <em>concepts</em>. We&rsquo;ll write the simplest, cleanest code we can to
correctly implement the semantics of the language. This will get us comfortable
with the basic techniques and also hone our understanding of exactly how the
language is supposed to behave.</p>
<aside name="lang">
<p>The book uses Java and C, but readers have ported the code to <a href="https://github.com/munificent/craftinginterpreters/wiki/Lox-implementations">many other
languages</a>. If the languages I picked aren&rsquo;t your bag, take a look at
those.</p>
</aside>
<p>Java is a great language for this. It&rsquo;s high level enough that we don&rsquo;t get
overwhelmed by fiddly implementation details, but it&rsquo;s still pretty explicit.
Unlike in scripting languages, there tends to be less complex machinery hiding
under the hood, and you&rsquo;ve got static types to see what data structures you&rsquo;re
working with.</p>
<p>I also chose Java specifically because it is an object-oriented language. That
paradigm swept the programming world in the &rsquo;90s and is now the dominant way of
thinking for millions of programmers. Odds are good you&rsquo;re already used to
organizing code into classes and methods, so we&rsquo;ll keep you in that comfort
zone.</p>
<p>While academic language folks sometimes look down on object-oriented languages,
the reality is that they are widely used even for language work. GCC and LLVM
are written in C++, as are most JavaScript virtual machines. Object-oriented
languages are ubiquitous, and the tools and compilers <em>for</em> a language are often
written <em>in</em> the <span name="host">same language</span>.</p>
<aside name="host">
<p>A compiler reads files in one language, translates them, and outputs files in
another language. You can implement a compiler in any language, including the
same language it compiles, a process called <strong>self-hosting</strong>.</p>
<p>You can&rsquo;t compile your compiler using itself yet, but if you have another
compiler for your language written in some other language, you use <em>that</em> one to
compile your compiler once. Now you can use the compiled version of your own
compiler to compile future versions of itself, and you can discard the original
one compiled from the other compiler. This is called <strong>bootstrapping</strong>, from
the image of pulling yourself up by your own bootstraps.</p><img src="image/introduction/bootstrap.png" alt="Fact: This is the primary mode of transportation of the American cowboy." />
</aside>
<p>And, finally, Java is hugely popular. That means there&rsquo;s a good chance you
already know it, so there&rsquo;s less for you to learn to get going in the book. If
you aren&rsquo;t that familiar with Java, don&rsquo;t freak out. I try to stick to a fairly
minimal subset of it. I use the diamond operator from Java 7 to make things a
little more terse, but that&rsquo;s about it as far as &ldquo;advanced&rdquo; features go. If you
know another object-oriented language, like C# or C++, you can muddle through.</p>
<p>By the end of part II, we&rsquo;ll have a simple, readable implementation. It&rsquo;s not
very fast, but it&rsquo;s correct. However, we are only able to accomplish that by
building on the Java virtual machine&rsquo;s own runtime facilities. We want to learn
how Java <em>itself</em> implements those things.</p>
<h2><a href="#the-second-interpreter" id="the-second-interpreter"><small>1&#8202;.&#8202;4</small>The Second Interpreter</a></h2>
<p>So in the next part, we start all over again, but this time in C. C is the
perfect language for understanding how an implementation <em>really</em> works, all the
way down to the bytes in memory and the code flowing through the CPU.</p>
<p>A big reason that we&rsquo;re using C is so I can show you things C is particularly
good at, but that <em>does</em> mean you&rsquo;ll need to be pretty comfortable with it. You
don&rsquo;t have to be the reincarnation of Dennis Ritchie, but you shouldn&rsquo;t be
spooked by pointers either.</p>
<p>If you aren&rsquo;t there yet, pick up an introductory book on C and chew through it,
then come back here when you&rsquo;re done. In return, you&rsquo;ll come away from this book
an even stronger C programmer. That&rsquo;s useful given how many language
implementations are written in C: Lua, CPython, and Ruby&rsquo;s MRI, to name a few.</p>
<p>In our C interpreter, <span name="clox">clox</span>, we are forced to implement
for ourselves all the things Java gave us for free. We&rsquo;ll write our own dynamic
array and hash table. We&rsquo;ll decide how objects are represented in memory, and
build a garbage collector to reclaim them.</p>
<aside name="clox">
<p>I pronounce the name like &ldquo;sea-locks&rdquo;, but you can say it &ldquo;clocks&rdquo; or even
&ldquo;cloch&rdquo;, where you pronounce the &ldquo;x&rdquo; like the Greeks do if it makes you happy.</p>
</aside>
<p>Our Java implementation was focused on being correct. Now that we have that
down, we&rsquo;ll turn to also being <em>fast</em>. Our C interpreter will contain a <span
name="compiler">compiler</span> that translates Lox to an efficient bytecode
representation (don&rsquo;t worry, I&rsquo;ll get into what that means soon), which it then
executes. This is the same technique used by implementations of Lua, Python,
Ruby, PHP, and many other successful languages.</p>
<aside name="compiler">
<p>Did you think this was just an interpreter book? It&rsquo;s a compiler book as well.
Two for the price of one!</p>
</aside>
<p>We&rsquo;ll even try our hand at benchmarking and optimization. By the end, we&rsquo;ll have
a robust, accurate, fast interpreter for our language, able to keep up with
other professional-caliber implementations out there. Not bad for one book and a
few thousand lines of code.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>There are at least six domain-specific languages used in the <a href="https://github.com/munificent/craftinginterpreters">little system
I cobbled together</a> to write and publish this book. What are they?</p>
</li>
<li>
<p>Get a &ldquo;Hello, world!&rdquo; program written and running in Java. Set up whatever
makefiles or IDE projects you need to get it working. If you have a
debugger, get comfortable with it and step through your program as it runs.</p>
</li>
<li>
<p>Do the same thing for C. To get some practice with pointers, define a
<a href="https://en.wikipedia.org/wiki/Doubly_linked_list">doubly linked list</a> of heap-allocated strings. Write functions to insert,
find, and delete items from it. Test them.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: What&rsquo;s in a Name?</a></h2>
<p>One of the hardest challenges in writing this book was coming up with a name for
the language it implements. I went through <em>pages</em> of candidates before I found
one that worked. As you&rsquo;ll discover on the first day you start building your own
language, naming is deviously hard. A good name satisfies a few criteria:</p>
<ol>
<li>
<p><strong>It isn&rsquo;t in use.</strong> You can run into all sorts of trouble, legal and
social, if you inadvertently step on someone else&rsquo;s name.</p>
</li>
<li>
<p><strong>It&rsquo;s easy to pronounce.</strong> If things go well, hordes of people will be
saying and writing your language&rsquo;s name. Anything longer than a couple of
syllables or a handful of letters will annoy them to no end.</p>
</li>
<li>
<p><strong>It&rsquo;s distinct enough to search for.</strong> People will Google your language&rsquo;s
name to learn about it, so you want a word that&rsquo;s rare enough that most
results point to your docs. Though, with the amount of AI search engines are
packing today, that&rsquo;s less of an issue. Still, you won&rsquo;t be doing your users
any favors if you name your language &ldquo;for&rdquo;.</p>
</li>
<li>
<p><strong>It doesn&rsquo;t have negative connotations across a number of cultures.</strong> This
is hard to be on guard for, but it&rsquo;s worth considering. The designer of
Nimrod ended up renaming his language to &ldquo;Nim&rdquo; because too many people
remember that Bugs Bunny used &ldquo;Nimrod&rdquo; as an insult. (Bugs was using it
ironically.)</p>
</li>
</ol>
<p>If your potential name makes it through that gauntlet, keep it. Don&rsquo;t get hung
up on trying to find an appellation that captures the quintessence of your
language. If the names of the world&rsquo;s other successful languages teach us
anything, it&rsquo;s that the name doesn&rsquo;t matter much. All you need is a reasonably
unique token.</p>
</div>

<footer>
<a href="a-map-of-the-territory.html" class="next">
  Next Chapter: &ldquo;A Map of the Territory&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/jumping-back-and-forth.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Jumping Back and Forth &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Jumping Back and Forth<small>23</small></a></h3>

<ul>
    <li><a href="#if-statements"><small>23.1</small> If Statements</a></li>
    <li><a href="#logical-operators"><small>23.2</small> Logical Operators</a></li>
    <li><a href="#while-statements"><small>23.3</small> While Statements</a></li>
    <li><a href="#for-statements"><small>23.4</small> For Statements</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Considering Goto Harmful</a></li>
</ul>


<div class="prev-next">
    <a href="local-variables.html" title="Local Variables" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="calls-and-functions.html" title="Calls and Functions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="local-variables.html" title="Local Variables" class="prev">←</a>
<a href="calls-and-functions.html" title="Calls and Functions" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Jumping Back and Forth<small>23</small></a></h3>

<ul>
    <li><a href="#if-statements"><small>23.1</small> If Statements</a></li>
    <li><a href="#logical-operators"><small>23.2</small> Logical Operators</a></li>
    <li><a href="#while-statements"><small>23.3</small> While Statements</a></li>
    <li><a href="#for-statements"><small>23.4</small> For Statements</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Considering Goto Harmful</a></li>
</ul>


<div class="prev-next">
    <a href="local-variables.html" title="Local Variables" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="calls-and-functions.html" title="Calls and Functions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">23</div>
  <h1>Jumping Back and Forth</h1>

<blockquote>
<p>The order that our mind imagines is like a net, or like a ladder, built to
attain something. But afterward you must throw the ladder away, because you
discover that, even if it was useful, it was meaningless.</p>
<p><cite>Umberto Eco, <em>The Name of the Rose</em></cite></p>
</blockquote>
<p>It&rsquo;s taken a while to get here, but we&rsquo;re finally ready to add control flow to
our virtual machine. In the tree-walk interpreter we built for jlox, we
implemented Lox&rsquo;s control flow in terms of Java&rsquo;s. To execute a Lox <code>if</code>
statement, we used a Java <code>if</code> statement to run the chosen branch. That works,
but isn&rsquo;t entirely satisfying. By what magic does the <em>JVM itself</em> or a native
CPU implement <code>if</code> statements? Now that we have our own bytecode VM to hack on,
we can answer that.</p>
<p>When we talk about &ldquo;control flow&rdquo;, what are we referring to? By &ldquo;flow&rdquo; we mean
the way execution moves through the text of the program. Almost like there is a
little robot inside the computer wandering through our code, executing bits and
pieces here and there. Flow is the path that robot takes, and by <em>controlling</em>
the robot, we drive which pieces of code it executes.</p>
<p>In jlox, the robot&rsquo;s locus of attention<span class="em">&mdash;</span>the <em>current</em> bit of code<span class="em">&mdash;</span>was
implicit based on which AST nodes were stored in various Java variables and what
Java code we were in the middle of running. In clox, it is much more explicit.
The VM&rsquo;s <code>ip</code> field stores the address of the current bytecode instruction. The
value of that field is exactly &ldquo;where we are&rdquo; in the program.</p>
<p>Execution proceeds normally by incrementing the <code>ip</code>. But we can mutate that
variable however we want to. In order to implement control flow, all that&rsquo;s
necessary is to change the <code>ip</code> in more interesting ways. The simplest control
flow construct is an <code>if</code> statement with no <code>else</code> clause:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">condition</span>) <span class="k">print</span>(<span class="s">&quot;condition was truthy&quot;</span>);
</pre></div>
<p>The VM evaluates the bytecode for the condition expression. If the result is
truthy, then it continues along and executes the <code>print</code> statement in the body.
The interesting case is when the condition is falsey. When that happens,
execution skips over the then branch and proceeds to the next statement.</p>
<p>To skip over a chunk of code, we simply set the <code>ip</code> field to the address of the
bytecode instruction following that code. To <em>conditionally</em> skip over some
code, we need an instruction that looks at the value on top of the stack. If
it&rsquo;s falsey, it adds a given offset to the <code>ip</code> to jump over a range of
instructions. Otherwise, it does nothing and lets execution proceed to the next
instruction as usual.</p>
<p>When we compile to bytecode, the explicit nested block structure of the code
evaporates, leaving only a flat series of instructions behind. Lox is a
<a href="https://en.wikipedia.org/wiki/Structured_programming">structured programming</a> language, but clox bytecode isn&rsquo;t. The right<span class="em">&mdash;</span>or
wrong, depending on how you look at it<span class="em">&mdash;</span>set of bytecode instructions could
jump into the middle of a block, or from one scope into another.</p>
<p>The VM will happily execute that, even if the result leaves the stack in an
unknown, inconsistent state. So even though the bytecode is unstructured, we&rsquo;ll
take care to ensure that our compiler only generates clean code that maintains
the same structure and nesting that Lox itself does.</p>
<p>This is exactly how real CPUs behave. Even though we might program them using
higher-level languages that mandate structured control flow, the compiler lowers
that down to raw jumps. At the bottom, it turns out goto is the only real
control flow.</p>
<p>Anyway, I didn&rsquo;t mean to get all philosophical. The important bit is that if we
have that one conditional jump instruction, that&rsquo;s enough to implement Lox&rsquo;s
<code>if</code> statement, as long as it doesn&rsquo;t have an <code>else</code> clause. So let&rsquo;s go ahead
and get started with that.</p>
<h2><a href="#if-statements" id="if-statements"><small>23&#8202;.&#8202;1</small>If Statements</a></h2>
<p>This many chapters in, you know the drill. Any new feature starts in the front
end and works its way through the pipeline. An <code>if</code> statement is, well, a
statement, so that&rsquo;s where we hook it into the parser.</p>
<div class="codehilite"><pre class="insert-before">  if (match(TOKEN_PRINT)) {
    printStatement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>statement</em>()</div>
<pre class="insert">  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_IF</span>)) {
    <span class="i">ifStatement</span>();
</pre><pre class="insert-after">  } else if (match(TOKEN_LEFT_BRACE)) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>statement</em>()</div>

<p>When we see an <code>if</code> keyword, we hand off compilation to this function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>expressionStatement</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">ifStatement</span>() {
  <span class="i">consume</span>(<span class="a">TOKEN_LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after &#39;if&#39;.&quot;</span>);
  <span class="i">expression</span>();
  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after condition.&quot;</span>);<span name="paren"> </span>

  <span class="t">int</span> <span class="i">thenJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP_IF_FALSE</span>);
  <span class="i">statement</span>();

  <span class="i">patchJump</span>(<span class="i">thenJump</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expressionStatement</em>()</div>

<aside name="paren">
<p>Have you ever noticed that the <code>(</code> after the <code>if</code> keyword doesn&rsquo;t actually do
anything useful? The language would be just as unambiguous and easy to parse
without it, like:</p>
<div class="codehilite"><pre><span class="k">if</span> <span class="i">condition</span>) <span class="k">print</span>(<span class="s">&quot;looks weird&quot;</span>);
</pre></div>
<p>The closing <code>)</code> is useful because it separates the condition expression from the
body. Some languages use a <code>then</code> keyword instead. But the opening <code>(</code> doesn&rsquo;t
do anything. It&rsquo;s just there because unmatched parentheses look bad to us
humans.</p>
</aside>
<p>First we compile the condition expression, bracketed by parentheses. At runtime,
that will leave the condition value on top of the stack. We&rsquo;ll use that to
determine whether to execute the then branch or skip it.</p>
<p>Then we emit a new <code>OP_JUMP_IF_FALSE</code> instruction. It has an operand for how
much to offset the <code>ip</code><span class="em">&mdash;</span>how many bytes of code to skip. If the condition is
falsey, it adjusts the <code>ip</code> by that amount. Something like this:</p>
<aside name="legend">
<p>The boxes with the torn edges here represent the blob of bytecode generated by
compiling some sub-clause of a control flow construct. So the &ldquo;condition
expression&rdquo; box is all of the instructions emitted when we compiled that
expression.</p>
</aside>
<p><span name="legend"></span></p><img src="image/jumping-back-and-forth/if-without-else.png" alt="Flowchart of the compiled bytecode of an if statement." />
<p>But we have a problem. When we&rsquo;re writing the <code>OP_JUMP_IF_FALSE</code> instruction&rsquo;s
operand, how do we know how far to jump? We haven&rsquo;t compiled the then branch
yet, so we don&rsquo;t know how much bytecode it contains.</p>
<p>To fix that, we use a classic trick called <strong>backpatching</strong>. We emit the jump
instruction first with a placeholder offset operand. We keep track of where that
half-finished instruction is. Next, we compile the then body. Once that&rsquo;s done,
we know how far to jump. So we go back and replace that placeholder offset with
the real one now that we can calculate it. Sort of like sewing a patch onto the
existing fabric of the compiled code.</p><img src="image/jumping-back-and-forth/patch.png" alt="A patch containing a number being sewn onto a sheet of bytecode." />
<p>We encode this trick into two helper functions.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitBytes</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">emitJump</span>(<span class="t">uint8_t</span> <span class="i">instruction</span>) {
  <span class="i">emitByte</span>(<span class="i">instruction</span>);
  <span class="i">emitByte</span>(<span class="n">0xff</span>);
  <span class="i">emitByte</span>(<span class="n">0xff</span>);
  <span class="k">return</span> <span class="i">currentChunk</span>()-&gt;<span class="i">count</span> - <span class="n">2</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitBytes</em>()</div>

<p>The first emits a bytecode instruction and writes a placeholder operand for the
jump offset. We pass in the opcode as an argument because later we&rsquo;ll have two
different instructions that use this helper. We use two bytes for the jump
offset operand. A 16-bit <span name="offset">offset</span> lets us jump over up
to 65,535 bytes of code, which should be plenty for our needs.</p>
<aside name="offset">
<p>Some instruction sets have separate &ldquo;long&rdquo; jump instructions that take larger
operands for when you need to jump a greater distance.</p>
</aside>
<p>The function returns the offset of the placeholder operand in the chunk. After
compiling the then branch, we take that offset and pass it to this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitConstant</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">patchJump</span>(<span class="t">int</span> <span class="i">offset</span>) {
  <span class="c">// -2 to adjust for the bytecode for the jump offset itself.</span>
  <span class="t">int</span> <span class="i">jump</span> = <span class="i">currentChunk</span>()-&gt;<span class="i">count</span> - <span class="i">offset</span> - <span class="n">2</span>;

  <span class="k">if</span> (<span class="i">jump</span> &gt; <span class="a">UINT16_MAX</span>) {
    <span class="i">error</span>(<span class="s">&quot;Too much code to jump over.&quot;</span>);
  }

  <span class="i">currentChunk</span>()-&gt;<span class="i">code</span>[<span class="i">offset</span>] = (<span class="i">jump</span> &gt;&gt; <span class="n">8</span>) &amp; <span class="n">0xff</span>;
  <span class="i">currentChunk</span>()-&gt;<span class="i">code</span>[<span class="i">offset</span> + <span class="n">1</span>] = <span class="i">jump</span> &amp; <span class="n">0xff</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitConstant</em>()</div>

<p>This goes back into the bytecode and replaces the operand at the given location
with the calculated jump offset. We call <code>patchJump()</code> right before we emit the
next instruction that we want the jump to land on, so it uses the current
bytecode count to determine how far to jump. In the case of an <code>if</code> statement,
that means right after we compile the then branch and before we compile the next
statement.</p>
<p>That&rsquo;s all we need at compile time. Let&rsquo;s define the new instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_PRINT,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_JUMP_IF_FALSE</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>Over in the VM, we get it working like so:</p>
<div class="codehilite"><pre class="insert-before">        break;
      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_JUMP_IF_FALSE</span>: {
        <span class="t">uint16_t</span> <span class="i">offset</span> = <span class="a">READ_SHORT</span>();
        <span class="k">if</span> (<span class="i">isFalsey</span>(<span class="i">peek</span>(<span class="n">0</span>))) <span class="i">vm</span>.<span class="i">ip</span> += <span class="i">offset</span>;
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>This is the first instruction we&rsquo;ve added that takes a 16-bit operand. To read
that from the chunk, we use a new macro.</p>
<div class="codehilite"><pre class="insert-before">#define READ_CONSTANT() (vm.chunk-&gt;constants.values[READ_BYTE()])
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#define READ_SHORT() \</span>
<span class="a">    (vm.ip += 2, (uint16_t)((vm.ip[-2] &lt;&lt; 8) | vm.ip[-1]))</span>
</pre><pre class="insert-after">#define READ_STRING() AS_STRING(READ_CONSTANT())
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>It yanks the next two bytes from the chunk and builds a 16-bit unsigned integer
out of them. As usual, we clean up our macro when we&rsquo;re done with it.</p>
<div class="codehilite"><pre class="insert-before">#undef READ_BYTE
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert"><span class="a">#undef READ_SHORT</span>
</pre><pre class="insert-after">#undef READ_CONSTANT
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>After reading the offset, we check the condition value on top of the stack.
<span name="if">If</span> it&rsquo;s falsey, we apply this jump offset to the <code>ip</code>.
Otherwise, we leave the <code>ip</code> alone and execution will automatically proceed to
the next instruction following the jump instruction.</p>
<p>In the case where the condition is falsey, we don&rsquo;t need to do any other work.
We&rsquo;ve offset the <code>ip</code>, so when the outer instruction dispatch loop turns again,
it will pick up execution at that new instruction, past all of the code in the
then branch.</p>
<aside name="if">
<p>I said we wouldn&rsquo;t use C&rsquo;s <code>if</code> statement to implement Lox&rsquo;s control flow, but
we do use one here to determine whether or not to offset the instruction
pointer. But we aren&rsquo;t really using C for <em>control flow</em>. If we wanted to, we
could do the same thing purely arithmetically. Let&rsquo;s assume we have a function
<code>falsey()</code> that takes a Lox Value and returns 1 if it&rsquo;s falsey or 0 otherwise.
Then we could implement the jump instruction like:</p>
<div class="codehilite"><pre><span class="k">case</span> <span class="a">OP_JUMP_IF_FALSE</span>: {
  <span class="t">uint16_t</span> <span class="i">offset</span> = <span class="a">READ_SHORT</span>();
  <span class="i">vm</span>.<span class="i">ip</span> += <span class="i">falsey</span>() * <span class="i">offset</span>;
  <span class="k">break</span>;
}
</pre></div>
<p>The <code>falsey()</code> function would probably use some control flow to handle the
different value types, but that&rsquo;s an implementation detail of that function and
doesn&rsquo;t affect how our VM does its own control flow.</p>
</aside>
<p>Note that the jump instruction doesn&rsquo;t pop the condition value off the stack. So
we aren&rsquo;t totally done here, since this leaves an extra value floating around on
the stack. We&rsquo;ll clean that up soon. Ignoring that for the moment, we do have a
working <code>if</code> statement in Lox now, with only one little instruction required to
support it at runtime in the VM.</p>
<h3><a href="#else-clauses" id="else-clauses"><small>23&#8202;.&#8202;1&#8202;.&#8202;1</small>Else clauses</a></h3>
<p>An <code>if</code> statement without support for <code>else</code> clauses is like Morticia Addams
without Gomez. So, after we compile the then branch, we look for an <code>else</code>
keyword. If we find one, we compile the else branch.</p>
<div class="codehilite"><pre class="insert-before">  patchJump(thenJump);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>ifStatement</em>()</div>
<pre class="insert">

  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_ELSE</span>)) <span class="i">statement</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>ifStatement</em>()</div>

<p>When the condition is falsey, we&rsquo;ll jump over the then branch. If there&rsquo;s an
else branch, the <code>ip</code> will land right at the beginning of its code. But that&rsquo;s
not enough. Here&rsquo;s the flow that leads to:</p><img src="image/jumping-back-and-forth/bad-else.png" alt="Flowchart of the compiled bytecode with the then branch incorrectly falling through to the else branch." />
<p>If the condition is truthy, we execute the then branch like we want. But after
that, execution rolls right on through into the else branch. Oops! When the
condition is true, after we run the then branch, we need to jump over the else
branch. That way, in either case, we only execute a single branch, like this:</p><img src="image/jumping-back-and-forth/if-else.png" alt="Flowchart of the compiled bytecode for an if with an else clause." />
<p>To implement that, we need another jump from the end of the then branch.</p>
<div class="codehilite"><pre class="insert-before">  statement();

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>ifStatement</em>()</div>
<pre class="insert">  <span class="t">int</span> <span class="i">elseJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP</span>);

</pre><pre class="insert-after">  patchJump(thenJump);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>ifStatement</em>()</div>

<p>We patch that offset after the end of the else body.</p>
<div class="codehilite"><pre class="insert-before">  if (match(TOKEN_ELSE)) statement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>ifStatement</em>()</div>
<pre class="insert">  <span class="i">patchJump</span>(<span class="i">elseJump</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>ifStatement</em>()</div>

<p>After executing the then branch, this jumps to the next statement after the else
branch. Unlike the other jump, this jump is unconditional. We always take it, so
we need another instruction that expresses that.</p>
<div class="codehilite"><pre class="insert-before">  OP_PRINT,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_JUMP</span>,
</pre><pre class="insert-after">  OP_JUMP_IF_FALSE,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>We interpret it like so:</p>
<div class="codehilite"><pre class="insert-before">        break;
      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_JUMP</span>: {
        <span class="t">uint16_t</span> <span class="i">offset</span> = <span class="a">READ_SHORT</span>();
        <span class="i">vm</span>.<span class="i">ip</span> += <span class="i">offset</span>;
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_JUMP_IF_FALSE: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Nothing too surprising here<span class="em">&mdash;</span>the only difference is that it doesn&rsquo;t check a
condition and always applies the offset.</p>
<p>We have then and else branches working now, so we&rsquo;re close. The last bit is to
clean up that condition value we left on the stack. Remember, each statement is
required to have zero stack effect<span class="em">&mdash;</span>after the statement is finished executing,
the stack should be as tall as it was before.</p>
<p>We could have the <code>OP_JUMP_IF_FALSE</code> instruction pop the condition itself, but
soon we&rsquo;ll use that same instruction for the logical operators where we don&rsquo;t
want the condition popped. Instead, we&rsquo;ll have the compiler emit a couple of
explicit <code>OP_POP</code> instructions when compiling an <code>if</code> statement. We need to take
care that every execution path through the generated code pops the condition.</p>
<p>When the condition is truthy, we pop it right before the code inside the then
branch.</p>
<div class="codehilite"><pre class="insert-before">  int thenJump = emitJump(OP_JUMP_IF_FALSE);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>ifStatement</em>()</div>
<pre class="insert">  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
</pre><pre class="insert-after">  statement();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>ifStatement</em>()</div>

<p>Otherwise, we pop it at the beginning of the else branch.</p>
<div class="codehilite"><pre class="insert-before">  patchJump(thenJump);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>ifStatement</em>()</div>
<pre class="insert">  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
</pre><pre class="insert-after">

  if (match(TOKEN_ELSE)) statement();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>ifStatement</em>()</div>

<p>This little instruction here also means that every <code>if</code> statement has an
implicit else branch even if the user didn&rsquo;t write an <code>else</code> clause. In the case
where they left it off, all the branch does is discard the condition value.</p>
<p>The full correct flow looks like this:</p><img src="image/jumping-back-and-forth/full-if-else.png" alt="Flowchart of the compiled bytecode including necessary pop instructions." />
<p>If you trace through, you can see that it always executes a single branch and
ensures the condition is popped first. All that remains is a little disassembler
support.</p>
<div class="codehilite"><pre class="insert-before">      return simpleInstruction(&quot;OP_PRINT&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_JUMP</span>:
      <span class="k">return</span> <span class="i">jumpInstruction</span>(<span class="s">&quot;OP_JUMP&quot;</span>, <span class="n">1</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_JUMP_IF_FALSE</span>:
      <span class="k">return</span> <span class="i">jumpInstruction</span>(<span class="s">&quot;OP_JUMP_IF_FALSE&quot;</span>, <span class="n">1</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>These two instructions have a new format with a 16-bit operand, so we add a new
utility function to disassemble them.</p>
<div class="codehilite"><div class="source-file"><em>debug.c</em><br>
add after <em>byteInstruction</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">jumpInstruction</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>, <span class="t">int</span> <span class="i">sign</span>,
                           <span class="t">Chunk</span>* <span class="i">chunk</span>, <span class="t">int</span> <span class="i">offset</span>) {
  <span class="t">uint16_t</span> <span class="i">jump</span> = (<span class="t">uint16_t</span>)(<span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span> + <span class="n">1</span>] &lt;&lt; <span class="n">8</span>);
  <span class="i">jump</span> |= <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span> + <span class="n">2</span>];
  <span class="i">printf</span>(<span class="s">&quot;%-16s %4d -&gt; %d</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">name</span>, <span class="i">offset</span>,
         <span class="i">offset</span> + <span class="n">3</span> + <span class="i">sign</span> * <span class="i">jump</span>);
  <span class="k">return</span> <span class="i">offset</span> + <span class="n">3</span>;
}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, add after <em>byteInstruction</em>()</div>

<p>There we go, that&rsquo;s one complete control flow construct. If this were an &rsquo;80s
movie, the montage music would kick in and the rest of the control flow syntax
would take care of itself. Alas, the <span name="80s">&rsquo;80s</span> are long over,
so we&rsquo;ll have to grind it out ourselves.</p>
<aside name="80s">
<p>My enduring love of Depeche Mode notwithstanding.</p>
</aside>
<h2><a href="#logical-operators" id="logical-operators"><small>23&#8202;.&#8202;2</small>Logical Operators</a></h2>
<p>You probably remember this from jlox, but the logical operators <code>and</code> and <code>or</code>
aren&rsquo;t just another pair of binary operators like <code>+</code> and <code>-</code>. Because they
short-circuit and may not evaluate their right operand depending on the value of
the left one, they work more like control flow expressions.</p>
<p>They&rsquo;re basically a little variation on an <code>if</code> statement with an <code>else</code> clause.
The easiest way to explain them is to just show you the compiler code and the
control flow it produces in the resulting bytecode. Starting with <code>and</code>, we hook
it into the expression parsing table here:</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_NUMBER]        = {number,   NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_AND</span>]           = {<span class="a">NULL</span>,     <span class="i">and_</span>,   <span class="a">PREC_AND</span>},
</pre><pre class="insert-after">  [TOKEN_CLASS]         = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>That hands off to a new parser function.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>defineVariable</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">and_</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
  <span class="t">int</span> <span class="i">endJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP_IF_FALSE</span>);

  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
  <span class="i">parsePrecedence</span>(<span class="a">PREC_AND</span>);

  <span class="i">patchJump</span>(<span class="i">endJump</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>defineVariable</em>()</div>

<p>At the point this is called, the left-hand side expression has already been
compiled. That means at runtime, its value will be on top of the stack. If that
value is falsey, then we know the entire <code>and</code> must be false, so we skip the
right operand and leave the left-hand side value as the result of the entire
expression. Otherwise, we discard the left-hand value and evaluate the right
operand which becomes the result of the whole <code>and</code> expression.</p>
<p>Those four lines of code right there produce exactly that. The flow looks like
this:</p><img src="image/jumping-back-and-forth/and.png" alt="Flowchart of the compiled bytecode of an 'and' expression." />
<p>Now you can see why <code>OP_JUMP_IF_FALSE</code> <span name="instr">leaves</span> the
value on top of the stack. When the left-hand side of the <code>and</code> is falsey, that
value sticks around to become the result of the entire expression.</p>
<aside name="instr">
<p>We&rsquo;ve got plenty of space left in our opcode range, so we could have separate
instructions for conditional jumps that implicitly pop and those that don&rsquo;t, I
suppose. But I&rsquo;m trying to keep things minimal for the book. In your bytecode
VM, it&rsquo;s worth exploring adding more specialized instructions and seeing how
they affect performance.</p>
</aside>
<h3><a href="#logical-or-operator" id="logical-or-operator"><small>23&#8202;.&#8202;2&#8202;.&#8202;1</small>Logical or operator</a></h3>
<p>The <code>or</code> operator is a little more complex. First we add it to the parse table.</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_NIL]           = {literal,  NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_OR</span>]            = {<span class="a">NULL</span>,     <span class="i">or_</span>,    <span class="a">PREC_OR</span>},
</pre><pre class="insert-after">  [TOKEN_PRINT]         = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>When that parser consumes an infix <code>or</code> token, it calls this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>number</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">or_</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
  <span class="t">int</span> <span class="i">elseJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP_IF_FALSE</span>);
  <span class="t">int</span> <span class="i">endJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP</span>);

  <span class="i">patchJump</span>(<span class="i">elseJump</span>);
  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);

  <span class="i">parsePrecedence</span>(<span class="a">PREC_OR</span>);
  <span class="i">patchJump</span>(<span class="i">endJump</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>number</em>()</div>

<p>In an <code>or</code> expression, if the left-hand side is <em>truthy</em>, then we skip over the
right operand. Thus we need to jump when a value is truthy. We could add a
separate instruction, but just to show how our compiler is free to map the
language&rsquo;s semantics to whatever instruction sequence it wants, I implemented it
in terms of the jump instructions we already have.</p>
<p>When the left-hand side is falsey, it does a tiny jump over the next instruction.
That instruction is an unconditional jump over the code for the right operand.
This little dance effectively does a jump when the value is truthy. The flow
looks like this:</p><img src="image/jumping-back-and-forth/or.png" alt="Flowchart of the compiled bytecode of a logical or expression." />
<p>If I&rsquo;m honest with you, this isn&rsquo;t the best way to do this. There are more
instructions to dispatch and more overhead. There&rsquo;s no good reason why <code>or</code>
should be slower than <code>and</code>. But it is kind of fun to see that it&rsquo;s possible to
implement both operators without adding any new instructions. Forgive me my
indulgences.</p>
<p>OK, those are the three <em>branching</em> constructs in Lox. By that, I mean, these
are the control flow features that only jump <em>forward</em> over code. Other
languages often have some kind of multi-way branching statement like <code>switch</code>
and maybe a conditional expression like <code>?:</code>, but Lox keeps it simple.</p>
<h2><a href="#while-statements" id="while-statements"><small>23&#8202;.&#8202;3</small>While Statements</a></h2>
<p>That takes us to the <em>looping</em> statements, which jump <em>backward</em> so that code
can be executed more than once. Lox only has two loop constructs, <code>while</code> and
<code>for</code>. A <code>while</code> loop is (much) simpler, so we start the party there.</p>
<div class="codehilite"><pre class="insert-before">    ifStatement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>statement</em>()</div>
<pre class="insert">  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_WHILE</span>)) {
    <span class="i">whileStatement</span>();
</pre><pre class="insert-after">  } else if (match(TOKEN_LEFT_BRACE)) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>statement</em>()</div>

<p>When we reach a <code>while</code> token, we call:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>printStatement</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">whileStatement</span>() {
  <span class="i">consume</span>(<span class="a">TOKEN_LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after &#39;while&#39;.&quot;</span>);
  <span class="i">expression</span>();
  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after condition.&quot;</span>);

  <span class="t">int</span> <span class="i">exitJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP_IF_FALSE</span>);
  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
  <span class="i">statement</span>();

  <span class="i">patchJump</span>(<span class="i">exitJump</span>);
  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>printStatement</em>()</div>

<p>Most of this mirrors <code>if</code> statements<span class="em">&mdash;</span>we compile the condition expression,
surrounded by mandatory parentheses. That&rsquo;s followed by a jump instruction that
skips over the subsequent body statement if the condition is falsey.</p>
<p>We patch the jump after compiling the body and take care to <span
name="pop">pop</span> the condition value from the stack on either path. The
only difference from an <code>if</code> statement is the loop. That looks like this:</p>
<aside name="pop">
<p>Really starting to second-guess my decision to use the same jump instructions
for the logical operators.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  statement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>whileStatement</em>()</div>
<pre class="insert">  <span class="i">emitLoop</span>(<span class="i">loopStart</span>);
</pre><pre class="insert-after">

  patchJump(exitJump);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>whileStatement</em>()</div>

<p>After the body, we call this function to emit a &ldquo;loop&rdquo; instruction. That
instruction needs to know how far back to jump. When jumping forward, we had to
emit the instruction in two stages since we didn&rsquo;t know how far we were going to
jump until after we emitted the jump instruction. We don&rsquo;t have that problem
now. We&rsquo;ve already compiled the point in code that we want to jump back to<span class="em">&mdash;</span>it&rsquo;s right before the condition expression.</p>
<p>All we need to do is capture that location as we compile it.</p>
<div class="codehilite"><pre class="insert-before">static void whileStatement() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>whileStatement</em>()</div>
<pre class="insert">  <span class="t">int</span> <span class="i">loopStart</span> = <span class="i">currentChunk</span>()-&gt;<span class="i">count</span>;
</pre><pre class="insert-after">  consume(TOKEN_LEFT_PAREN, &quot;Expect '(' after 'while'.&quot;);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>whileStatement</em>()</div>

<p>After executing the body of a <code>while</code> loop, we jump all the way back to before
the condition. That way, we re-evaluate the condition expression on each
iteration. We store the chunk&rsquo;s current bytecode count in <code>loopStart</code> to
record the offset in the bytecode right before the condition expression we&rsquo;re
about to compile. Then we pass that into this helper function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitBytes</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">emitLoop</span>(<span class="t">int</span> <span class="i">loopStart</span>) {
  <span class="i">emitByte</span>(<span class="a">OP_LOOP</span>);

  <span class="t">int</span> <span class="i">offset</span> = <span class="i">currentChunk</span>()-&gt;<span class="i">count</span> - <span class="i">loopStart</span> + <span class="n">2</span>;
  <span class="k">if</span> (<span class="i">offset</span> &gt; <span class="a">UINT16_MAX</span>) <span class="i">error</span>(<span class="s">&quot;Loop body too large.&quot;</span>);

  <span class="i">emitByte</span>((<span class="i">offset</span> &gt;&gt; <span class="n">8</span>) &amp; <span class="n">0xff</span>);
  <span class="i">emitByte</span>(<span class="i">offset</span> &amp; <span class="n">0xff</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitBytes</em>()</div>

<p>It&rsquo;s a bit like <code>emitJump()</code> and <code>patchJump()</code> combined. It emits a new loop
instruction, which unconditionally jumps <em>backwards</em> by a given offset. Like the
jump instructions, after that we have a 16-bit operand. We calculate the offset
from the instruction we&rsquo;re currently at to the <code>loopStart</code> point that we want to
jump back to. The <code>+ 2</code> is to take into account the size of the <code>OP_LOOP</code>
instruction&rsquo;s own operands, which we also need to jump over.</p>
<p>From the VM&rsquo;s perspective, there really is no semantic difference between
<code>OP_LOOP</code> and <code>OP_JUMP</code>. Both just add an offset to the <code>ip</code>. We could have used
a single instruction for both and given it a signed offset operand. But I
figured it was a little easier to sidestep the annoying bit twiddling required
to manually pack a signed 16-bit integer into two bytes, and we&rsquo;ve got the
opcode space available, so why not use it?</p>
<p>The new instruction is here:</p>
<div class="codehilite"><pre class="insert-before">  OP_JUMP_IF_FALSE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_LOOP</span>,
</pre><pre class="insert-after">  OP_RETURN,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>And in the VM, we implement it thusly:</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_LOOP</span>: {
        <span class="t">uint16_t</span> <span class="i">offset</span> = <span class="a">READ_SHORT</span>();
        <span class="i">vm</span>.<span class="i">ip</span> -= <span class="i">offset</span>;
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>The only difference from <code>OP_JUMP</code> is a subtraction instead of an addition.
Disassembly is similar too.</p>
<div class="codehilite"><pre class="insert-before">      return jumpInstruction(&quot;OP_JUMP_IF_FALSE&quot;, 1, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_LOOP</span>:
      <span class="k">return</span> <span class="i">jumpInstruction</span>(<span class="s">&quot;OP_LOOP&quot;</span>, -<span class="n">1</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_RETURN:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>That&rsquo;s our <code>while</code> statement. It contains two jumps<span class="em">&mdash;</span>a conditional forward one
to escape the loop when the condition is not met, and an unconditional loop
backward after we have executed the body. The flow looks like this:</p><img src="image/jumping-back-and-forth/while.png" alt="Flowchart of the compiled bytecode of a while statement." />
<h2><a href="#for-statements" id="for-statements"><small>23&#8202;.&#8202;4</small>For Statements</a></h2>
<p>The other looping statement in Lox is the venerable <code>for</code> loop, inherited from
C. It&rsquo;s got a lot more going on with it compared to a <code>while</code> loop. It has three
clauses, all of which are optional:</p>
<p><span name="detail"></span></p>
<ul>
<li>
<p>The initializer can be a variable declaration or an expression. It runs once
at the beginning of the statement.</p>
</li>
<li>
<p>The condition clause is an expression. Like in a <code>while</code> loop, we exit the
loop when it evaluates to something falsey.</p>
</li>
<li>
<p>The increment expression runs once at the end of each loop iteration.</p>
</li>
</ul>
<aside name="detail">
<p>If you want a refresher, the corresponding chapter in part II goes through the
semantics <a href="control-flow.html#for-loops">in more detail</a>.</p>
</aside>
<p>In jlox, the parser desugared a <code>for</code> loop to a synthesized AST for a <code>while</code>
loop with some extra stuff before it and at the end of the body. We&rsquo;ll do
something similar, though we won&rsquo;t go through anything like an AST. Instead,
our bytecode compiler will use the jump and loop instructions we already have.</p>
<p>We&rsquo;ll work our way through the implementation a piece at a time, starting with
the <code>for</code> keyword.</p>
<div class="codehilite"><pre class="insert-before">    printStatement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>statement</em>()</div>
<pre class="insert">  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_FOR</span>)) {
    <span class="i">forStatement</span>();
</pre><pre class="insert-after">  } else if (match(TOKEN_IF)) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>statement</em>()</div>

<p>It calls a helper function. If we only supported <code>for</code> loops with empty clauses
like <code>for (;;)</code>, then we could implement it like this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>expressionStatement</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">forStatement</span>() {
  <span class="i">consume</span>(<span class="a">TOKEN_LEFT_PAREN</span>, <span class="s">&quot;Expect &#39;(&#39; after &#39;for&#39;.&quot;</span>);
  <span class="i">consume</span>(<span class="a">TOKEN_SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39;.&quot;</span>);

  <span class="t">int</span> <span class="i">loopStart</span> = <span class="i">currentChunk</span>()-&gt;<span class="i">count</span>;
  <span class="i">consume</span>(<span class="a">TOKEN_SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39;.&quot;</span>);
  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after for clauses.&quot;</span>);

  <span class="i">statement</span>();
  <span class="i">emitLoop</span>(<span class="i">loopStart</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expressionStatement</em>()</div>

<p>There&rsquo;s a bunch of mandatory punctuation at the top. Then we compile the body.
Like we did for <code>while</code> loops, we record the bytecode offset at the top of the
body and emit a loop to jump back to that point after it. We&rsquo;ve got a working
implementation of <span name="infinite">infinite</span> loops now.</p>
<aside name="infinite">
<p>Alas, without <code>return</code> statements, there isn&rsquo;t any way to terminate it short of
a runtime error.</p>
</aside>
<h3><a href="#initializer-clause" id="initializer-clause"><small>23&#8202;.&#8202;4&#8202;.&#8202;1</small>Initializer clause</a></h3>
<p>Now we&rsquo;ll add the first clause, the initializer. It executes only once, before
the body, so compiling is straightforward.</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_LEFT_PAREN, &quot;Expect '(' after 'for'.&quot;);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>forStatement</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_SEMICOLON</span>)) {
    <span class="c">// No initializer.</span>
  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_VAR</span>)) {
    <span class="i">varDeclaration</span>();
  } <span class="k">else</span> {
    <span class="i">expressionStatement</span>();
  }
</pre><pre class="insert-after">

  int loopStart = currentChunk()-&gt;count;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>forStatement</em>(), replace 1 line</div>

<p>The syntax is a little complex since we allow either a variable declaration or
an expression. We use the presence of the <code>var</code> keyword to tell which we have.
For the expression case, we call <code>expressionStatement()</code> instead of
<code>expression()</code>. That looks for a semicolon, which we need here too, and also
emits an <code>OP_POP</code> instruction to discard the value. We don&rsquo;t want the
initializer to leave anything on the stack.</p>
<p>If a <code>for</code> statement declares a variable, that variable should be scoped to the
loop body. We ensure that by wrapping the whole statement in a scope.</p>
<div class="codehilite"><pre class="insert-before">static void forStatement() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">  <span class="i">beginScope</span>();
</pre><pre class="insert-after">  consume(TOKEN_LEFT_PAREN, &quot;Expect '(' after 'for'.&quot;);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>forStatement</em>()</div>

<p>Then we close it at the end.</p>
<div class="codehilite"><pre class="insert-before">  emitLoop(loopStart);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">  <span class="i">endScope</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>forStatement</em>()</div>

<h3><a href="#condition-clause" id="condition-clause"><small>23&#8202;.&#8202;4&#8202;.&#8202;2</small>Condition clause</a></h3>
<p>Next is the condition expression that can be used to exit the loop.</p>
<div class="codehilite"><pre class="insert-before">  int loopStart = currentChunk()-&gt;count;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>forStatement</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">int</span> <span class="i">exitJump</span> = -<span class="n">1</span>;
  <span class="k">if</span> (!<span class="i">match</span>(<span class="a">TOKEN_SEMICOLON</span>)) {
    <span class="i">expression</span>();
    <span class="i">consume</span>(<span class="a">TOKEN_SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after loop condition.&quot;</span>);

    <span class="c">// Jump out of the loop if the condition is false.</span>
    <span class="i">exitJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP_IF_FALSE</span>);
    <span class="i">emitByte</span>(<span class="a">OP_POP</span>); <span class="c">// Condition.</span>
  }

</pre><pre class="insert-after">  consume(TOKEN_RIGHT_PAREN, &quot;Expect ')' after for clauses.&quot;);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>forStatement</em>(), replace 1 line</div>

<p>Since the clause is optional, we need to see if it&rsquo;s actually present. If the
clause is omitted, the next token must be a semicolon, so we look for that to
tell. If there isn&rsquo;t a semicolon, there must be a condition expression.</p>
<p>In that case, we compile it. Then, just like with <code>while</code>, we emit a conditional
jump that exits the loop if the condition is falsey. Since the jump leaves the
value on the stack, we pop it before executing the body. That ensures we discard
the value when the condition is true.</p>
<p>After the loop body, we need to patch that jump.</p>
<div class="codehilite"><pre class="insert-before">  emitLoop(loopStart);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>forStatement</em>()</div>
<pre class="insert">

  <span class="k">if</span> (<span class="i">exitJump</span> != -<span class="n">1</span>) {
    <span class="i">patchJump</span>(<span class="i">exitJump</span>);
    <span class="i">emitByte</span>(<span class="a">OP_POP</span>); <span class="c">// Condition.</span>
  }

</pre><pre class="insert-after">  endScope();
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>forStatement</em>()</div>

<p>We do this only when there is a condition clause. If there isn&rsquo;t, there&rsquo;s no
jump to patch and no condition value on the stack to pop.</p>
<h3><a href="#increment-clause" id="increment-clause"><small>23&#8202;.&#8202;4&#8202;.&#8202;3</small>Increment clause</a></h3>
<p>I&rsquo;ve saved the best for last, the increment clause. It&rsquo;s pretty convoluted. It
appears textually before the body, but executes <em>after</em> it. If we parsed to an
AST and generated code in a separate pass, we could simply traverse into and
compile the <code>for</code> statement AST&rsquo;s body field before its increment clause.</p>
<p>Unfortunately, we can&rsquo;t compile the increment clause later, since our compiler
only makes a single pass over the code. Instead, we&rsquo;ll <em>jump over</em> the
increment, run the body, jump <em>back</em> up to the increment, run it, and then go to
the next iteration.</p>
<p>I know, a little weird, but hey, it beats manually managing ASTs in memory in C,
right? Here&rsquo;s the code:</p>
<div class="codehilite"><pre class="insert-before">  }

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>forStatement</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (!<span class="i">match</span>(<span class="a">TOKEN_RIGHT_PAREN</span>)) {
    <span class="t">int</span> <span class="i">bodyJump</span> = <span class="i">emitJump</span>(<span class="a">OP_JUMP</span>);
    <span class="t">int</span> <span class="i">incrementStart</span> = <span class="i">currentChunk</span>()-&gt;<span class="i">count</span>;
    <span class="i">expression</span>();
    <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
    <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after for clauses.&quot;</span>);

    <span class="i">emitLoop</span>(<span class="i">loopStart</span>);
    <span class="i">loopStart</span> = <span class="i">incrementStart</span>;
    <span class="i">patchJump</span>(<span class="i">bodyJump</span>);
  }
</pre><pre class="insert-after">

  statement();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>forStatement</em>(), replace 1 line</div>

<p>Again, it&rsquo;s optional. Since this is the last clause, when omitted, the next
token will be the closing parenthesis. When an increment is present, we need to
compile it now, but it shouldn&rsquo;t execute yet. So, first, we emit an
unconditional jump that hops over the increment clause&rsquo;s code to the body of the
loop.</p>
<p>Next, we compile the increment expression itself. This is usually an assignment.
Whatever it is, we only execute it for its side effect, so we also emit a pop to
discard its value.</p>
<p>The last part is a little tricky. First, we emit a loop instruction. This is the
main loop that takes us back to the top of the <code>for</code> loop<span class="em">&mdash;</span>right before the
condition expression if there is one. That loop happens right after the
increment, since the increment executes at the end of each loop iteration.</p>
<p>Then we change <code>loopStart</code> to point to the offset where the increment expression
begins. Later, when we emit the loop instruction after the body statement, this
will cause it to jump up to the <em>increment</em> expression instead of the top of the
loop like it does when there is no increment. This is how we weave the
increment in to run after the body.</p>
<p>It&rsquo;s convoluted, but it all works out. A complete loop with all the clauses
compiles to a flow like this:</p><img src="image/jumping-back-and-forth/for.png" alt="Flowchart of the compiled bytecode of a for statement." />
<p>As with implementing <code>for</code> loops in jlox, we didn&rsquo;t need to touch the runtime.
It all gets compiled down to primitive control flow operations the VM already
supports. In this chapter, we&rsquo;ve taken a big <span name="leap">leap</span>
forward<span class="em">&mdash;</span>clox is now Turing complete. We&rsquo;ve also covered quite a bit of new
syntax: three statements and two expression forms. Even so, it only took three
new simple instructions. That&rsquo;s a pretty good effort-to-reward ratio for the
architecture of our VM.</p>
<aside name="leap">
<p>I couldn&rsquo;t resist the pun. I regret nothing.</p>
</aside>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>In addition to <code>if</code> statements, most C-family languages have a multi-way
<code>switch</code> statement. Add one to clox. The grammar is:</p>
<div class="codehilite"><pre><span class="i">switchStmt</span>     → <span class="s">&quot;switch&quot;</span> <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span>
                 <span class="s">&quot;{&quot;</span> <span class="i">switchCase</span>* <span class="i">defaultCase</span>? <span class="s">&quot;}&quot;</span> ;
<span class="i">switchCase</span>     → <span class="s">&quot;case&quot;</span> <span class="i">expression</span> <span class="s">&quot;:&quot;</span> <span class="i">statement</span>* ;
<span class="i">defaultCase</span>    → <span class="s">&quot;default&quot;</span> <span class="s">&quot;:&quot;</span> <span class="i">statement</span>* ;
</pre></div>
<p>To execute a <code>switch</code> statement, first evaluate the parenthesized switch
value expression. Then walk the cases. For each case, evaluate its value
expression. If the case value is equal to the switch value, execute the
statements under the case and then exit the <code>switch</code> statement. Otherwise,
try the next case. If no case matches and there is a <code>default</code> clause,
execute its statements.</p>
<p>To keep things simpler, we&rsquo;re omitting fallthrough and <code>break</code> statements.
Each case automatically jumps to the end of the switch statement after its
statements are done.</p>
</li>
<li>
<p>In jlox, we had a challenge to add support for <code>break</code> statements. This
time, let&rsquo;s do <code>continue</code>:</p>
<div class="codehilite"><pre><span class="i">continueStmt</span>   → <span class="s">&quot;continue&quot;</span> <span class="s">&quot;;&quot;</span> ;
</pre></div>
<p>A <code>continue</code> statement jumps directly to the top of the nearest enclosing
loop, skipping the rest of the loop body. Inside a <code>for</code> loop, a <code>continue</code>
jumps to the increment clause, if there is one. It&rsquo;s a compile-time error to
have a <code>continue</code> statement not enclosed in a loop.</p>
<p>Make sure to think about scope. What should happen to local variables
declared inside the body of the loop or in blocks nested inside the loop
when a <code>continue</code> is executed?</p>
</li>
<li>
<p>Control flow constructs have been mostly unchanged since Algol 68. Language
evolution since then has focused on making code more declarative and high
level, so imperative control flow hasn&rsquo;t gotten much attention.</p>
<p>For fun, try to invent a useful novel control flow feature for Lox. It can
be a refinement of an existing form or something entirely new. In practice,
it&rsquo;s hard to come up with something useful enough at this low expressiveness
level to outweigh the cost of forcing a user to learn an unfamiliar notation
and behavior, but it&rsquo;s a good chance to practice your design skills.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Considering Goto Harmful</a></h2>
<p>Discovering that all of our beautiful structured control flow in Lox is actually
compiled to raw unstructured jumps is like the moment in Scooby Doo when the
monster rips the mask off their face. It was goto all along! Except in this
case, the monster is <em>under</em> the mask. We all know goto is evil. But<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>why?</p>
<p>It is true that you can write outrageously unmaintainable code using goto. But I
don&rsquo;t think most programmers around today have seen that first hand. It&rsquo;s been a
long time since that style was common. These days, it&rsquo;s a bogeyman we invoke
in scary stories around the campfire.</p>
<p>The reason we rarely confront that monster in person is that Edsger Dijkstra
slayed it with his famous letter &ldquo;Go To Statement Considered Harmful&rdquo;, published
in <em>Communications of the ACM</em> (March, 1968). Debate around structured
programming had been fierce for some time with adherents on both sides, but I
think Dijkstra deserves the most credit for effectively ending it. Most new
languages today have no unstructured jump statements.</p>
<p>A one-and-a-half page letter that almost single-handedly destroyed a language
feature must be pretty impressive stuff. If you haven&rsquo;t read it, I encourage you
to do so. It&rsquo;s a seminal piece of computer science lore, one of our tribe&rsquo;s
ancestral songs. Also, it&rsquo;s a nice, short bit of practice for reading academic
CS <span name="style">writing</span>, which is a useful skill to develop.</p>
<aside name="style">
<p>That is, if you can get past Dijkstra&rsquo;s insufferable faux-modest
self-aggrandizing writing style:</p>
<blockquote>
<p>More recently I discovered why the use of the go to statement has such
disastrous effects. <span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>At that time I did not attach too much importance to
this discovery; I now submit my considerations for publication because in very
recent discussions in which the subject turned up, I have been urged to do so.</p>
</blockquote>
<p>Ah, yet another one of my many discoveries. I couldn&rsquo;t even be bothered to write
it up until the clamoring masses begged me to.</p>
</aside>
<p>I&rsquo;ve read it through a number of times, along with a few critiques, responses,
and commentaries. I ended up with mixed feelings, at best. At a very high level,
I&rsquo;m with him. His general argument is something like this:</p>
<ol>
<li>
<p>As programmers, we write programs<span class="em">&mdash;</span>static text<span class="em">&mdash;</span>but what we care about
is the actual running program<span class="em">&mdash;</span>its dynamic behavior.</p>
</li>
<li>
<p>We&rsquo;re better at reasoning about static things than dynamic things. (He
doesn&rsquo;t provide any evidence to support this claim, but I accept it.)</p>
</li>
<li>
<p>Thus, the more we can make the dynamic execution of the program reflect its
textual structure, the better.</p>
</li>
</ol>
<p>This is a good start. Drawing our attention to the separation between the code
we write and the code as it runs inside the machine is an interesting insight.
Then he tries to define a &ldquo;correspondence&rdquo; between program text and execution.
For someone who spent literally his entire career advocating greater rigor in
programming, his definition is pretty hand-wavy. He says:</p>
<blockquote>
<p>Let us now consider how we can characterize the progress of a process. (You
may think about this question in a very concrete manner: suppose that a
process, considered as a time succession of actions, is stopped after an
arbitrary action, what data do we have to fix in order that we can redo the
process until the very same point?)</p>
</blockquote>
<p>Imagine it like this. You have two computers with the same program running on
the exact same inputs<span class="em">&mdash;</span>so totally deterministic. You pause one of them at an
arbitrary point in its execution. What data would you need to send to the other
computer to be able to stop it exactly as far along as the first one was?</p>
<p>If your program allows only simple statements like assignment, it&rsquo;s easy. You
just need to know the point after the last statement you executed. Basically a
breakpoint, the <code>ip</code> in our VM, or the line number in an error message. Adding
branching control flow like <code>if</code> and <code>switch</code> doesn&rsquo;t add any more to this. Even
if the marker points inside a branch, we can still tell where we are.</p>
<p>Once you add function calls, you need something more. You could have paused the
first computer in the middle of a function, but that function may be called from
multiple places. To pause the second machine at exactly the same point in <em>the
entire program&rsquo;s</em> execution, you need to pause it on the <em>right</em> call to that
function.</p>
<p>So you need to know not just the current statement, but, for function calls that
haven&rsquo;t returned yet, you need to know the locations of the call sites. In other
words, a call stack, though I don&rsquo;t think that term existed when Dijkstra wrote
this. Groovy.</p>
<p>He notes that loops make things harder. If you pause in the middle of a loop
body, you don&rsquo;t know how many iterations have run. So he says you also need to
keep an iteration count. And, since loops can nest, you need a stack of those
(presumably interleaved with the call stack pointers since you can be in loops
in outer calls too).</p>
<p>This is where it gets weird. So we&rsquo;re really building to something now, and you
expect him to explain how goto breaks all of this. Instead, he just says:</p>
<blockquote>
<p>The unbridled use of the go to statement has an immediate consequence that it
becomes terribly hard to find a meaningful set of coordinates in which to
describe the process progress.</p>
</blockquote>
<p>He doesn&rsquo;t prove that this is hard, or say why. He just says it. He does say
that one approach is unsatisfactory:</p>
<blockquote>
<p>With the go to statement one can, of course, still describe the progress
uniquely by a counter counting the number of actions performed since program
start (viz. a kind of normalized clock). The difficulty is that such a
coordinate, although unique, is utterly unhelpful.</p>
</blockquote>
<p>But<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>that&rsquo;s effectively what loop counters do, and he was fine with those.
It&rsquo;s not like every loop is a simple &ldquo;for every integer from 0 to 10&rdquo;
incrementing count. Many are <code>while</code> loops with complex conditionals.</p>
<p>Taking an example close to home, consider the core bytecode execution loop at
the heart of clox. Dijkstra argues that that loop is tractable because we can
simply count how many times the loop has run to reason about its progress. But
that loop runs once for each executed instruction in some user&rsquo;s compiled Lox
program. Does knowing that it executed 6,201 bytecode instructions really tell
us VM maintainers <em>anything</em> edifying about the state of the interpreter?</p>
<p>In fact, this particular example points to a deeper truth. Böhm and Jacopini
<a href="https://en.wikipedia.org/wiki/Structured_program_theorem">proved</a> that <em>any</em> control flow using goto can be transformed into one using
just sequencing, loops, and branches. Our bytecode interpreter loop is a living
example of that proof: it implements the unstructured control flow of the clox
bytecode instruction set without using any gotos itself.</p>
<p>That seems to offer a counter-argument to Dijkstra&rsquo;s claim: you <em>can</em> define a
correspondence for a program using gotos by transforming it to one that doesn&rsquo;t
and then use the correspondence from that program, which<span class="em">&mdash;</span>according to him<span class="em">&mdash;</span>is acceptable because it uses only branches and loops.</p>
<p>But, honestly, my argument here is also weak. I think both of us are basically
doing pretend math and using fake logic to make what should be an empirical,
human-centered argument. Dijkstra is right that some code using goto is really
bad. Much of that could and should be turned into clearer code by using
structured control flow.</p>
<p>When goto is eliminated completely from a language, you&rsquo;re definitely prevented from
writing bad code using gotos. It may be that forcing users to use structured
control flow and making it an uphill battle to write goto-like code using those
constructs is a net win for all of our productivity.</p>
<p>But I do wonder sometimes if we threw out the baby with the bathwater. In the
absence of goto, we often resort to more complex structured patterns. The
&ldquo;switch inside a loop&rdquo; is a classic one. Another is using a guard variable to
exit out of a series of nested loops:</p><span name="break">
</span>
<div class="codehilite"><pre><span class="c">// See if the matrix contains a zero.</span>
<span class="t">bool</span> <span class="i">found</span> = <span class="k">false</span>;
<span class="k">for</span> (<span class="t">int</span> <span class="i">x</span> = <span class="n">0</span>; <span class="i">x</span> &lt; <span class="i">xSize</span>; <span class="i">x</span>++) {
  <span class="k">for</span> (<span class="t">int</span> <span class="i">y</span> = <span class="n">0</span>; <span class="i">y</span> &lt; <span class="i">ySize</span>; <span class="i">y</span>++) {
    <span class="k">for</span> (<span class="t">int</span> <span class="i">z</span> = <span class="n">0</span>; <span class="i">z</span> &lt; <span class="i">zSize</span>; <span class="i">z</span>++) {
      <span class="k">if</span> (<span class="i">matrix</span>[<span class="i">x</span>][<span class="i">y</span>][<span class="i">z</span>] == <span class="n">0</span>) {
        <span class="i">printf</span>(<span class="s">&quot;found&quot;</span>);
        <span class="i">found</span> = <span class="k">true</span>;
        <span class="k">break</span>;
      }
    }
    <span class="k">if</span> (<span class="i">found</span>) <span class="k">break</span>;
  }
  <span class="k">if</span> (<span class="i">found</span>) <span class="k">break</span>;
}
</pre></div>
<p>Is that really better than:</p>
<div class="codehilite"><pre><span class="k">for</span> (<span class="t">int</span> <span class="i">x</span> = <span class="n">0</span>; <span class="i">x</span> &lt; <span class="i">xSize</span>; <span class="i">x</span>++) {
  <span class="k">for</span> (<span class="t">int</span> <span class="i">y</span> = <span class="n">0</span>; <span class="i">y</span> &lt; <span class="i">ySize</span>; <span class="i">y</span>++) {
    <span class="k">for</span> (<span class="t">int</span> <span class="i">z</span> = <span class="n">0</span>; <span class="i">z</span> &lt; <span class="i">zSize</span>; <span class="i">z</span>++) {
      <span class="k">if</span> (<span class="i">matrix</span>[<span class="i">x</span>][<span class="i">y</span>][<span class="i">z</span>] == <span class="n">0</span>) {
        <span class="i">printf</span>(<span class="s">&quot;found&quot;</span>);
        <span class="k">goto</span> <span class="i">done</span>;
      }
    }
  }
}
<span class="i">done</span>:
</pre></div>
<aside name="break">
<p>You could do this without <code>break</code> statements<span class="em">&mdash;</span>themselves a limited goto-ish
construct<span class="em">&mdash;</span>by inserting <code>!found &amp;&amp;</code> at the beginning of the condition clause
of each loop.</p>
</aside>
<p>I guess what I really don&rsquo;t like is that we&rsquo;re making language design and
engineering decisions today based on fear. Few people today have any subtle
understanding of the problems and benefits of goto. Instead, we just think it&rsquo;s
&ldquo;considered harmful&rdquo;. Personally, I&rsquo;ve never found dogma a good starting place
for quality creative work.</p>
</div>

<footer>
<a href="calls-and-functions.html" class="next">
  Next Chapter: &ldquo;Calls and Functions&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/local-variables.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Local Variables &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Local Variables<small>22</small></a></h3>

<ul>
    <li><a href="#representing-local-variables"><small>22.1</small> Representing Local Variables</a></li>
    <li><a href="#block-statements"><small>22.2</small> Block Statements</a></li>
    <li><a href="#declaring-local-variables"><small>22.3</small> Declaring Local Variables</a></li>
    <li><a href="#using-locals"><small>22.4</small> Using Locals</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="global-variables.html" title="Global Variables" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="jumping-back-and-forth.html" title="Jumping Back and Forth" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="global-variables.html" title="Global Variables" class="prev">←</a>
<a href="jumping-back-and-forth.html" title="Jumping Back and Forth" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Local Variables<small>22</small></a></h3>

<ul>
    <li><a href="#representing-local-variables"><small>22.1</small> Representing Local Variables</a></li>
    <li><a href="#block-statements"><small>22.2</small> Block Statements</a></li>
    <li><a href="#declaring-local-variables"><small>22.3</small> Declaring Local Variables</a></li>
    <li><a href="#using-locals"><small>22.4</small> Using Locals</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="global-variables.html" title="Global Variables" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="jumping-back-and-forth.html" title="Jumping Back and Forth" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">22</div>
  <h1>Local Variables</h1>

<blockquote>
<p>And as imagination bodies forth<br />
The forms of things unknown, the poet&rsquo;s pen<br />
Turns them to shapes and gives to airy nothing<br />
A local habitation and a name.</p>
<p><cite>William Shakespeare, <em>A Midsummer Night&rsquo;s Dream</em></cite></p>
</blockquote>
<p>The <a href="global-variables.html">last chapter</a> introduced variables to clox, but only of the <span
name="global">global</span> variety. In this chapter, we&rsquo;ll extend that to
support blocks, block scope, and local variables. In jlox, we managed to pack
all of that and globals into one chapter. For clox, that&rsquo;s two chapters&rsquo; worth of
work partially because, frankly, everything takes more effort in C.</p>
<aside name="global">
<p>There&rsquo;s probably some dumb &ldquo;think globally, act locally&rdquo; joke here, but I&rsquo;m
struggling to find it.</p>
</aside>
<p>But an even more important reason is that our approach to local variables will
be quite different from how we implemented globals. Global variables are late
bound in Lox. &ldquo;Late&rdquo; in this context means &ldquo;resolved after compile time&rdquo;. That&rsquo;s
good for keeping the compiler simple, but not great for performance. Local
variables are one of the most-used <span name="params">parts</span> of a
language. If locals are slow, <em>everything</em> is slow. So we want a strategy for
local variables that&rsquo;s as efficient as possible.</p>
<aside name="params">
<p>Function parameters are also heavily used. They work like local variables too,
so we&rsquo;ll use the same implementation technique for them.</p>
</aside>
<p>Fortunately, lexical scoping is here to help us. As the name implies, lexical
scope means we can resolve a local variable just by looking at the text of the
program<span class="em">&mdash;</span>locals are <em>not</em> late bound. Any processing work we do in the
compiler is work we <em>don&rsquo;t</em> have to do at runtime, so our implementation of
local variables will lean heavily on the compiler.</p>
<h2><a href="#representing-local-variables" id="representing-local-variables"><small>22&#8202;.&#8202;1</small>Representing Local Variables</a></h2>
<p>The nice thing about hacking on a programming language in modern times is
there&rsquo;s a long lineage of other languages to learn from. So how do C and Java
manage their local variables? Why, on the stack, of course! They typically use
the native stack mechanisms supported by the chip and OS. That&rsquo;s a little too
low level for us, but inside the virtual world of clox, we have our own stack we
can use.</p>
<p>Right now, we only use it for holding on to <strong>temporaries</strong><span class="em">&mdash;</span>short-lived blobs
of data that we need to remember while computing an expression. As long as we
don&rsquo;t get in the way of those, we can stuff our local variables onto the stack
too. This is great for performance. Allocating space for a new local requires
only incrementing the <code>stackTop</code> pointer, and freeing is likewise a decrement.
Accessing a variable from a known stack slot is an indexed array lookup.</p>
<p>We do need to be careful, though. The VM expects the stack to behave like, well,
a stack. We have to be OK with allocating new locals only on the top of the
stack, and we have to accept that we can discard a local only when nothing is
above it on the stack. Also, we need to make sure temporaries don&rsquo;t interfere.</p>
<p>Conveniently, the design of Lox is in <span name="harmony">harmony</span> with
these constraints. New locals are always created by declaration statements.
Statements don&rsquo;t nest inside expressions, so there are never any temporaries on
the stack when a statement begins executing. Blocks are strictly nested. When a
block ends, it always takes the innermost, most recently declared locals with
it. Since those are also the locals that came into scope last, they should be on
top of the stack where we need them.</p>
<aside name="harmony">
<p>This alignment obviously isn&rsquo;t coincidental. I designed Lox to be amenable to
single-pass compilation to stack-based bytecode. But I didn&rsquo;t have to tweak the
language too much to fit in those restrictions. Most of its design should feel
pretty natural.</p>
<p>This is in large part because the history of languages is deeply tied to
single-pass compilation and<span class="em">&mdash;</span>to a lesser degree<span class="em">&mdash;</span>stack-based architectures.
Lox&rsquo;s block scoping follows a tradition stretching back to BCPL. As programmers,
our intuition of what&rsquo;s &ldquo;normal&rdquo; in a language is informed even today by the
hardware limitations of yesteryear.</p>
</aside>
<p>Step through this example program and watch how the local variables come in and
go out of scope:</p><img src="image/local-variables/scopes.png" alt="A series of local variables come into and out of scope in a stack-like fashion." />
<p>See how they fit a stack perfectly? It seems that the stack will work for
storing locals at runtime. But we can go further than that. Not only do we know
<em>that</em> they will be on the stack, but we can even pin down precisely <em>where</em>
they will be on the stack. Since the compiler knows exactly which local
variables are in scope at any point in time, it can effectively simulate the
stack during compilation and note <span name="fn">where</span> in the stack each
variable lives.</p>
<p>We&rsquo;ll take advantage of this by using these stack offsets as operands for the
bytecode instructions that read and store local variables. This makes working
with locals deliciously fast<span class="em">&mdash;</span>as simple as indexing into an array.</p>
<aside name="fn">
<p>In this chapter, locals start at the bottom of the VM&rsquo;s stack array and are
indexed from there. When we add <a href="calls-and-functions.html">functions</a>, that scheme gets a little more
complex. Each function needs its own region of the stack for its parameters and
local variables. But, as we&rsquo;ll see, that doesn&rsquo;t add as much complexity as you
might expect.</p>
</aside>
<p>There&rsquo;s a lot of state we need to track in the compiler to make this whole thing
go, so let&rsquo;s get started there. In jlox, we used a linked chain of &ldquo;environment&rdquo;
HashMaps to track which local variables were currently in scope. That&rsquo;s sort of
the classic, schoolbook way of representing lexical scope. For clox, as usual,
we&rsquo;re going a little closer to the metal. All of the state lives in a new
struct.</p>
<div class="codehilite"><pre class="insert-before">} ParseRule;
</pre><div class="source-file"><em>compiler.c</em><br>
add after struct <em>ParseRule</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Local</span> <span class="i">locals</span>[<span class="a">UINT8_COUNT</span>];
  <span class="t">int</span> <span class="i">localCount</span>;
  <span class="t">int</span> <span class="i">scopeDepth</span>;
} <span class="t">Compiler</span>;
</pre><pre class="insert-after">

Parser parser;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after struct <em>ParseRule</em></div>

<p>We have a simple, flat array of all locals that are in scope at each point in
the compilation process. They are <span name="order">ordered</span> in the array
in the order that their declarations appear in the code. Since the instruction
operand we&rsquo;ll use to encode a local is a single byte, our VM has a hard limit on
the number of locals that can be in scope at once. That means we can also give
the locals array a fixed size.</p>
<aside name="order">
<p>We&rsquo;re writing a single-pass compiler, so it&rsquo;s not like we have <em>too</em> many other
options for how to order them in the array.</p>
</aside>
<div class="codehilite"><pre class="insert-before">#define DEBUG_TRACE_EXECUTION
</pre><div class="source-file"><em>common.h</em></div>
<pre class="insert">

<span class="a">#define UINT8_COUNT (UINT8_MAX + 1)</span>
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>common.h</em></div>

<p>Back in the Compiler struct, the <code>localCount</code> field tracks how many locals are
in scope<span class="em">&mdash;</span>how many of those array slots are in use. We also track the &ldquo;scope
depth&rdquo;. This is the number of blocks surrounding the current bit of code we&rsquo;re
compiling.</p>
<p>Our Java interpreter used a chain of maps to keep each block&rsquo;s variables
separate from other blocks&rsquo;. This time, we&rsquo;ll simply number variables with the
level of nesting where they appear. Zero is the global scope, one is the first
top-level block, two is inside that, you get the idea. We use this to track
which block each local belongs to so that we know which locals to discard when a
block ends.</p>
<p>Each local in the array is one of these:</p>
<div class="codehilite"><pre class="insert-before">} ParseRule;
</pre><div class="source-file"><em>compiler.c</em><br>
add after struct <em>ParseRule</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Token</span> <span class="i">name</span>;
  <span class="t">int</span> <span class="i">depth</span>;
} <span class="t">Local</span>;
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after struct <em>ParseRule</em></div>

<p>We store the name of the variable. When we&rsquo;re resolving an identifier, we
compare the identifier&rsquo;s lexeme with each local&rsquo;s name to find a match. It&rsquo;s
pretty hard to resolve a variable if you don&rsquo;t know its name. The <code>depth</code> field
records the scope depth of the block where the local variable was declared.
That&rsquo;s all the state we need for now.</p>
<p>This is a very different representation from what we had in jlox, but it still
lets us answer all of the same questions our compiler needs to ask of the
lexical environment. The next step is figuring out how the compiler <em>gets</em> at
this state. If we were <span name="thread">principled</span> engineers, we&rsquo;d
give each function in the front end a parameter that accepts a pointer to a
Compiler. We&rsquo;d create a Compiler at the beginning and carefully thread it
through each function call<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>but that would mean a lot of boring changes to
the code we already wrote, so here&rsquo;s a global variable instead:</p>
<aside name="thread">
<p>In particular, if we ever want to use our compiler in a multi-threaded
application, possibly with multiple compilers running in parallel, then using a
global variable is a <em>bad</em> idea.</p>
</aside>
<div class="codehilite"><pre class="insert-before">Parser parser;
</pre><div class="source-file"><em>compiler.c</em><br>
add after variable <em>parser</em></div>
<pre class="insert"><span class="t">Compiler</span>* <span class="i">current</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">Chunk* compilingChunk;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after variable <em>parser</em></div>

<p>Here&rsquo;s a little function to initialize the compiler:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>emitConstant</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">initCompiler</span>(<span class="t">Compiler</span>* <span class="i">compiler</span>) {
  <span class="i">compiler</span>-&gt;<span class="i">localCount</span> = <span class="n">0</span>;
  <span class="i">compiler</span>-&gt;<span class="i">scopeDepth</span> = <span class="n">0</span>;
  <span class="i">current</span> = <span class="i">compiler</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>emitConstant</em>()</div>

<p>When we start compiling, we call it to get everything into a clean state.</p>
<div class="codehilite"><pre class="insert-before">  initScanner(source);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()</div>
<pre class="insert">  <span class="t">Compiler</span> <span class="i">compiler</span>;
  <span class="i">initCompiler</span>(&amp;<span class="i">compiler</span>);
</pre><pre class="insert-after">  compilingChunk = chunk;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>()</div>

<p>Our compiler has the data it needs, but not the operations on that data. There&rsquo;s
no way to create and destroy scopes, or add and resolve variables. We&rsquo;ll add
those as we need them. First, let&rsquo;s start building some language features.</p>
<h2><a href="#block-statements" id="block-statements"><small>22&#8202;.&#8202;2</small>Block Statements</a></h2>
<p>Before we can have any local variables, we need some local scopes. These come
from two things: function bodies and <span name="block">blocks</span>. Functions
are a big chunk of work that we&rsquo;ll tackle in <a href="calls-and-functions.html">a later chapter</a>, so
for now we&rsquo;re only going to do blocks. As usual, we start with the syntax. The
new grammar we&rsquo;ll introduce is:</p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">block</span> ;

<span class="i">block</span>          → <span class="s">&quot;{&quot;</span> <span class="i">declaration</span>* <span class="s">&quot;}&quot;</span> ;
</pre></div>
<aside name="block">
<p>When you think about it, &ldquo;block&rdquo; is a weird name. Used metaphorically, &ldquo;block&rdquo;
usually means a small indivisible unit, but for some reason, the Algol 60
committee decided to use it to refer to a <em>compound</em> structure<span class="em">&mdash;</span>a series of
statements. It could be worse, I suppose. Algol 58 called <code>begin</code> and <code>end</code>
&ldquo;statement parentheses&rdquo;.</p><img src="image/local-variables/block.png" alt="A cinder block." class="above" />
</aside>
<p>Blocks are a kind of statement, so the rule for them goes in the <code>statement</code>
production. The corresponding code to compile one looks like this:</p>
<div class="codehilite"><pre class="insert-before">  if (match(TOKEN_PRINT)) {
    printStatement();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>statement</em>()</div>
<pre class="insert">  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_LEFT_BRACE</span>)) {
    <span class="i">beginScope</span>();
    <span class="i">block</span>();
    <span class="i">endScope</span>();
</pre><pre class="insert-after">  } else {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>statement</em>()</div>

<p>After <span name="helper">parsing</span> the initial curly brace, we use this
helper function to compile the rest of the block:</p>
<aside name="helper">
<p>This function will come in handy later for compiling function bodies.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>expression</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">block</span>() {
  <span class="k">while</span> (!<span class="i">check</span>(<span class="a">TOKEN_RIGHT_BRACE</span>) &amp;&amp; !<span class="i">check</span>(<span class="a">TOKEN_EOF</span>)) {
    <span class="i">declaration</span>();
  }

  <span class="i">consume</span>(<span class="a">TOKEN_RIGHT_BRACE</span>, <span class="s">&quot;Expect &#39;}&#39; after block.&quot;</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>expression</em>()</div>

<p>It keeps parsing declarations and statements until it hits the closing brace. As
we do with any loop in the parser, we also check for the end of the token
stream. This way, if there&rsquo;s a malformed program with a missing closing curly,
the compiler doesn&rsquo;t get stuck in a loop.</p>
<p>Executing a block simply means executing the statements it contains, one after
the other, so there isn&rsquo;t much to compiling them. The semantically interesting
thing blocks do is create scopes. Before we compile the body of a block, we call
this function to enter a new local scope:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>endCompiler</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">beginScope</span>() {
  <span class="i">current</span>-&gt;<span class="i">scopeDepth</span>++;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>endCompiler</em>()</div>

<p>In order to &ldquo;create&rdquo; a scope, all we do is increment the current depth. This is
certainly much faster than jlox, which allocated an entire new HashMap for
each one. Given <code>beginScope()</code>, you can probably guess what <code>endScope()</code> does.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>beginScope</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">endScope</span>() {
  <span class="i">current</span>-&gt;<span class="i">scopeDepth</span>--;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>beginScope</em>()</div>

<p>That&rsquo;s it for blocks and scopes<span class="em">&mdash;</span>more or less<span class="em">&mdash;</span>so we&rsquo;re ready to stuff some
variables into them.</p>
<h2><a href="#declaring-local-variables" id="declaring-local-variables"><small>22&#8202;.&#8202;3</small>Declaring Local Variables</a></h2>
<p>Usually we start with parsing here, but our compiler already supports parsing
and compiling variable declarations. We&rsquo;ve got <code>var</code> statements, identifier
expressions and assignment in there now. It&rsquo;s just that the compiler assumes
all variables are global. So we don&rsquo;t need any new parsing support; we just need
to hook up the new scoping semantics to the existing code.</p><img src="image/local-variables/declaration.png" alt="The code flow within varDeclaration()." />
<p>Variable declaration parsing begins in <code>varDeclaration()</code> and relies on a couple
of other functions. First, <code>parseVariable()</code> consumes the identifier token for
the variable name, adds its lexeme to the chunk&rsquo;s constant table as a string,
and then returns the constant table index where it was added. Then, after
<code>varDeclaration()</code> compiles the initializer, it calls <code>defineVariable()</code> to emit
the bytecode for storing the variable&rsquo;s value in the global variable hash table.</p>
<p>Both of those helpers need a few changes to support local variables. In
<code>parseVariable()</code>, we add:</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_IDENTIFIER, errorMessage);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>parseVariable</em>()</div>
<pre class="insert">

  <span class="i">declareVariable</span>();
  <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">scopeDepth</span> &gt; <span class="n">0</span>) <span class="k">return</span> <span class="n">0</span>;

</pre><pre class="insert-after">  return identifierConstant(&amp;parser.previous);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>parseVariable</em>()</div>

<p>First, we &ldquo;declare&rdquo; the variable. I&rsquo;ll get to what that means in a second. After
that, we exit the function if we&rsquo;re in a local scope. At runtime, locals aren&rsquo;t
looked up by name. There&rsquo;s no need to stuff the variable&rsquo;s name into the
constant table, so if the declaration is inside a local scope, we return a dummy
table index instead.</p>
<p>Over in <code>defineVariable()</code>, we need to emit the code to store a local variable
if we&rsquo;re in a local scope. It looks like this:</p>
<div class="codehilite"><pre class="insert-before">static void defineVariable(uint8_t global) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>defineVariable</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">scopeDepth</span> &gt; <span class="n">0</span>) {
    <span class="k">return</span>;
  }

</pre><pre class="insert-after">  emitBytes(OP_DEFINE_GLOBAL, global);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>defineVariable</em>()</div>

<p>Wait, what? Yup. That&rsquo;s it. There is no code to create a local variable at
runtime. Think about what state the VM is in. It has already executed the code
for the variable&rsquo;s initializer (or the implicit <code>nil</code> if the user omitted an
initializer), and that value is sitting right on top of the stack as the only
remaining temporary. We also know that new locals are allocated at the top of
the stack<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>right where that value already is. Thus, there&rsquo;s nothing to do. The
temporary simply <em>becomes</em> the local variable. It doesn&rsquo;t get much more
efficient than that.</p>
<p><span name="locals"></span></p><img src="image/local-variables/local-slots.png" alt="Walking through the bytecode execution showing that each initializer's result ends up in the local's slot." />
<aside name="locals">
<p>The code on the left compiles to the sequence of instructions on the right.</p>
</aside>
<p>OK, so what&rsquo;s &ldquo;declaring&rdquo; about? Here&rsquo;s what that does:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>identifierConstant</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">declareVariable</span>() {
  <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">scopeDepth</span> == <span class="n">0</span>) <span class="k">return</span>;

  <span class="t">Token</span>* <span class="i">name</span> = &amp;<span class="i">parser</span>.<span class="i">previous</span>;
  <span class="i">addLocal</span>(*<span class="i">name</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>identifierConstant</em>()</div>

<p>This is the point where the compiler records the existence of the variable. We
only do this for locals, so if we&rsquo;re in the top-level global scope, we just bail
out. Because global variables are late bound, the compiler doesn&rsquo;t keep track of
which declarations for them it has seen.</p>
<p>But for local variables, the compiler does need to remember that the variable
exists. That&rsquo;s what declaring it does<span class="em">&mdash;</span>it adds it to the compiler&rsquo;s list of
variables in the current scope. We implement that using another new function.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>identifierConstant</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">addLocal</span>(<span class="t">Token</span> <span class="i">name</span>) {
  <span class="t">Local</span>* <span class="i">local</span> = &amp;<span class="i">current</span>-&gt;<span class="i">locals</span>[<span class="i">current</span>-&gt;<span class="i">localCount</span>++];
  <span class="i">local</span>-&gt;<span class="i">name</span> = <span class="i">name</span>;
  <span class="i">local</span>-&gt;<span class="i">depth</span> = <span class="i">current</span>-&gt;<span class="i">scopeDepth</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>identifierConstant</em>()</div>

<p>This initializes the next available Local in the compiler&rsquo;s array of variables.
It stores the variable&rsquo;s <span name="lexeme">name</span> and the depth of the
scope that owns the variable.</p>
<aside name="lexeme">
<p>Worried about the lifetime of the string for the variable&rsquo;s name? The Local
directly stores a copy of the Token struct for the identifier. Tokens store a
pointer to the first character of their lexeme and the lexeme&rsquo;s length. That
pointer points into the original source string for the script or REPL entry
being compiled.</p>
<p>As long as that string stays around during the entire compilation process<span class="em">&mdash;</span>which it must since, you know, we&rsquo;re compiling it<span class="em">&mdash;</span>then all of the tokens
pointing into it are fine.</p>
</aside>
<p>Our implementation is fine for a correct Lox program, but what about invalid
code? Let&rsquo;s aim to be robust. The first error to handle is not really the user&rsquo;s
fault, but more a limitation of the VM. The instructions to work with local
variables refer to them by slot index. That index is stored in a single-byte
operand, which means the VM only supports up to 256 local variables in scope at
one time.</p>
<p>If we try to go over that, not only could we not refer to them at runtime, but
the compiler would overwrite its own locals array, too. Let&rsquo;s prevent that.</p>
<div class="codehilite"><pre class="insert-before">static void addLocal(Token name) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>addLocal</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">localCount</span> == <span class="a">UINT8_COUNT</span>) {
    <span class="i">error</span>(<span class="s">&quot;Too many local variables in function.&quot;</span>);
    <span class="k">return</span>;
  }

</pre><pre class="insert-after">  Local* local = &amp;current-&gt;locals[current-&gt;localCount++];
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>addLocal</em>()</div>

<p>The next case is trickier. Consider:</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;first&quot;</span>;
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;second&quot;</span>;
}
</pre></div>
<p>At the top level, Lox allows redeclaring a variable with the same name as a
previous declaration because that&rsquo;s useful for the REPL. But inside a local
scope, that&rsquo;s a pretty <span name="rust">weird</span> thing to do. It&rsquo;s likely
to be a mistake, and many languages, including our own Lox, enshrine that
assumption by making this an error.</p>
<aside name="rust">
<p>Interestingly, the Rust programming language <em>does</em> allow this, and idiomatic
code relies on it.</p>
</aside>
<p>Note that the above program is different from this one:</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer&quot;</span>;
  {
    <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;inner&quot;</span>;
  }
}
</pre></div>
<p>It&rsquo;s OK to have two variables with the same name in <em>different</em> scopes, even
when the scopes overlap such that both are visible at the same time. That&rsquo;s
shadowing, and Lox does allow that. It&rsquo;s only an error to have two variables
with the same name in the <em>same</em> local scope.</p>
<p>We detect that error like so:</p>
<div class="codehilite"><pre class="insert-before">  Token* name = &amp;parser.previous;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>declareVariable</em>()</div>
<pre class="insert">  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="i">current</span>-&gt;<span class="i">localCount</span> - <span class="n">1</span>; <span class="i">i</span> &gt;= <span class="n">0</span>; <span class="i">i</span>--) {
    <span class="t">Local</span>* <span class="i">local</span> = &amp;<span class="i">current</span>-&gt;<span class="i">locals</span>[<span class="i">i</span>];
    <span class="k">if</span> (<span class="i">local</span>-&gt;<span class="i">depth</span> != -<span class="n">1</span> &amp;&amp; <span class="i">local</span>-&gt;<span class="i">depth</span> &lt; <span class="i">current</span>-&gt;<span class="i">scopeDepth</span>) {
      <span class="k">break</span>;<span name="negative"> </span>
    }

    <span class="k">if</span> (<span class="i">identifiersEqual</span>(<span class="i">name</span>, &amp;<span class="i">local</span>-&gt;<span class="i">name</span>)) {
      <span class="i">error</span>(<span class="s">&quot;Already a variable with this name in this scope.&quot;</span>);
    }
  }

</pre><pre class="insert-after">  addLocal(*name);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>declareVariable</em>()</div>

<aside name="negative">
<p>Don&rsquo;t worry about that odd <code>depth != -1</code> part yet. We&rsquo;ll get to what that&rsquo;s
about later.</p>
</aside>
<p>Local variables are appended to the array when they&rsquo;re declared, which means the
current scope is always at the end of the array. When we declare a new variable,
we start at the end and work backward, looking for an existing variable with the
same name. If we find one in the current scope, we report the error. Otherwise,
if we reach the beginning of the array or a variable owned by another scope,
then we know we&rsquo;ve checked all of the existing variables in the scope.</p>
<p>To see if two identifiers are the same, we use this:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>identifierConstant</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">identifiersEqual</span>(<span class="t">Token</span>* <span class="i">a</span>, <span class="t">Token</span>* <span class="i">b</span>) {
  <span class="k">if</span> (<span class="i">a</span>-&gt;<span class="i">length</span> != <span class="i">b</span>-&gt;<span class="i">length</span>) <span class="k">return</span> <span class="k">false</span>;
  <span class="k">return</span> <span class="i">memcmp</span>(<span class="i">a</span>-&gt;<span class="i">start</span>, <span class="i">b</span>-&gt;<span class="i">start</span>, <span class="i">a</span>-&gt;<span class="i">length</span>) == <span class="n">0</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>identifierConstant</em>()</div>

<p>Since we know the lengths of both lexemes, we check that first. That will fail
quickly for many non-equal strings. If the <span name="hash">lengths</span> are
the same, we check the characters using <code>memcmp()</code>. To get to <code>memcmp()</code>, we
need an include.</p>
<aside name="hash">
<p>It would be a nice little optimization if we could check their hashes, but
tokens aren&rsquo;t full LoxStrings, so we haven&rsquo;t calculated their hashes yet.</p>
</aside>
<div class="codehilite"><pre class="insert-before">#include &lt;stdlib.h&gt;
</pre><div class="source-file"><em>compiler.c</em></div>
<pre class="insert"><span class="a">#include &lt;string.h&gt;</span>
</pre><pre class="insert-after">

#include &quot;common.h&quot;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em></div>

<p>With this, we&rsquo;re able to bring variables into being. But, like ghosts, they
linger on beyond the scope where they are declared. When a block ends, we need
to put them to rest.</p>
<div class="codehilite"><pre class="insert-before">  current-&gt;scopeDepth--;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>endScope</em>()</div>
<pre class="insert">

  <span class="k">while</span> (<span class="i">current</span>-&gt;<span class="i">localCount</span> &gt; <span class="n">0</span> &amp;&amp;
         <span class="i">current</span>-&gt;<span class="i">locals</span>[<span class="i">current</span>-&gt;<span class="i">localCount</span> - <span class="n">1</span>].<span class="i">depth</span> &gt;
            <span class="i">current</span>-&gt;<span class="i">scopeDepth</span>) {
    <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
    <span class="i">current</span>-&gt;<span class="i">localCount</span>--;
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>endScope</em>()</div>

<p>When we pop a scope, we walk backward through the local array looking for any
variables declared at the scope depth we just left. We discard them by simply
decrementing the length of the array.</p>
<p>There is a runtime component to this too. Local variables occupy slots on the
stack. When a local variable goes out of scope, that slot is no longer needed
and should be freed. So, for each variable that we discard, we also emit an
<code>OP_POP</code> <span name="pop">instruction</span> to pop it from the stack.</p>
<aside name="pop">
<p>When multiple local variables go out of scope at once, you get a series of
<code>OP_POP</code> instructions that get interpreted one at a time. A simple optimization
you could add to your Lox implementation is a specialized <code>OP_POPN</code> instruction
that takes an operand for the number of slots to pop and pops them all at once.</p>
</aside>
<h2><a href="#using-locals" id="using-locals"><small>22&#8202;.&#8202;4</small>Using Locals</a></h2>
<p>We can now compile and execute local variable declarations. At runtime, their
values are sitting where they should be on the stack. Let&rsquo;s start using them.
We&rsquo;ll do both variable access and assignment at the same time since they touch
the same functions in the compiler.</p>
<p>We already have code for getting and setting global variables, and<span class="em">&mdash;</span>like good
little software engineers<span class="em">&mdash;</span>we want to reuse as much of that existing code as
we can. Something like this:</p>
<div class="codehilite"><pre class="insert-before">static void namedVariable(Token name, bool canAssign) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>namedVariable</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">uint8_t</span> <span class="i">getOp</span>, <span class="i">setOp</span>;
  <span class="t">int</span> <span class="i">arg</span> = <span class="i">resolveLocal</span>(<span class="i">current</span>, &amp;<span class="i">name</span>);
  <span class="k">if</span> (<span class="i">arg</span> != -<span class="n">1</span>) {
    <span class="i">getOp</span> = <span class="a">OP_GET_LOCAL</span>;
    <span class="i">setOp</span> = <span class="a">OP_SET_LOCAL</span>;
  } <span class="k">else</span> {
    <span class="i">arg</span> = <span class="i">identifierConstant</span>(&amp;<span class="i">name</span>);
    <span class="i">getOp</span> = <span class="a">OP_GET_GLOBAL</span>;
    <span class="i">setOp</span> = <span class="a">OP_SET_GLOBAL</span>;
  }
</pre><pre class="insert-after">

  if (canAssign &amp;&amp; match(TOKEN_EQUAL)) {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>namedVariable</em>(), replace 1 line</div>

<p>Instead of hardcoding the bytecode instructions emitted for variable access and
assignment, we use a couple of C variables. First, we try to find a local
variable with the given name. If we find one, we use the instructions for
working with locals. Otherwise, we assume it&rsquo;s a global variable and use the
existing bytecode instructions for globals.</p>
<p>A little further down, we use those variables to emit the right instructions.
For assignment:</p>
<div class="codehilite"><pre class="insert-before">  if (canAssign &amp;&amp; match(TOKEN_EQUAL)) {
    expression();
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>namedVariable</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">emitBytes</span>(<span class="i">setOp</span>, (<span class="t">uint8_t</span>)<span class="i">arg</span>);
</pre><pre class="insert-after">  } else {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>namedVariable</em>(), replace 1 line</div>

<p>And for access:</p>
<div class="codehilite"><pre class="insert-before">    emitBytes(setOp, (uint8_t)arg);
  } else {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>namedVariable</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">emitBytes</span>(<span class="i">getOp</span>, (<span class="t">uint8_t</span>)<span class="i">arg</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>namedVariable</em>(), replace 1 line</div>

<p>The real heart of this chapter, the part where we resolve a local variable, is
here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>identifiersEqual</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">resolveLocal</span>(<span class="t">Compiler</span>* <span class="i">compiler</span>, <span class="t">Token</span>* <span class="i">name</span>) {
  <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="i">compiler</span>-&gt;<span class="i">localCount</span> - <span class="n">1</span>; <span class="i">i</span> &gt;= <span class="n">0</span>; <span class="i">i</span>--) {
    <span class="t">Local</span>* <span class="i">local</span> = &amp;<span class="i">compiler</span>-&gt;<span class="i">locals</span>[<span class="i">i</span>];
    <span class="k">if</span> (<span class="i">identifiersEqual</span>(<span class="i">name</span>, &amp;<span class="i">local</span>-&gt;<span class="i">name</span>)) {
      <span class="k">return</span> <span class="i">i</span>;
    }
  }

  <span class="k">return</span> -<span class="n">1</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>identifiersEqual</em>()</div>

<p>For all that, it&rsquo;s straightforward. We walk the list of locals that are
currently in scope. If one has the same name as the identifier token, the
identifier must refer to that variable. We&rsquo;ve found it! We walk the array
backward so that we find the <em>last</em> declared variable with the identifier. That
ensures that inner local variables correctly shadow locals with the same name in
surrounding scopes.</p>
<p>At runtime, we load and store locals using the stack slot index, so that&rsquo;s what
the compiler needs to calculate after it resolves the variable. Whenever a
variable is declared, we append it to the locals array in Compiler. That means
the first local variable is at index zero, the next one is at index one, and so
on. In other words, the locals array in the compiler has the <em>exact</em> same layout
as the VM&rsquo;s stack will have at runtime. The variable&rsquo;s index in the locals array
is the same as its stack slot. How convenient!</p>
<p>If we make it through the whole array without finding a variable with the given
name, it must not be a local. In that case, we return <code>-1</code> to signal that it
wasn&rsquo;t found and should be assumed to be a global variable instead.</p>
<h3><a href="#interpreting-local-variables" id="interpreting-local-variables"><small>22&#8202;.&#8202;4&#8202;.&#8202;1</small>Interpreting local variables</a></h3>
<p>Our compiler is emitting two new instructions, so let&rsquo;s get them working. First
is loading a local variable:</p>
<div class="codehilite"><pre class="insert-before">  OP_POP,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_GET_LOCAL</span>,
</pre><pre class="insert-after">  OP_GET_GLOBAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>And its implementation:</p>
<div class="codehilite"><pre class="insert-before">      case OP_POP: pop(); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_GET_LOCAL</span>: {
        <span class="t">uint8_t</span> <span class="i">slot</span> = <span class="a">READ_BYTE</span>();
        <span class="i">push</span>(<span class="i">vm</span>.<span class="i">stack</span>[<span class="i">slot</span>]);<span name="slot"> </span>
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_GET_GLOBAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>It takes a single-byte operand for the stack slot where the local lives. It
loads the value from that index and then pushes it on top of the stack where
later instructions can find it.</p>
<aside name="slot">
<p>It seems redundant to push the local&rsquo;s value onto the stack since it&rsquo;s already
on the stack lower down somewhere. The problem is that the other bytecode
instructions only look for data at the <em>top</em> of the stack. This is the core
aspect that makes our bytecode instruction set <em>stack</em>-based.
<a href="a-virtual-machine.html#design-note">Register-based</a> bytecode instruction sets avoid this stack juggling at the
cost of having larger instructions with more operands.</p>
</aside>
<p>Next is assignment:</p>
<div class="codehilite"><pre class="insert-before">  OP_GET_LOCAL,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_SET_LOCAL</span>,
</pre><pre class="insert-after">  OP_GET_GLOBAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>You can probably predict the implementation.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_SET_LOCAL</span>: {
        <span class="t">uint8_t</span> <span class="i">slot</span> = <span class="a">READ_BYTE</span>();
        <span class="i">vm</span>.<span class="i">stack</span>[<span class="i">slot</span>] = <span class="i">peek</span>(<span class="n">0</span>);
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_GET_GLOBAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>It takes the assigned value from the top of the stack and stores it in the stack
slot corresponding to the local variable. Note that it doesn&rsquo;t pop the value
from the stack. Remember, assignment is an expression, and every expression
produces a value. The value of an assignment expression is the assigned value
itself, so the VM just leaves the value on the stack.</p>
<p>Our disassembler is incomplete without support for these two new instructions.</p>
<div class="codehilite"><pre class="insert-before">      return simpleInstruction(&quot;OP_POP&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_GET_LOCAL</span>:
      <span class="k">return</span> <span class="i">byteInstruction</span>(<span class="s">&quot;OP_GET_LOCAL&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_SET_LOCAL</span>:
      <span class="k">return</span> <span class="i">byteInstruction</span>(<span class="s">&quot;OP_SET_LOCAL&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_GET_GLOBAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>The compiler compiles local variables to direct slot access. The local
variable&rsquo;s name never leaves the compiler to make it into the chunk at all.
That&rsquo;s great for performance, but not so great for introspection. When we
disassemble these instructions, we can&rsquo;t show the variable&rsquo;s name like we could
with globals. Instead, we just show the slot number.</p>
<aside name="debug">
<p>Erasing local variable names in the compiler is a real issue if we ever want to
implement a debugger for our VM. When users step through code, they expect to
see the values of local variables organized by their names. To support that,
we&rsquo;d need to output some additional information that tracks the name of each
local variable at each stack slot.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>debug.c</em><br>
add after <em>simpleInstruction</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">byteInstruction</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>, <span class="t">Chunk</span>* <span class="i">chunk</span>,
                           <span class="t">int</span> <span class="i">offset</span>) {
  <span class="t">uint8_t</span> <span class="i">slot</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span> + <span class="n">1</span>];
  <span class="i">printf</span>(<span class="s">&quot;%-16s %4d</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">name</span>, <span class="i">slot</span>);
  <span class="k">return</span> <span class="i">offset</span> + <span class="n">2</span>;<span name="debug"> </span>
}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, add after <em>simpleInstruction</em>()</div>

<h3><a href="#another-scope-edge-case" id="another-scope-edge-case"><small>22&#8202;.&#8202;4&#8202;.&#8202;2</small>Another scope edge case</a></h3>
<p>We&rsquo;ve already sunk some time into handling a couple of weird edge cases around
scopes. We made sure shadowing works correctly. We report an error if two
variables in the same local scope have the same name. For reasons that aren&rsquo;t
entirely clear to me, variable scoping seems to have a lot of these wrinkles.
I&rsquo;ve never seen a language where it feels completely <span
name="elegant">elegant</span>.</p>
<aside name="elegant">
<p>No, not even Scheme.</p>
</aside>
<p>We&rsquo;ve got one more edge case to deal with before we end this chapter. Recall this strange beastie we first met in <a href="resolving-and-binding.html#resolving-variable-declarations">jlox&rsquo;s implementation of variable resolution</a>:</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer&quot;</span>;
  {
    <span class="k">var</span> <span class="i">a</span> = <span class="i">a</span>;
  }
}
</pre></div>
<p>We slayed it then by splitting a variable&rsquo;s declaration into two phases, and
we&rsquo;ll do that again here:</p><img src="image/local-variables/phases.png" alt="An example variable declaration marked 'declared uninitialized' before the variable name and 'ready for use' after the initializer." />
<p>As soon as the variable declaration begins<span class="em">&mdash;</span>in other words, before its
initializer<span class="em">&mdash;</span>the name is declared in the current scope. The variable exists,
but in a special &ldquo;uninitialized&rdquo; state. Then we compile the initializer. If at
any point in that expression we resolve an identifier that points back to this
variable, we&rsquo;ll see that it is not initialized yet and report an error. After we
finish compiling the initializer, we mark the variable as initialized and ready
for use.</p>
<p>To implement this, when we declare a local, we need to indicate the
&ldquo;uninitialized&rdquo; state somehow. We could add a new field to Local, but let&rsquo;s be a
little more parsimonious with memory. Instead, we&rsquo;ll set the variable&rsquo;s scope
depth to a special sentinel value, <code>-1</code>.</p>
<div class="codehilite"><pre class="insert-before">  local-&gt;name = name;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>addLocal</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="i">local</span>-&gt;<span class="i">depth</span> = -<span class="n">1</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>addLocal</em>(), replace 1 line</div>

<p>Later, once the variable&rsquo;s initializer has been compiled, we mark it
initialized.</p>
<div class="codehilite"><pre class="insert-before">  if (current-&gt;scopeDepth &gt; 0) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>defineVariable</em>()</div>
<pre class="insert">    <span class="i">markInitialized</span>();
</pre><pre class="insert-after">    return;
  }
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>defineVariable</em>()</div>

<p>That is implemented like so:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>parseVariable</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">markInitialized</span>() {
  <span class="i">current</span>-&gt;<span class="i">locals</span>[<span class="i">current</span>-&gt;<span class="i">localCount</span> - <span class="n">1</span>].<span class="i">depth</span> =
      <span class="i">current</span>-&gt;<span class="i">scopeDepth</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>parseVariable</em>()</div>

<p>So this is <em>really</em> what &ldquo;declaring&rdquo; and &ldquo;defining&rdquo; a variable means in the
compiler. &ldquo;Declaring&rdquo; is when the variable is added to the scope, and &ldquo;defining&rdquo;
is when it becomes available for use.</p>
<p>When we resolve a reference to a local variable, we check the scope depth to see
if it&rsquo;s fully defined.</p>
<div class="codehilite"><pre class="insert-before">    if (identifiersEqual(name, &amp;local-&gt;name)) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>resolveLocal</em>()</div>
<pre class="insert">      <span class="k">if</span> (<span class="i">local</span>-&gt;<span class="i">depth</span> == -<span class="n">1</span>) {
        <span class="i">error</span>(<span class="s">&quot;Can&#39;t read local variable in its own initializer.&quot;</span>);
      }
</pre><pre class="insert-after">      return i;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>resolveLocal</em>()</div>

<p>If the variable has the sentinel depth, it must be a reference to a variable in
its own initializer, and we report that as an error.</p>
<p>That&rsquo;s it for this chapter! We added blocks, local variables, and real,
honest-to-God lexical scoping. Given that we introduced an entirely different
runtime representation for variables, we didn&rsquo;t have to write a lot of code. The
implementation ended up being pretty clean and efficient.</p>
<p>You&rsquo;ll notice that almost all of the code we wrote is in the compiler. Over in
the runtime, it&rsquo;s just two little instructions. You&rsquo;ll see this as a continuing
<span name="static">trend</span> in clox compared to jlox. One of the biggest
hammers in the optimizer&rsquo;s toolbox is pulling work forward into the compiler so
that you don&rsquo;t have to do it at runtime. In this chapter, that meant resolving
exactly which stack slot every local variable occupies. That way, at runtime, no
lookup or resolution needs to happen.</p>
<aside name="static">
<p>You can look at static types as an extreme example of this trend. A statically
typed language takes all of the type analysis and type error handling and sorts
it all out during compilation. Then the runtime doesn&rsquo;t have to waste any time
checking that values have the proper type for their operation. In fact, in some
statically typed languages like C, you don&rsquo;t even <em>know</em> the type at runtime.
The compiler completely erases any representation of a value&rsquo;s type, leaving just
the bare bits.</p>
</aside>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Our simple local array makes it easy to calculate the stack slot of each
local variable. But it means that when the compiler resolves a reference to
a variable, we have to do a linear scan through the array.</p>
<p>Come up with something more efficient. Do you think the additional
complexity is worth it?</p>
</li>
<li>
<p>How do other languages handle code like this:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="i">a</span>;
</pre></div>
<p>What would you do if it were your language? Why?</p>
</li>
<li>
<p>Many languages make a distinction between variables that can be reassigned
and those that can&rsquo;t. In Java, the <code>final</code> modifier prevents you from
assigning to a variable. In JavaScript, a variable declared with <code>let</code> can
be assigned, but one declared using <code>const</code> can&rsquo;t. Swift treats <code>let</code> as
single-assignment and uses <code>var</code> for assignable variables. Scala and Kotlin
use <code>val</code> and <code>var</code>.</p>
<p>Pick a keyword for a single-assignment variable form to add to Lox. Justify
your choice, then implement it. An attempt to assign to a variable declared
using your new keyword should cause a compile error.</p>
</li>
<li>
<p>Extend clox to allow more than 256 local variables to be in scope at a time.</p>
</li>
</ol>
</div>

<footer>
<a href="jumping-back-and-forth.html" class="next">
  Next Chapter: &ldquo;Jumping Back and Forth&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/methods-and-initializers.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Methods and Initializers &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Methods and Initializers<small>28</small></a></h3>

<ul>
    <li><a href="#method-declarations"><small>28.1</small> Method Declarations</a></li>
    <li><a href="#method-references"><small>28.2</small> Method References</a></li>
    <li><a href="#this"><small>28.3</small> This</a></li>
    <li><a href="#instance-initializers"><small>28.4</small> Instance Initializers</a></li>
    <li><a href="#optimized-invocations"><small>28.5</small> Optimized Invocations</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Novelty Budget</a></li>
</ul>


<div class="prev-next">
    <a href="classes-and-instances.html" title="Classes and Instances" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="superclasses.html" title="Superclasses" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="classes-and-instances.html" title="Classes and Instances" class="prev">←</a>
<a href="superclasses.html" title="Superclasses" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Methods and Initializers<small>28</small></a></h3>

<ul>
    <li><a href="#method-declarations"><small>28.1</small> Method Declarations</a></li>
    <li><a href="#method-references"><small>28.2</small> Method References</a></li>
    <li><a href="#this"><small>28.3</small> This</a></li>
    <li><a href="#instance-initializers"><small>28.4</small> Instance Initializers</a></li>
    <li><a href="#optimized-invocations"><small>28.5</small> Optimized Invocations</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Novelty Budget</a></li>
</ul>


<div class="prev-next">
    <a href="classes-and-instances.html" title="Classes and Instances" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="superclasses.html" title="Superclasses" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">28</div>
  <h1>Methods and Initializers</h1>

<blockquote>
<p>When you are on the dancefloor, there is nothing to do but dance.</p>
<p><cite>Umberto Eco, <em>The Mysterious Flame of Queen Loana</em></cite></p>
</blockquote>
<p>It is time for our virtual machine to bring its nascent objects to life with
behavior. That means methods and method calls. And, since they are a special
kind of method, initializers too.</p>
<p>All of this is familiar territory from our previous jlox interpreter. What&rsquo;s new
in this second trip is an important optimization we&rsquo;ll implement to make method
calls over seven times faster than our baseline performance. But before we get
to that fun, we gotta get the basic stuff working.</p>
<h2><a href="#method-declarations" id="method-declarations"><small>28&#8202;.&#8202;1</small>Method Declarations</a></h2>
<p>We can&rsquo;t optimize method calls before we have method calls, and we can&rsquo;t call
methods without having methods to call, so we&rsquo;ll start with declarations.</p>
<h3><a href="#representing-methods" id="representing-methods"><small>28&#8202;.&#8202;1&#8202;.&#8202;1</small>Representing methods</a></h3>
<p>We usually start in the compiler, but let&rsquo;s knock the object model out first
this time. The runtime representation for methods in clox is similar to that of
jlox. Each class stores a hash table of methods. Keys are method names, and each
value is an ObjClosure for the body of the method.</p>
<div class="codehilite"><pre class="insert-before">typedef struct {
  Obj obj;
  ObjString* name;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>ObjClass</em></div>
<pre class="insert">  <span class="t">Table</span> <span class="i">methods</span>;
</pre><pre class="insert-after">} ObjClass;
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>ObjClass</em></div>

<p>A brand new class begins with an empty method table.</p>
<div class="codehilite"><pre class="insert-before">  klass-&gt;name = name;<span name="klass"> </span>
</pre><div class="source-file"><em>object.c</em><br>
in <em>newClass</em>()</div>
<pre class="insert">  <span class="i">initTable</span>(&amp;<span class="i">klass</span>-&gt;<span class="i">methods</span>);
</pre><pre class="insert-after">  return klass;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>newClass</em>()</div>

<p>The ObjClass struct owns the memory for this table, so when the memory manager
deallocates a class, the table should be freed too.</p>
<div class="codehilite"><pre class="insert-before">    case OBJ_CLASS: {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">      <span class="t">ObjClass</span>* <span class="i">klass</span> = (<span class="t">ObjClass</span>*)<span class="i">object</span>;
      <span class="i">freeTable</span>(&amp;<span class="i">klass</span>-&gt;<span class="i">methods</span>);
</pre><pre class="insert-after">      FREE(ObjClass, object);
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>Speaking of memory managers, the GC needs to trace through classes into the
method table. If a class is still reachable (likely through some instance),
then all of its methods certainly need to stick around too.</p>
<div class="codehilite"><pre class="insert-before">      markObject((Obj*)klass-&gt;name);
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert">      <span class="i">markTable</span>(&amp;<span class="i">klass</span>-&gt;<span class="i">methods</span>);
</pre><pre class="insert-after">      break;
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>We use the existing <code>markTable()</code> function, which traces through the key string
and value in each table entry.</p>
<p>Storing a class&rsquo;s methods is pretty familiar coming from jlox. The different
part is how that table gets populated. Our previous interpreter had access to
the entire AST node for the class declaration and all of the methods it
contained. At runtime, the interpreter simply walked that list of declarations.</p>
<p>Now every piece of information the compiler wants to shunt over to the runtime
has to squeeze through the interface of a flat series of bytecode instructions.
How do we take a class declaration, which can contain an arbitrarily large set
of methods, and represent it as bytecode? Let&rsquo;s hop over to the compiler and
find out.</p>
<h3><a href="#compiling-method-declarations" id="compiling-method-declarations"><small>28&#8202;.&#8202;1&#8202;.&#8202;2</small>Compiling method declarations</a></h3>
<p>The last chapter left us with a compiler that parses classes but allows only an
empty body. Now we insert a little code to compile a series of method
declarations between the braces.</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_LEFT_BRACE, &quot;Expect '{' before class body.&quot;);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">  <span class="k">while</span> (!<span class="i">check</span>(<span class="a">TOKEN_RIGHT_BRACE</span>) &amp;&amp; !<span class="i">check</span>(<span class="a">TOKEN_EOF</span>)) {
    <span class="i">method</span>();
  }
</pre><pre class="insert-after">  consume(TOKEN_RIGHT_BRACE, &quot;Expect '}' after class body.&quot;);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>Lox doesn&rsquo;t have field declarations, so anything before the closing brace at the
end of the class body must be a method. We stop compiling methods when we hit
that final curly or if we reach the end of the file. The latter check ensures
our compiler doesn&rsquo;t get stuck in an infinite loop if the user accidentally
forgets the closing brace.</p>
<p>The tricky part with compiling a class declaration is that a class may declare
any number of methods. Somehow the runtime needs to look up and bind all of
them. That would be a lot to pack into a single <code>OP_CLASS</code> instruction. Instead,
the bytecode we generate for a class declaration will split the process into a
<span name="series"><em>series</em></span> of instructions. The compiler already emits
an <code>OP_CLASS</code> instruction that creates a new empty ObjClass object. Then it
emits instructions to store the class in a variable with its name.</p>
<aside name="series">
<p>We did something similar for closures. The <code>OP_CLOSURE</code> instruction needs to
know the type and index for each captured upvalue. We encoded that using a
series of pseudo-instructions following the main <code>OP_CLOSURE</code> instruction<span class="em">&mdash;</span>basically a variable number of operands. The VM processes all of those extra
bytes immediately when interpreting the <code>OP_CLOSURE</code> instruction.</p>
<p>Here our approach is a little different because from the VM&rsquo;s perspective, each
instruction to define a method is a separate stand-alone operation. Either
approach would work. A variable-sized pseudo-instruction is possibly marginally
faster, but class declarations are rarely in hot loops, so it doesn&rsquo;t matter
much.</p>
</aside>
<p>Now, for each method declaration, we emit a new <code>OP_METHOD</code> instruction that
adds a single method to that class. When all of the <code>OP_METHOD</code> instructions
have executed, we&rsquo;re left with a fully formed class. While the user sees a class
declaration as a single atomic operation, the VM implements it as a series of
mutations.</p>
<p>To define a new method, the VM needs three things:</p>
<ol>
<li>
<p>The name of the method.</p>
</li>
<li>
<p>The closure for the method body.</p>
</li>
<li>
<p>The class to bind the method to.</p>
</li>
</ol>
<p>We&rsquo;ll incrementally write the compiler code to see how those all get through to
the runtime, starting here:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>function</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">method</span>() {
  <span class="i">consume</span>(<span class="a">TOKEN_IDENTIFIER</span>, <span class="s">&quot;Expect method name.&quot;</span>);
  <span class="t">uint8_t</span> <span class="i">constant</span> = <span class="i">identifierConstant</span>(&amp;<span class="i">parser</span>.<span class="i">previous</span>);
  <span class="i">emitBytes</span>(<span class="a">OP_METHOD</span>, <span class="i">constant</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>function</em>()</div>

<p>As with <code>OP_GET_PROPERTY</code> and other instructions that need names at runtime, the
compiler adds the method name token&rsquo;s lexeme to the constant table, getting back
a table index. Then we emit an <code>OP_METHOD</code> instruction with that index as the
operand. That&rsquo;s the name. Next is the method body:</p>
<div class="codehilite"><pre class="insert-before">  uint8_t constant = identifierConstant(&amp;parser.previous);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>method</em>()</div>
<pre class="insert">

  <span class="t">FunctionType</span> <span class="i">type</span> = <span class="a">TYPE_FUNCTION</span>;
  <span class="i">function</span>(<span class="i">type</span>);
</pre><pre class="insert-after">  emitBytes(OP_METHOD, constant);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>method</em>()</div>

<p>We use the same <code>function()</code> helper that we wrote for compiling function
declarations. That utility function compiles the subsequent parameter list and
function body. Then it emits the code to create an ObjClosure and leave it on
top of the stack. At runtime, the VM will find the closure there.</p>
<p>Last is the class to bind the method to. Where can the VM find that?
Unfortunately, by the time we reach the <code>OP_METHOD</code> instruction, we don&rsquo;t know
where it is. It <span name="global">could</span> be on the stack, if the user
declared the class in a local scope. But a top-level class declaration ends up
with the ObjClass in the global variable table.</p>
<aside name="global">
<p>If Lox supported declaring classes only at the top level, the VM could assume
that any class could be found by looking it up directly from the global
variable table. Alas, because we support local classes, we need to handle that
case too.</p>
</aside>
<p>Fear not. The compiler does know the <em>name</em> of the class. We can capture it
right after we consume its token.</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_IDENTIFIER, &quot;Expect class name.&quot;);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">  <span class="t">Token</span> <span class="i">className</span> = <span class="i">parser</span>.<span class="i">previous</span>;
</pre><pre class="insert-after">  uint8_t nameConstant = identifierConstant(&amp;parser.previous);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>And we know that no other declaration with that name could possibly shadow the
class. So we do the easy fix. Before we start binding methods, we emit whatever
code is necessary to load the class back on top of the stack.</p>
<div class="codehilite"><pre class="insert-before">  defineVariable(nameConstant);

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">  <span class="i">namedVariable</span>(<span class="i">className</span>, <span class="k">false</span>);
</pre><pre class="insert-after">  consume(TOKEN_LEFT_BRACE, &quot;Expect '{' before class body.&quot;);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>Right before compiling the class body, we <span name="load">call</span>
<code>namedVariable()</code>. That helper function generates code to load a variable with
the given name onto the stack. Then we compile the methods.</p>
<aside name="load">
<p>The preceding call to <code>defineVariable()</code> pops the class, so it seems silly to
call <code>namedVariable()</code> to load it right back onto the stack. Why not simply
leave it on the stack in the first place? We could, but in the <a href="superclasses.html">next
chapter</a> we will insert code between these two calls to support
inheritance. At that point, it will be simpler if the class isn&rsquo;t sitting around
on the stack.</p>
</aside>
<p>This means that when we execute each <code>OP_METHOD</code> instruction, the stack has the
method&rsquo;s closure on top with the class right under it. Once we&rsquo;ve reached the
end of the methods, we no longer need the class and tell the VM to pop it off
the stack.</p>
<div class="codehilite"><pre class="insert-before">  consume(TOKEN_RIGHT_BRACE, &quot;Expect '}' after class body.&quot;);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">  <span class="i">emitByte</span>(<span class="a">OP_POP</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>Putting all of that together, here is an example class declaration to throw at
the compiler:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Brunch</span> {
  <span class="i">bacon</span>() {}
  <span class="i">eggs</span>() {}
}
</pre></div>
<p>Given that, here is what the compiler generates and how those instructions
affect the stack at runtime:</p><img src="image/methods-and-initializers/method-instructions.png" alt="The series of bytecode instructions for a class declaration with two methods." />
<p>All that remains for us is to implement the runtime for that new <code>OP_METHOD</code>
instruction.</p>
<h3><a href="#executing-method-declarations" id="executing-method-declarations"><small>28&#8202;.&#8202;1&#8202;.&#8202;3</small>Executing method declarations</a></h3>
<p>First we define the opcode.</p>
<div class="codehilite"><pre class="insert-before">  OP_CLASS,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_METHOD</span>
</pre><pre class="insert-after">} OpCode;
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>We disassemble it like other instructions that have string constant operands.</p>
<div class="codehilite"><pre class="insert-before">    case OP_CLASS:
      return constantInstruction(&quot;OP_CLASS&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_METHOD</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_METHOD&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    default:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>And over in the interpreter, we add a new case too.</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_METHOD</span>:
        <span class="i">defineMethod</span>(<span class="a">READ_STRING</span>());
        <span class="k">break</span>;
</pre><pre class="insert-after">    }
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>There, we read the method name from the constant table and pass it here:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>closeUpvalues</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">defineMethod</span>(<span class="t">ObjString</span>* <span class="i">name</span>) {
  <span class="t">Value</span> <span class="i">method</span> = <span class="i">peek</span>(<span class="n">0</span>);
  <span class="t">ObjClass</span>* <span class="i">klass</span> = <span class="a">AS_CLASS</span>(<span class="i">peek</span>(<span class="n">1</span>));
  <span class="i">tableSet</span>(&amp;<span class="i">klass</span>-&gt;<span class="i">methods</span>, <span class="i">name</span>, <span class="i">method</span>);
  <span class="i">pop</span>();
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>closeUpvalues</em>()</div>

<p>The method closure is on top of the stack, above the class it will be bound to.
We read those two stack slots and store the closure in the class&rsquo;s method table.
Then we pop the closure since we&rsquo;re done with it.</p>
<p>Note that we don&rsquo;t do any runtime type checking on the closure or class object.
That <code>AS_CLASS()</code> call is safe because the compiler itself generated the code
that causes the class to be in that stack slot. The VM <span
name="verify">trusts</span> its own compiler.</p>
<aside name="verify">
<p>The VM trusts that the instructions it executes are valid because the <em>only</em> way
to get code to the bytecode interpreter is by going through clox&rsquo;s own compiler.
Many bytecode VMs, like the JVM and CPython, support executing bytecode that has
been compiled separately. That leads to a different security story. Maliciously
crafted bytecode could crash the VM or worse.</p>
<p>To prevent that, the JVM does a bytecode verification pass before it executes
any loaded code. CPython says it&rsquo;s up to the user to ensure any bytecode they
run is safe.</p>
</aside>
<p>After the series of <code>OP_METHOD</code> instructions is done and the <code>OP_POP</code> has popped
the class, we will have a class with a nicely populated method table, ready to
start doing things. The next step is pulling those methods back out and using
them.</p>
<h2><a href="#method-references" id="method-references"><small>28&#8202;.&#8202;2</small>Method References</a></h2>
<p>Most of the time, methods are accessed and immediately called, leading to this
familiar syntax:</p>
<div class="codehilite"><pre><span class="i">instance</span>.<span class="i">method</span>(<span class="i">argument</span>);
</pre></div>
<p>But remember, in Lox and some other languages, those two steps are distinct and
can be separated.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">closure</span> = <span class="i">instance</span>.<span class="i">method</span>;
<span class="i">closure</span>(<span class="i">argument</span>);
</pre></div>
<p>Since users <em>can</em> separate the operations, we have to implement them separately.
The first step is using our existing dotted property syntax to access a method
defined on the instance&rsquo;s class. That should return some kind of object that the
user can then call like a function.</p>
<p>The obvious approach is to look up the method in the class&rsquo;s method table and
return the ObjClosure associated with that name. But we also need to remember
that when you access a method, <code>this</code> gets bound to the instance the method was
accessed from. Here&rsquo;s the example from <a href="classes.html#methods-on-classes">when we added methods to jlox</a>:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Person</span> {
  <span class="i">sayName</span>() {
    <span class="k">print</span> <span class="k">this</span>.<span class="i">name</span>;
  }
}

<span class="k">var</span> <span class="i">jane</span> = <span class="t">Person</span>();
<span class="i">jane</span>.<span class="i">name</span> = <span class="s">&quot;Jane&quot;</span>;

<span class="k">var</span> <span class="i">method</span> = <span class="i">jane</span>.<span class="i">sayName</span>;
<span class="i">method</span>(); <span class="c">// ?</span>
</pre></div>
<p>This should print &ldquo;Jane&rdquo;, so the object returned by <code>.sayName</code> somehow needs to
remember the instance it was accessed from when it later gets called. In jlox,
we implemented that &ldquo;memory&rdquo; using the interpreter&rsquo;s existing heap-allocated
Environment class, which handled all variable storage.</p>
<p>Our bytecode VM has a more complex architecture for storing state. <a href="local-variables.html#representing-local-variables">Local
variables and temporaries</a> are on the stack, <a href="global-variables.html#variable-declarations">globals</a> are in a hash
table, and variables in closures use <a href="closures.html#upvalues">upvalues</a>. That necessitates a somewhat
more complex solution for tracking a method&rsquo;s receiver in clox, and a new
runtime type.</p>
<h3><a href="#bound-methods" id="bound-methods"><small>28&#8202;.&#8202;2&#8202;.&#8202;1</small>Bound methods</a></h3>
<p>When the user executes a method access, we&rsquo;ll find the closure for that method
and wrap it in a new <span name="bound">&ldquo;bound method&rdquo;</span> object that tracks
the instance that the method was accessed from. This bound object can be called
later like a function. When invoked, the VM will do some shenanigans to wire up
<code>this</code> to point to the receiver inside the method&rsquo;s body.</p>
<aside name="bound">
<p>I took the name &ldquo;bound method&rdquo; from CPython. Python behaves similarly to Lox here,
and I used its implementation for inspiration.</p>
</aside>
<p>Here&rsquo;s the new object type:</p>
<div class="codehilite"><pre class="insert-before">} ObjInstance;

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjInstance</em></div>
<pre class="insert"><span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">Value</span> <span class="i">receiver</span>;
  <span class="t">ObjClosure</span>* <span class="i">method</span>;
} <span class="t">ObjBoundMethod</span>;

</pre><pre class="insert-after">ObjClass* newClass(ObjString* name);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjInstance</em></div>

<p>It wraps the receiver and the method closure together. The receiver&rsquo;s type is
Value even though methods can be called only on ObjInstances. Since the VM
doesn&rsquo;t care what kind of receiver it has anyway, using Value means we don&rsquo;t
have to keep converting the pointer back to a Value when it gets passed to more
general functions.</p>
<p>The new struct implies the usual boilerplate you&rsquo;re used to by now. A new case
in the object type enum:</p>
<div class="codehilite"><pre class="insert-before">typedef enum {
</pre><div class="source-file"><em>object.h</em><br>
in enum <em>ObjType</em></div>
<pre class="insert">  <span class="a">OBJ_BOUND_METHOD</span>,
</pre><pre class="insert-after">  OBJ_CLASS,
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in enum <em>ObjType</em></div>

<p>A macro to check a value&rsquo;s type:</p>
<div class="codehilite"><pre class="insert-before">#define OBJ_TYPE(value)        (AS_OBJ(value)-&gt;type)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define IS_BOUND_METHOD(value) isObjType(value, OBJ_BOUND_METHOD)</span>
</pre><pre class="insert-after">#define IS_CLASS(value)        isObjType(value, OBJ_CLASS)
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>Another macro to cast the value to an ObjBoundMethod pointer:</p>
<div class="codehilite"><pre class="insert-before">#define IS_STRING(value)       isObjType(value, OBJ_STRING)

</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert"><span class="a">#define AS_BOUND_METHOD(value) ((ObjBoundMethod*)AS_OBJ(value))</span>
</pre><pre class="insert-after">#define AS_CLASS(value)        ((ObjClass*)AS_OBJ(value))
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>A function to create a new ObjBoundMethod:</p>
<div class="codehilite"><pre class="insert-before">} ObjBoundMethod;

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjBoundMethod</em></div>
<pre class="insert"><span class="t">ObjBoundMethod</span>* <span class="i">newBoundMethod</span>(<span class="t">Value</span> <span class="i">receiver</span>,
                               <span class="t">ObjClosure</span>* <span class="i">method</span>);
</pre><pre class="insert-after">ObjClass* newClass(ObjString* name);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjBoundMethod</em></div>

<p>And an implementation of that function here:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>allocateObject</em>()</div>
<pre><span class="t">ObjBoundMethod</span>* <span class="i">newBoundMethod</span>(<span class="t">Value</span> <span class="i">receiver</span>,
                               <span class="t">ObjClosure</span>* <span class="i">method</span>) {
  <span class="t">ObjBoundMethod</span>* <span class="i">bound</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjBoundMethod</span>,
                                       <span class="a">OBJ_BOUND_METHOD</span>);
  <span class="i">bound</span>-&gt;<span class="i">receiver</span> = <span class="i">receiver</span>;
  <span class="i">bound</span>-&gt;<span class="i">method</span> = <span class="i">method</span>;
  <span class="k">return</span> <span class="i">bound</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>allocateObject</em>()</div>

<p>The constructor-like function simply stores the given closure and receiver. When
the bound method is no longer needed, we free it.</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>freeObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_BOUND_METHOD</span>:
      <span class="a">FREE</span>(<span class="t">ObjBoundMethod</span>, <span class="i">object</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_CLASS: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>freeObject</em>()</div>

<p>The bound method has a couple of references, but it doesn&rsquo;t <em>own</em> them, so it
frees nothing but itself. However, those references do get traced by the garbage
collector.</p>
<div class="codehilite"><pre class="insert-before">  switch (object-&gt;type) {
</pre><div class="source-file"><em>memory.c</em><br>
in <em>blackenObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_BOUND_METHOD</span>: {
      <span class="t">ObjBoundMethod</span>* <span class="i">bound</span> = (<span class="t">ObjBoundMethod</span>*)<span class="i">object</span>;
      <span class="i">markValue</span>(<span class="i">bound</span>-&gt;<span class="i">receiver</span>);
      <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">bound</span>-&gt;<span class="i">method</span>);
      <span class="k">break</span>;
    }
</pre><pre class="insert-after">    case OBJ_CLASS: {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>blackenObject</em>()</div>

<p>This <span name="trace">ensures</span> that a handle to a method keeps the
receiver around in memory so that <code>this</code> can still find the object when you
invoke the handle later. We also trace the method closure.</p>
<aside name="trace">
<p>Tracing the method closure isn&rsquo;t really necessary. The receiver is an
ObjInstance, which has a pointer to its ObjClass, which has a table for all of
the methods. But it feels dubious to me in some vague way to have ObjBoundMethod
rely on that.</p>
</aside>
<p>The last operation all objects support is printing.</p>
<div class="codehilite"><pre class="insert-before">  switch (OBJ_TYPE(value)) {
</pre><div class="source-file"><em>object.c</em><br>
in <em>printObject</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OBJ_BOUND_METHOD</span>:
      <span class="i">printFunction</span>(<span class="a">AS_BOUND_METHOD</span>(<span class="i">value</span>)-&gt;<span class="i">method</span>-&gt;<span class="i">function</span>);
      <span class="k">break</span>;
</pre><pre class="insert-after">    case OBJ_CLASS:
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>printObject</em>()</div>

<p>A bound method prints exactly the same way as a function. From the user&rsquo;s
perspective, a bound method <em>is</em> a function. It&rsquo;s an object they can call. We
don&rsquo;t expose that the VM implements bound methods using a different object type.</p>
<aside name="party"><img src="image/methods-and-initializers/party-hat.png" alt="A party hat." />
</aside>
<p>Put on your <span name="party">party</span> hat because we just reached a little
milestone. ObjBoundMethod is the very last runtime type to add to clox. You&rsquo;ve
written your last <code>IS_</code> and <code>AS_</code> macros. We&rsquo;re only a few chapters from the end
of the book, and we&rsquo;re getting close to a complete VM.</p>
<h3><a href="#accessing-methods" id="accessing-methods"><small>28&#8202;.&#8202;2&#8202;.&#8202;2</small>Accessing methods</a></h3>
<p>Let&rsquo;s get our new object type doing something. Methods are accessed using the
same &ldquo;dot&rdquo; property syntax we implemented in the last chapter. The compiler
already parses the right expressions and emits <code>OP_GET_PROPERTY</code> instructions
for them. The only changes we need to make are in the runtime.</p>
<p>When a property access instruction executes, the instance is on top of the
stack. The instruction&rsquo;s job is to find a field or method with the given name
and replace the top of the stack with the accessed property.</p>
<p>The interpreter already handles fields, so we simply extend the
<code>OP_GET_PROPERTY</code> case with another section.</p>
<div class="codehilite"><pre class="insert-before">          pop(); // Instance.
          push(value);
          break;
        }

</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 2 lines</div>
<pre class="insert">        <span class="k">if</span> (!<span class="i">bindMethod</span>(<span class="i">instance</span>-&gt;<span class="i">klass</span>, <span class="i">name</span>)) {
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="k">break</span>;
</pre><pre class="insert-after">      }
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 2 lines</div>

<p>We insert this after the code to look up a field on the receiver instance.
Fields take priority over and shadow methods, so we look for a field first. If
the instance does not have a field with the given property name, then the name
may refer to a method.</p>
<p>We take the instance&rsquo;s class and pass it to a new <code>bindMethod()</code> helper. If that
function finds a method, it places the method on the stack and returns <code>true</code>.
Otherwise it returns <code>false</code> to indicate a method with that name couldn&rsquo;t be
found. Since the name also wasn&rsquo;t a field, that means we have a runtime error,
which aborts the interpreter.</p>
<p>Here is the good stuff:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>callValue</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">bindMethod</span>(<span class="t">ObjClass</span>* <span class="i">klass</span>, <span class="t">ObjString</span>* <span class="i">name</span>) {
  <span class="t">Value</span> <span class="i">method</span>;
  <span class="k">if</span> (!<span class="i">tableGet</span>(&amp;<span class="i">klass</span>-&gt;<span class="i">methods</span>, <span class="i">name</span>, &amp;<span class="i">method</span>)) {
    <span class="i">runtimeError</span>(<span class="s">&quot;Undefined property &#39;%s&#39;.&quot;</span>, <span class="i">name</span>-&gt;<span class="i">chars</span>);
    <span class="k">return</span> <span class="k">false</span>;
  }

  <span class="t">ObjBoundMethod</span>* <span class="i">bound</span> = <span class="i">newBoundMethod</span>(<span class="i">peek</span>(<span class="n">0</span>),
                                         <span class="a">AS_CLOSURE</span>(<span class="i">method</span>));
  <span class="i">pop</span>();
  <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">bound</span>));
  <span class="k">return</span> <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>callValue</em>()</div>

<p>First we look for a method with the given name in the class&rsquo;s method table. If
we don&rsquo;t find one, we report a runtime error and bail out. Otherwise, we take
the method and wrap it in a new ObjBoundMethod. We grab the receiver from its
home on top of the stack. Finally, we pop the instance and replace the top of
the stack with the bound method.</p>
<p>For example:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Brunch</span> {
  <span class="i">eggs</span>() {}
}

<span class="k">var</span> <span class="i">brunch</span> = <span class="t">Brunch</span>();
<span class="k">var</span> <span class="i">eggs</span> = <span class="i">brunch</span>.<span class="i">eggs</span>;
</pre></div>
<p>Here is what happens when the VM executes the <code>bindMethod()</code> call for the
<code>brunch.eggs</code> expression:</p><img src="image/methods-and-initializers/bind-method.png" alt="The stack changes caused by bindMethod()." />
<p>That&rsquo;s a lot of machinery under the hood, but from the user&rsquo;s perspective, they
simply get a function that they can call.</p>
<h3><a href="#calling-methods" id="calling-methods"><small>28&#8202;.&#8202;2&#8202;.&#8202;3</small>Calling methods</a></h3>
<p>Users can declare methods on classes, access them on instances, and get bound
methods onto the stack. They just can&rsquo;t <span name="do"><em>do</em></span> anything
useful with those bound method objects. The operation we&rsquo;re missing is calling
them. Calls are implemented in <code>callValue()</code>, so we add a case there for the new
object type.</p>
<aside name="do">
<p>A bound method <em>is</em> a first-class value, so users can store it in variables,
pass it to functions, and otherwise do &ldquo;value&rdquo;-y stuff with it.</p>
</aside>
<div class="codehilite"><pre class="insert-before">    switch (OBJ_TYPE(callee)) {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>callValue</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OBJ_BOUND_METHOD</span>: {
        <span class="t">ObjBoundMethod</span>* <span class="i">bound</span> = <span class="a">AS_BOUND_METHOD</span>(<span class="i">callee</span>);
        <span class="k">return</span> <span class="i">call</span>(<span class="i">bound</span>-&gt;<span class="i">method</span>, <span class="i">argCount</span>);
      }
</pre><pre class="insert-after">      case OBJ_CLASS: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>callValue</em>()</div>

<p>We pull the raw closure back out of the ObjBoundMethod and use the existing
<code>call()</code> helper to begin an invocation of that closure by pushing a CallFrame
for it onto the call stack. That&rsquo;s all it takes to be able to run this Lox
program:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Scone</span> {
  <span class="i">topping</span>(<span class="i">first</span>, <span class="i">second</span>) {
    <span class="k">print</span> <span class="s">&quot;scone with &quot;</span> + <span class="i">first</span> + <span class="s">&quot; and &quot;</span> + <span class="i">second</span>;
  }
}

<span class="k">var</span> <span class="i">scone</span> = <span class="t">Scone</span>();
<span class="i">scone</span>.<span class="i">topping</span>(<span class="s">&quot;berries&quot;</span>, <span class="s">&quot;cream&quot;</span>);
</pre></div>
<p>That&rsquo;s three big steps. We can declare, access, and invoke methods. But
something is missing. We went to all that trouble to wrap the method closure in
an object that binds the receiver, but when we invoke the method, we don&rsquo;t use
that receiver at all.</p>
<h2><a href="#this" id="this"><small>28&#8202;.&#8202;3</small>This</a></h2>
<p>The reason bound methods need to keep hold of the receiver is so that it can be
accessed inside the body of the method. Lox exposes a method&rsquo;s receiver through
<code>this</code> expressions. It&rsquo;s time for some new syntax. The lexer already treats
<code>this</code> as a special token type, so the first step is wiring that token up in the
parse table.</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_SUPER]         = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_THIS</span>]          = {<span class="i">this_</span>,    <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_TRUE]          = {literal,  NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<aside name="this">
<p>The underscore at the end of the name of the parser function is because <code>this</code>
is a reserved word in C++ and we support compiling clox as C++.</p>
</aside>
<p>When the parser encounters a <code>this</code> in prefix position, it dispatches to a new
parser function.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>variable</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">this_</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
  <span class="i">variable</span>(<span class="k">false</span>);
}<span name="this"> </span>
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>variable</em>()</div>

<p>We&rsquo;ll apply the same implementation technique for <code>this</code> in clox that we used in
jlox. We treat <code>this</code> as a lexically scoped local variable whose value gets
magically initialized. Compiling it like a local variable means we get a lot of
behavior for free. In particular, closures inside a method that reference <code>this</code>
will do the right thing and capture the receiver in an upvalue.</p>
<p>When the parser function is called, the <code>this</code> token has just been consumed and
is stored as the previous token. We call our existing <code>variable()</code> function
which compiles identifier expressions as variable accesses. It takes a single
Boolean parameter for whether the compiler should look for a following <code>=</code>
operator and parse a setter. You can&rsquo;t assign to <code>this</code>, so we pass <code>false</code> to
disallow that.</p>
<p>The <code>variable()</code> function doesn&rsquo;t care that <code>this</code> has its own token type and
isn&rsquo;t an identifier. It is happy to treat the lexeme &ldquo;this&rdquo; as if it were a
variable name and then look it up using the existing scope resolution machinery.
Right now, that lookup will fail because we never declared a variable whose name
is &ldquo;this&rdquo;. It&rsquo;s time to think about where the receiver should live in memory.</p>
<p>At least until they get captured by closures, clox stores every local variable
on the VM&rsquo;s stack. The compiler keeps track of which slots in the function&rsquo;s
stack window are owned by which local variables. If you recall, the compiler
sets aside stack slot zero by declaring a local variable whose name is an empty
string.</p>
<p>For function calls, that slot ends up holding the function being called. Since
the slot has no name, the function body never accesses it. You can guess where
this is going. For <em>method</em> calls, we can repurpose that slot to store the
receiver. Slot zero will store the instance that <code>this</code> is bound to. In order to
compile <code>this</code> expressions, the compiler simply needs to give the correct name
to that local variable.</p>
<div class="codehilite"><pre class="insert-before">  local-&gt;isCaptured = false;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>initCompiler</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">type</span> != <span class="a">TYPE_FUNCTION</span>) {
    <span class="i">local</span>-&gt;<span class="i">name</span>.<span class="i">start</span> = <span class="s">&quot;this&quot;</span>;
    <span class="i">local</span>-&gt;<span class="i">name</span>.<span class="i">length</span> = <span class="n">4</span>;
  } <span class="k">else</span> {
    <span class="i">local</span>-&gt;<span class="i">name</span>.<span class="i">start</span> = <span class="s">&quot;&quot;</span>;
    <span class="i">local</span>-&gt;<span class="i">name</span>.<span class="i">length</span> = <span class="n">0</span>;
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>initCompiler</em>(), replace 2 lines</div>

<p>We want to do this only for methods. Function declarations don&rsquo;t have a <code>this</code>.
And, in fact, they <em>must not</em> declare a variable named &ldquo;this&rdquo;, so that if you
write a <code>this</code> expression inside a function declaration which is itself inside a
method, the <code>this</code> correctly resolves to the outer method&rsquo;s receiver.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Nested</span> {
  <span class="i">method</span>() {
    <span class="k">fun</span> <span class="i">function</span>() {
      <span class="k">print</span> <span class="k">this</span>;
    }

    <span class="i">function</span>();
  }
}

<span class="t">Nested</span>().<span class="i">method</span>();
</pre></div>
<p>This program should print &ldquo;Nested instance&rdquo;. To decide what name to give to
local slot zero, the compiler needs to know whether it&rsquo;s compiling a function or
method declaration, so we add a new case to our FunctionType enum to distinguish
methods.</p>
<div class="codehilite"><pre class="insert-before">  TYPE_FUNCTION,
</pre><div class="source-file"><em>compiler.c</em><br>
in enum <em>FunctionType</em></div>
<pre class="insert">  <span class="a">TYPE_METHOD</span>,
</pre><pre class="insert-after">  TYPE_SCRIPT
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in enum <em>FunctionType</em></div>

<p>When we compile a method, we use that type.</p>
<div class="codehilite"><pre class="insert-before">  uint8_t constant = identifierConstant(&amp;parser.previous);

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>method</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">FunctionType</span> <span class="i">type</span> = <span class="a">TYPE_METHOD</span>;
</pre><pre class="insert-after">  function(type);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>method</em>(), replace 1 line</div>

<p>Now we can correctly compile references to the special &ldquo;this&rdquo; variable, and the
compiler will emit the right <code>OP_GET_LOCAL</code> instructions to access it. Closures
can even capture <code>this</code> and store the receiver in upvalues. Pretty cool.</p>
<p>Except that at runtime, the receiver isn&rsquo;t actually <em>in</em> slot zero. The
interpreter isn&rsquo;t holding up its end of the bargain yet. Here is the fix:</p>
<div class="codehilite"><pre class="insert-before">      case OBJ_BOUND_METHOD: {
        ObjBoundMethod* bound = AS_BOUND_METHOD(callee);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>callValue</em>()</div>
<pre class="insert">        <span class="i">vm</span>.<span class="i">stackTop</span>[-<span class="i">argCount</span> - <span class="n">1</span>] = <span class="i">bound</span>-&gt;<span class="i">receiver</span>;
</pre><pre class="insert-after">        return call(bound-&gt;method, argCount);
      }
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>callValue</em>()</div>

<p>When a method is called, the top of the stack contains all of the arguments, and
then just under those is the closure of the called method. That&rsquo;s where slot
zero in the new CallFrame will be. This line of code inserts the receiver into
that slot. For example, given a method call like this:</p>
<div class="codehilite"><pre><span class="i">scone</span>.<span class="i">topping</span>(<span class="s">&quot;berries&quot;</span>, <span class="s">&quot;cream&quot;</span>);
</pre></div>
<p>We calculate the slot to store the receiver like so:</p><img src="image/methods-and-initializers/closure-slot.png" alt="Skipping over the argument stack slots to find the slot containing the closure." />
<p>The <code>-argCount</code> skips past the arguments and the <code>- 1</code> adjusts for the fact that
<code>stackTop</code> points just <em>past</em> the last used stack slot.</p>
<h3><a href="#misusing-this" id="misusing-this"><small>28&#8202;.&#8202;3&#8202;.&#8202;1</small>Misusing this</a></h3>
<p>Our VM now supports users <em>correctly</em> using <code>this</code>, but we also need to make
sure it properly handles users <em>mis</em>using <code>this</code>. Lox says it is a compile
error for a <code>this</code> expression to appear outside of the body of a method. These
two wrong uses should be caught by the compiler:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="k">this</span>; <span class="c">// At top level.</span>

<span class="k">fun</span> <span class="i">notMethod</span>() {
  <span class="k">print</span> <span class="k">this</span>; <span class="c">// In a function.</span>
}
</pre></div>
<p>So how does the compiler know if it&rsquo;s inside a method? The obvious answer is to
look at the FunctionType of the current Compiler. We did just add an enum case
there to treat methods specially. However, that wouldn&rsquo;t correctly handle code
like the earlier example where you are inside a function which is, itself,
nested inside a method.</p>
<p>We could try to resolve &ldquo;this&rdquo; and then report an error if it wasn&rsquo;t found in
any of the surrounding lexical scopes. That would work, but would require us to
shuffle around a bunch of code, since right now the code for resolving a
variable implicitly considers it a global access if no declaration is found.</p>
<p>In the next chapter, we will need information about the nearest enclosing class.
If we had that, we could use it here to determine if we are inside a method. So
we may as well make our future selves&rsquo; lives a little easier and put that
machinery in place now.</p>
<div class="codehilite"><pre class="insert-before">Compiler* current = NULL;
</pre><div class="source-file"><em>compiler.c</em><br>
add after variable <em>current</em></div>
<pre class="insert"><span class="t">ClassCompiler</span>* <span class="i">currentClass</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">

static Chunk* currentChunk() {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after variable <em>current</em></div>

<p>This module variable points to a struct representing the current, innermost
class being compiled. The new type looks like this:</p>
<div class="codehilite"><pre class="insert-before">} Compiler;
</pre><div class="source-file"><em>compiler.c</em><br>
add after struct <em>Compiler</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> <span class="t">ClassCompiler</span> {
  <span class="k">struct</span> <span class="t">ClassCompiler</span>* <span class="i">enclosing</span>;
} <span class="t">ClassCompiler</span>;
</pre><pre class="insert-after">

Parser parser;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after struct <em>Compiler</em></div>

<p>Right now we store only a pointer to the ClassCompiler for the enclosing class,
if any. Nesting a class declaration inside a method in some other class is an
uncommon thing to do, but Lox supports it. Just like the Compiler struct, this
means ClassCompiler forms a linked list from the current innermost class being
compiled out through all of the enclosing classes.</p>
<p>If we aren&rsquo;t inside any class declaration at all, the module variable
<code>currentClass</code> is <code>NULL</code>. When the compiler begins compiling a class, it pushes
a new ClassCompiler onto that implicit linked stack.</p>
<div class="codehilite"><pre class="insert-before">  defineVariable(nameConstant);

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">  <span class="t">ClassCompiler</span> <span class="i">classCompiler</span>;
  <span class="i">classCompiler</span>.<span class="i">enclosing</span> = <span class="i">currentClass</span>;
  <span class="i">currentClass</span> = &amp;<span class="i">classCompiler</span>;

</pre><pre class="insert-after">  namedVariable(className, false);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>The memory for the ClassCompiler struct lives right on the C stack, a handy
capability we get by writing our compiler using recursive descent. At the end of
the class body, we pop that compiler off the stack and restore the enclosing
one.</p>
<div class="codehilite"><pre class="insert-before">  emitByte(OP_POP);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">

  <span class="i">currentClass</span> = <span class="i">currentClass</span>-&gt;<span class="i">enclosing</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>When an outermost class body ends, <code>enclosing</code> will be <code>NULL</code>, so this resets
<code>currentClass</code> to <code>NULL</code>. Thus, to see if we are inside a class<span class="em">&mdash;</span>and therefore
inside a method<span class="em">&mdash;</span>we simply check that module variable.</p>
<div class="codehilite"><pre class="insert-before">static void this_(bool canAssign) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>this_</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">currentClass</span> == <span class="a">NULL</span>) {
    <span class="i">error</span>(<span class="s">&quot;Can&#39;t use &#39;this&#39; outside of a class.&quot;</span>);
    <span class="k">return</span>;
  }

</pre><pre class="insert-after">  variable(false);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>this_</em>()</div>

<p>With that, <code>this</code> outside of a class is correctly forbidden. Now our methods
really feel like <em>methods</em> in the object-oriented sense. Accessing the receiver
lets them affect the instance you called the method on. We&rsquo;re getting there!</p>
<h2><a href="#instance-initializers" id="instance-initializers"><small>28&#8202;.&#8202;4</small>Instance Initializers</a></h2>
<p>The reason object-oriented languages tie state and behavior together<span class="em">&mdash;</span>one of
the core tenets of the paradigm<span class="em">&mdash;</span>is to ensure that objects are always in a
valid, meaningful state. When the only way to touch an object&rsquo;s state is <span
name="through">through</span> its methods, the methods can make sure nothing
goes awry. But that presumes the object is <em>already</em> in a proper state. What
about when it&rsquo;s first created?</p>
<aside name="through">
<p>Of course, Lox does let outside code directly access and modify an instance&rsquo;s
fields without going through its methods. This is unlike Ruby and Smalltalk,
which completely encapsulate state inside objects. Our toy scripting language,
alas, isn&rsquo;t so principled.</p>
</aside>
<p>Object-oriented languages ensure that brand new objects are properly set up
through constructors, which both produce a new instance and initialize its
state. In Lox, the runtime allocates new raw instances, and a class may declare
an initializer to set up any fields. Initializers work mostly like normal
methods, with a few tweaks:</p>
<ol>
<li>
<p>The runtime automatically invokes the initializer method whenever an
instance of a class is created.</p>
</li>
<li>
<p>The caller that constructs an instance always gets the instance <span
name="return">back</span> after the initializer finishes, regardless of what
the initializer function itself returns. The initializer method doesn&rsquo;t need
to explicitly return <code>this</code>.</p>
</li>
<li>
<p>In fact, an initializer is <em>prohibited</em> from returning any value at all
since the value would never be seen anyway.</p>
</li>
</ol>
<aside name="return">
<p>It&rsquo;s as if the initializer is implicitly wrapped in a bundle of code like this:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">create</span>(<span class="i">klass</span>) {
  <span class="k">var</span> <span class="i">obj</span> = <span class="i">newInstance</span>(<span class="i">klass</span>);
  <span class="i">obj</span>.<span class="i">init</span>();
  <span class="k">return</span> <span class="i">obj</span>;
}
</pre></div>
<p>Note how the value returned by <code>init()</code> is discarded.</p>
</aside>
<p>Now that we support methods, to add initializers, we merely need to implement
those three special rules. We&rsquo;ll go in order.</p>
<h3><a href="#invoking-initializers" id="invoking-initializers"><small>28&#8202;.&#8202;4&#8202;.&#8202;1</small>Invoking initializers</a></h3>
<p>First, automatically calling <code>init()</code> on new instances:</p>
<div class="codehilite"><pre class="insert-before">        vm.stackTop[-argCount - 1] = OBJ_VAL(newInstance(klass));
</pre><div class="source-file"><em>vm.c</em><br>
in <em>callValue</em>()</div>
<pre class="insert">        <span class="t">Value</span> <span class="i">initializer</span>;
        <span class="k">if</span> (<span class="i">tableGet</span>(&amp;<span class="i">klass</span>-&gt;<span class="i">methods</span>, <span class="i">vm</span>.<span class="i">initString</span>,
                     &amp;<span class="i">initializer</span>)) {
          <span class="k">return</span> <span class="i">call</span>(<span class="a">AS_CLOSURE</span>(<span class="i">initializer</span>), <span class="i">argCount</span>);
        }
</pre><pre class="insert-after">        return true;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>callValue</em>()</div>

<p>After the runtime allocates the new instance, we look for an <code>init()</code> method on
the class. If we find one, we initiate a call to it. This pushes a new CallFrame
for the initializer&rsquo;s closure. Say we run this program:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Brunch</span> {
  <span class="i">init</span>(<span class="i">food</span>, <span class="i">drink</span>) {}
}

<span class="t">Brunch</span>(<span class="s">&quot;eggs&quot;</span>, <span class="s">&quot;coffee&quot;</span>);
</pre></div>
<p>When the VM executes the call to <code>Brunch()</code>, it goes like this:</p><img src="image/methods-and-initializers/init-call-frame.png" alt="The aligned stack windows for the Brunch() call and the corresponding init() method it forwards to." />
<p>Any arguments passed to the class when we called it are still sitting on the
stack above the instance. The new CallFrame for the <code>init()</code> method shares that
stack window, so those arguments implicitly get forwarded to the initializer.</p>
<p>Lox doesn&rsquo;t require a class to define an initializer. If omitted, the runtime
simply returns the new uninitialized instance. However, if there is no <code>init()</code>
method, then it doesn&rsquo;t make any sense to pass arguments to the class when
creating the instance. We make that an error.</p>
<div class="codehilite"><pre class="insert-before">          return call(AS_CLOSURE(initializer), argCount);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>callValue</em>()</div>
<pre class="insert">        } <span class="k">else</span> <span class="k">if</span> (<span class="i">argCount</span> != <span class="n">0</span>) {
          <span class="i">runtimeError</span>(<span class="s">&quot;Expected 0 arguments but got %d.&quot;</span>,
                       <span class="i">argCount</span>);
          <span class="k">return</span> <span class="k">false</span>;
</pre><pre class="insert-after">        }
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>callValue</em>()</div>

<p>When the class <em>does</em> provide an initializer, we also need to ensure that the
number of arguments passed matches the initializer&rsquo;s arity. Fortunately, the
<code>call()</code> helper does that for us already.</p>
<p>To call the initializer, the runtime looks up the <code>init()</code> method by name. We
want that to be fast since it happens every time an instance is constructed.
That means it would be good to take advantage of the string interning we&rsquo;ve
already implemented. To do that, the VM creates an ObjString for &ldquo;init&rdquo; and
reuses it. The string lives right in the VM struct.</p>
<div class="codehilite"><pre class="insert-before">  Table strings;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">ObjString</span>* <span class="i">initString</span>;
</pre><pre class="insert-after">  ObjUpvalue* openUpvalues;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>We create and intern the string when the VM boots up.</p>
<div class="codehilite"><pre class="insert-before">  initTable(&amp;vm.strings);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">

  <span class="i">vm</span>.<span class="i">initString</span> = <span class="i">copyString</span>(<span class="s">&quot;init&quot;</span>, <span class="n">4</span>);
</pre><pre class="insert-after">

  defineNative(&quot;clock&quot;, clockNative);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>We want it to stick around, so the GC considers it a root.</p>
<div class="codehilite"><pre class="insert-before">  markCompilerRoots();
</pre><div class="source-file"><em>memory.c</em><br>
in <em>markRoots</em>()</div>
<pre class="insert">  <span class="i">markObject</span>((<span class="t">Obj</span>*)<span class="i">vm</span>.<span class="i">initString</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, in <em>markRoots</em>()</div>

<p>Look carefully. See any bug waiting to happen? No? It&rsquo;s a subtle one. The
garbage collector now reads <code>vm.initString</code>. That field is initialized from the
result of calling <code>copyString()</code>. But copying a string allocates memory, which
can trigger a GC. If the collector ran at just the wrong time, it would read
<code>vm.initString</code> before it had been initialized. So, first we zero the field out.</p>
<div class="codehilite"><pre class="insert-before">  initTable(&amp;vm.strings);

</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">  <span class="i">vm</span>.<span class="i">initString</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">  vm.initString = copyString(&quot;init&quot;, 4);

</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>We clear the pointer when the VM shuts down since the next line, the call to <code>freeObjects()</code>, will free the string it points to.</p>
<div class="codehilite"><pre class="insert-before">  freeTable(&amp;vm.strings);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>freeVM</em>()</div>
<pre class="insert">  <span class="i">vm</span>.<span class="i">initString</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">  freeObjects();
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>freeVM</em>()</div>

<p>OK, that lets us call initializers.</p>
<h3><a href="#initializer-return-values" id="initializer-return-values"><small>28&#8202;.&#8202;4&#8202;.&#8202;2</small>Initializer return values</a></h3>
<p>The next step is ensuring that constructing an instance of a class with an
initializer always returns the new instance, and not <code>nil</code> or whatever the body
of the initializer returns. Right now, if a class defines an initializer, then
when an instance is constructed, the VM pushes a call to that initializer onto
the CallFrame stack. Then it just keeps on trucking.</p>
<p>The user&rsquo;s invocation of the class to create the instance will complete whenever
that initializer method returns, and will leave on the stack whatever value the
initializer puts there. That means that unless the user takes care to put
<code>return this;</code> at the end of the initializer, no instance will come out. Not
very helpful.</p>
<p>To fix this, whenever the front end compiles an initializer method, it will emit
different bytecode at the end of the body to return <code>this</code> from the method
instead of the usual implicit <code>nil</code> most functions return. In order to do
<em>that</em>, the compiler needs to actually know when it is compiling an initializer.
We detect that by checking to see if the name of the method we&rsquo;re compiling is
&ldquo;init&rdquo;.</p>
<div class="codehilite"><pre class="insert-before">  FunctionType type = TYPE_METHOD;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>method</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">length</span> == <span class="n">4</span> &amp;&amp;
      <span class="i">memcmp</span>(<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">start</span>, <span class="s">&quot;init&quot;</span>, <span class="n">4</span>) == <span class="n">0</span>) {
    <span class="i">type</span> = <span class="a">TYPE_INITIALIZER</span>;
  }

</pre><pre class="insert-after">  function(type);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>method</em>()</div>

<p>We define a new function type to distinguish initializers from other methods.</p>
<div class="codehilite"><pre class="insert-before">  TYPE_FUNCTION,
</pre><div class="source-file"><em>compiler.c</em><br>
in enum <em>FunctionType</em></div>
<pre class="insert">  <span class="a">TYPE_INITIALIZER</span>,
</pre><pre class="insert-after">  TYPE_METHOD,
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in enum <em>FunctionType</em></div>

<p>Whenever the compiler emits the implicit return at the end of a body, we check
the type to decide whether to insert the initializer-specific behavior.</p>
<div class="codehilite"><pre class="insert-before">static void emitReturn() {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>emitReturn</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">type</span> == <span class="a">TYPE_INITIALIZER</span>) {
    <span class="i">emitBytes</span>(<span class="a">OP_GET_LOCAL</span>, <span class="n">0</span>);
  } <span class="k">else</span> {
    <span class="i">emitByte</span>(<span class="a">OP_NIL</span>);
  }

</pre><pre class="insert-after">  emitByte(OP_RETURN);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>emitReturn</em>(), replace 1 line</div>

<p>In an initializer, instead of pushing <code>nil</code> onto the stack before returning,
we load slot zero, which contains the instance. This <code>emitReturn()</code> function is
also called when compiling a <code>return</code> statement without a value, so this also
correctly handles cases where the user does an early return inside the
initializer.</p>
<h3><a href="#incorrect-returns-in-initializers" id="incorrect-returns-in-initializers"><small>28&#8202;.&#8202;4&#8202;.&#8202;3</small>Incorrect returns in initializers</a></h3>
<p>The last step, the last item in our list of special features of initializers, is
making it an error to try to return anything <em>else</em> from an initializer. Now
that the compiler tracks the method type, this is straightforward.</p>
<div class="codehilite"><pre class="insert-before">  if (match(TOKEN_SEMICOLON)) {
    emitReturn();
  } else {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>returnStatement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">current</span>-&gt;<span class="i">type</span> == <span class="a">TYPE_INITIALIZER</span>) {
      <span class="i">error</span>(<span class="s">&quot;Can&#39;t return a value from an initializer.&quot;</span>);
    }

</pre><pre class="insert-after">    expression();
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>returnStatement</em>()</div>

<p>We report an error if a <code>return</code> statement in an initializer has a value. We
still go ahead and compile the value afterwards so that the compiler doesn&rsquo;t get
confused by the trailing expression and report a bunch of cascaded errors.</p>
<p>Aside from inheritance, which we&rsquo;ll get to <a href="superclasses.html">soon</a>, we now have a
fairly full-featured class system working in clox.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">CoffeeMaker</span> {
  <span class="i">init</span>(<span class="i">coffee</span>) {
    <span class="k">this</span>.<span class="i">coffee</span> = <span class="i">coffee</span>;
  }

  <span class="i">brew</span>() {
    <span class="k">print</span> <span class="s">&quot;Enjoy your cup of &quot;</span> + <span class="k">this</span>.<span class="i">coffee</span>;

    <span class="c">// No reusing the grounds!</span>
    <span class="k">this</span>.<span class="i">coffee</span> = <span class="k">nil</span>;
  }
}

<span class="k">var</span> <span class="i">maker</span> = <span class="t">CoffeeMaker</span>(<span class="s">&quot;coffee and chicory&quot;</span>);
<span class="i">maker</span>.<span class="i">brew</span>();
</pre></div>
<p>Pretty fancy for a C program that would fit on an old <span
name="floppy">floppy</span> disk.</p>
<aside name="floppy">
<p>I acknowledge that &ldquo;floppy disk&rdquo; may no longer be a useful size reference for
current generations of programmers. Maybe I should have said &ldquo;a few tweets&rdquo; or
something.</p>
</aside>
<h2><a href="#optimized-invocations" id="optimized-invocations"><small>28&#8202;.&#8202;5</small>Optimized Invocations</a></h2>
<p>Our VM correctly implements the language&rsquo;s semantics for method calls and
initializers. We could stop here. But the main reason we are building an entire
second implementation of Lox from scratch is to execute faster than our old Java
interpreter. Right now, method calls even in clox are slow.</p>
<p>Lox&rsquo;s semantics define a method invocation as two operations<span class="em">&mdash;</span>accessing the
method and then calling the result. Our VM must support those as separate
operations because the user <em>can</em> separate them. You can access a method without
calling it and then invoke the bound method later. Nothing we&rsquo;ve implemented so
far is unnecessary.</p>
<p>But <em>always</em> executing those as separate operations has a significant cost.
Every single time a Lox program accesses and invokes a method, the runtime
heap allocates a new ObjBoundMethod, initializes its fields, then pulls them
right back out. Later, the GC has to spend time freeing all of those ephemeral
bound methods.</p>
<p>Most of the time, a Lox program accesses a method and then immediately calls it.
The bound method is created by one bytecode instruction and then consumed by the
very next one. In fact, it&rsquo;s so immediate that the compiler can even textually
<em>see</em> that it&rsquo;s happening<span class="em">&mdash;</span>a dotted property access followed by an opening
parenthesis is most likely a method call.</p>
<p>Since we can recognize this pair of operations at compile time, we have the
opportunity to emit a <span name="super">new, special</span> instruction that
performs an optimized method call.</p>
<p>We start in the function that compiles dotted property expressions.</p>
<aside name="super" class="bottom">
<p>If you spend enough time watching your bytecode VM run, you&rsquo;ll notice it often
executes the same series of bytecode instructions one after the other. A classic
optimization technique is to define a new single instruction called a
<strong>superinstruction</strong> that fuses those into a single instruction with the same
behavior as the entire sequence.</p>
<p>One of the largest performance drains in a bytecode interpreter is the overhead
of decoding and dispatching each instruction. Fusing several instructions into
one eliminates some of that.</p>
<p>The challenge is determining <em>which</em> instruction sequences are common enough to
benefit from this optimization. Every new superinstruction claims an opcode for
its own use and there are only so many of those to go around. Add too many, and
you&rsquo;ll need a larger encoding for opcodes, which then increases code size and
makes decoding <em>all</em> instructions slower.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  if (canAssign &amp;&amp; match(TOKEN_EQUAL)) {
    expression();
    emitBytes(OP_SET_PROPERTY, name);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>dot</em>()</div>
<pre class="insert">  } <span class="k">else</span> <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_LEFT_PAREN</span>)) {
    <span class="t">uint8_t</span> <span class="i">argCount</span> = <span class="i">argumentList</span>();
    <span class="i">emitBytes</span>(<span class="a">OP_INVOKE</span>, <span class="i">name</span>);
    <span class="i">emitByte</span>(<span class="i">argCount</span>);
</pre><pre class="insert-after">  } else {
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>dot</em>()</div>

<p>After the compiler has parsed the property name, we look for a left parenthesis.
If we match one, we switch to a new code path. There, we compile the argument
list exactly like we do when compiling a call expression. Then we emit a single
new <code>OP_INVOKE</code> instruction. It takes two operands:</p>
<ol>
<li>
<p>The index of the property name in the constant table.</p>
</li>
<li>
<p>The number of arguments passed to the method.</p>
</li>
</ol>
<p>In other words, this single instruction combines the operands of the
<code>OP_GET_PROPERTY</code> and <code>OP_CALL</code> instructions it replaces, in that order. It
really is a fusion of those two instructions. Let&rsquo;s define it.</p>
<div class="codehilite"><pre class="insert-before">  OP_CALL,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_INVOKE</span>,
</pre><pre class="insert-after">  OP_CLOSURE,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>And add it to the disassembler:</p>
<div class="codehilite"><pre class="insert-before">    case OP_CALL:
      return byteInstruction(&quot;OP_CALL&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_INVOKE</span>:
      <span class="k">return</span> <span class="i">invokeInstruction</span>(<span class="s">&quot;OP_INVOKE&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_CLOSURE: {
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>This is a new, special instruction format, so it needs a little custom
disassembly logic.</p>
<div class="codehilite"><div class="source-file"><em>debug.c</em><br>
add after <em>constantInstruction</em>()</div>
<pre><span class="k">static</span> <span class="t">int</span> <span class="i">invokeInstruction</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">name</span>, <span class="t">Chunk</span>* <span class="i">chunk</span>,
                                <span class="t">int</span> <span class="i">offset</span>) {
  <span class="t">uint8_t</span> <span class="i">constant</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span> + <span class="n">1</span>];
  <span class="t">uint8_t</span> <span class="i">argCount</span> = <span class="i">chunk</span>-&gt;<span class="i">code</span>[<span class="i">offset</span> + <span class="n">2</span>];
  <span class="i">printf</span>(<span class="s">&quot;%-16s (%d args) %4d &#39;&quot;</span>, <span class="i">name</span>, <span class="i">argCount</span>, <span class="i">constant</span>);
  <span class="i">printValue</span>(<span class="i">chunk</span>-&gt;<span class="i">constants</span>.<span class="i">values</span>[<span class="i">constant</span>]);
  <span class="i">printf</span>(<span class="s">&quot;&#39;</span><span class="e">\n</span><span class="s">&quot;</span>);
  <span class="k">return</span> <span class="i">offset</span> + <span class="n">3</span>;
}
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, add after <em>constantInstruction</em>()</div>

<p>We read the two operands and then print out both the method name and the
argument count. Over in the interpreter&rsquo;s bytecode dispatch loop is where the
real action begins.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_INVOKE</span>: {
        <span class="t">ObjString</span>* <span class="i">method</span> = <span class="a">READ_STRING</span>();
        <span class="t">int</span> <span class="i">argCount</span> = <span class="a">READ_BYTE</span>();
        <span class="k">if</span> (!<span class="i">invoke</span>(<span class="i">method</span>, <span class="i">argCount</span>)) {
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span> - <span class="n">1</span>];
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_CLOSURE: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Most of the work happens in <code>invoke()</code>, which we&rsquo;ll get to. Here, we look up the
method name from the first operand and then read the argument count operand.
Then we hand off to <code>invoke()</code> to do the heavy lifting. That function returns
<code>true</code> if the invocation succeeds. As usual, a <code>false</code> return means a runtime
error occurred. We check for that here and abort the interpreter if disaster has
struck.</p>
<p>Finally, assuming the invocation succeeded, there is now a new CallFrame on the
stack, so we refresh our cached copy of the current frame in <code>frame</code>.</p>
<p>The interesting work happens here:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>callValue</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">invoke</span>(<span class="t">ObjString</span>* <span class="i">name</span>, <span class="t">int</span> <span class="i">argCount</span>) {
  <span class="t">Value</span> <span class="i">receiver</span> = <span class="i">peek</span>(<span class="i">argCount</span>);
  <span class="t">ObjInstance</span>* <span class="i">instance</span> = <span class="a">AS_INSTANCE</span>(<span class="i">receiver</span>);
  <span class="k">return</span> <span class="i">invokeFromClass</span>(<span class="i">instance</span>-&gt;<span class="i">klass</span>, <span class="i">name</span>, <span class="i">argCount</span>);
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>callValue</em>()</div>

<p>First we grab the receiver off the stack. The arguments passed to the method are
above it on the stack, so we peek that many slots down. Then it&rsquo;s a simple
matter to cast the object to an instance and invoke the method on it.</p>
<p>That does assume the object <em>is</em> an instance. As with <code>OP_GET_PROPERTY</code>
instructions, we also need to handle the case where a user incorrectly tries to
call a method on a value of the wrong type.</p>
<div class="codehilite"><pre class="insert-before">  Value receiver = peek(argCount);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>invoke</em>()</div>
<pre class="insert">

  <span class="k">if</span> (!<span class="a">IS_INSTANCE</span>(<span class="i">receiver</span>)) {
    <span class="i">runtimeError</span>(<span class="s">&quot;Only instances have methods.&quot;</span>);
    <span class="k">return</span> <span class="k">false</span>;
  }

</pre><pre class="insert-after">  ObjInstance* instance = AS_INSTANCE(receiver);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>invoke</em>()</div>

<p><span name="helper">That&rsquo;s</span> a runtime error, so we report that and bail
out. Otherwise, we get the instance&rsquo;s class and jump over to this other new
utility function:</p>
<aside name="helper">
<p>As you can guess by now, we split this code into a separate function because
we&rsquo;re going to reuse it later<span class="em">&mdash;</span>in this case for <code>super</code> calls.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>callValue</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">invokeFromClass</span>(<span class="t">ObjClass</span>* <span class="i">klass</span>, <span class="t">ObjString</span>* <span class="i">name</span>,
                            <span class="t">int</span> <span class="i">argCount</span>) {
  <span class="t">Value</span> <span class="i">method</span>;
  <span class="k">if</span> (!<span class="i">tableGet</span>(&amp;<span class="i">klass</span>-&gt;<span class="i">methods</span>, <span class="i">name</span>, &amp;<span class="i">method</span>)) {
    <span class="i">runtimeError</span>(<span class="s">&quot;Undefined property &#39;%s&#39;.&quot;</span>, <span class="i">name</span>-&gt;<span class="i">chars</span>);
    <span class="k">return</span> <span class="k">false</span>;
  }
  <span class="k">return</span> <span class="i">call</span>(<span class="a">AS_CLOSURE</span>(<span class="i">method</span>), <span class="i">argCount</span>);
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>callValue</em>()</div>

<p>This function combines the logic of how the VM implements <code>OP_GET_PROPERTY</code> and
<code>OP_CALL</code> instructions, in that order. First we look up the method by name in
the class&rsquo;s method table. If we don&rsquo;t find one, we report that runtime error and
exit.</p>
<p>Otherwise, we take the method&rsquo;s closure and push a call to it onto the CallFrame
stack. We don&rsquo;t need to heap allocate and initialize an ObjBoundMethod. In fact,
we don&rsquo;t even need to <span name="juggle">juggle</span> anything on the stack.
The receiver and method arguments are already right where they need to be.</p>
<aside name="juggle">
<p>This is a key reason <em>why</em> we use stack slot zero to store the receiver<span class="em">&mdash;</span>it&rsquo;s
how the caller already organizes the stack for a method call. An efficient
calling convention is an important part of a bytecode VM&rsquo;s performance story.</p>
</aside>
<p>If you fire up the VM and run a little program that calls methods now, you
should see the exact same behavior as before. But, if we did our job right, the
<em>performance</em> should be much improved. I wrote a little microbenchmark that
does a batch of 10,000 method calls. Then it tests how many of these batches it
can execute in 10 seconds. On my computer, without the new <code>OP_INVOKE</code>
instruction, it got through 1,089 batches. With this new optimization, it
finished 8,324 batches in the same time. That&rsquo;s <em>7.6 times faster</em>, which is a
huge improvement when it comes to programming language optimization.</p>
<p><span name="pat"></span></p>
<aside name="pat">
<p>We shouldn&rsquo;t pat ourselves on the back <em>too</em> firmly. This performance
improvement is relative to our own unoptimized method call implementation which
was quite slow. Doing a heap allocation for every single method call isn&rsquo;t going
to win any races.</p>
</aside><img src="image/methods-and-initializers/benchmark.png" alt="Bar chart comparing the two benchmark results." />
<h3><a href="#invoking-fields" id="invoking-fields"><small>28&#8202;.&#8202;5&#8202;.&#8202;1</small>Invoking fields</a></h3>
<p>The fundamental creed of optimization is: &ldquo;Thou shalt not break correctness.&rdquo;
<span name="monte">Users</span> like it when a language implementation gives
them an answer faster, but only if it&rsquo;s the <em>right</em> answer. Alas, our
implementation of faster method invocations fails to uphold that principle:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Oops</span> {
  <span class="i">init</span>() {
    <span class="k">fun</span> <span class="i">f</span>() {
      <span class="k">print</span> <span class="s">&quot;not a method&quot;</span>;
    }

    <span class="k">this</span>.<span class="i">field</span> = <span class="i">f</span>;
  }
}

<span class="k">var</span> <span class="i">oops</span> = <span class="t">Oops</span>();
<span class="i">oops</span>.<span class="i">field</span>();
</pre></div>
<p>The last line looks like a method call. The compiler thinks that it is and
dutifully emits an <code>OP_INVOKE</code> instruction for it. However, it&rsquo;s not. What is
actually happening is a <em>field</em> access that returns a function which then gets
called. Right now, instead of executing that correctly, our VM reports a runtime
error when it can&rsquo;t find a method named &ldquo;field&rdquo;.</p>
<aside name="monte">
<p>There are cases where users may be satisfied when a program sometimes returns
the wrong answer in return for running significantly faster or with a better
bound on the performance. This is the field of <a href="https://en.wikipedia.org/wiki/Monte_Carlo_algorithm"><strong>Monte Carlo
algorithms</strong></a>. For some use cases, this is a good trade-off.</p>
<p>The important part, though, is that the user is <em>choosing</em> to apply one of these
algorithms. We language implementers can&rsquo;t unilaterally decide to sacrifice
their program&rsquo;s correctness.</p>
</aside>
<p>Earlier, when we implemented <code>OP_GET_PROPERTY</code>, we handled both field and method
accesses. To squash this new bug, we need to do the same thing for <code>OP_INVOKE</code>.</p>
<div class="codehilite"><pre class="insert-before">  ObjInstance* instance = AS_INSTANCE(receiver);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>invoke</em>()</div>
<pre class="insert">

  <span class="t">Value</span> <span class="i">value</span>;
  <span class="k">if</span> (<span class="i">tableGet</span>(&amp;<span class="i">instance</span>-&gt;<span class="i">fields</span>, <span class="i">name</span>, &amp;<span class="i">value</span>)) {
    <span class="i">vm</span>.<span class="i">stackTop</span>[-<span class="i">argCount</span> - <span class="n">1</span>] = <span class="i">value</span>;
    <span class="k">return</span> <span class="i">callValue</span>(<span class="i">value</span>, <span class="i">argCount</span>);
  }

</pre><pre class="insert-after">  return invokeFromClass(instance-&gt;klass, name, argCount);
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>invoke</em>()</div>

<p>Pretty simple fix. Before looking up a method on the instance&rsquo;s class, we look
for a field with the same name. If we find a field, then we store it on the
stack in place of the receiver, <em>under</em> the argument list. This is how
<code>OP_GET_PROPERTY</code> behaves, since that instruction executes before a
subsequent parenthesized list of arguments has been evaluated.</p>
<p>Then we try to call that field&rsquo;s value like the callable that it hopefully is.
The <code>callValue()</code> helper will check the value&rsquo;s type and call it as appropriate
or report a runtime error if the field&rsquo;s value isn&rsquo;t a callable type like a
closure.</p>
<p>That&rsquo;s all it takes to make our optimization fully safe. We do sacrifice a
little performance, unfortunately. But that&rsquo;s the price you have to pay
sometimes. You occasionally get frustrated by optimizations you <em>could</em> do if
only the language wouldn&rsquo;t allow some annoying corner case. But, as language
<span name="designer">implementers</span>, we have to play the game we&rsquo;re given.</p>
<aside name="designer">
<p>As language <em>designers</em>, our role is very different. If we do control the
language itself, we may sometimes choose to restrict or change the language in
ways that enable optimizations. Users want expressive languages, but they also
want fast implementations. Sometimes it is good language design to sacrifice a
little power if you can give them perf in return.</p>
</aside>
<p>The code we wrote here follows a typical pattern in optimization:</p>
<ol>
<li>
<p>Recognize a common operation or sequence of operations that is performance
critical. In this case, it is a method access followed by a call.</p>
</li>
<li>
<p>Add an optimized implementation of that pattern. That&rsquo;s our <code>OP_INVOKE</code>
instruction.</p>
</li>
<li>
<p>Guard the optimized code with some conditional logic that validates that the
pattern actually applies. If it does, stay on the fast path. Otherwise, fall
back to a slower but more robust unoptimized behavior. Here, that means
checking that we are actually calling a method and not accessing a field.</p>
</li>
</ol>
<p>As your language work moves from getting the implementation working <em>at all</em> to
getting it to work <em>faster</em>, you will find yourself spending more and more
time looking for patterns like this and adding guarded optimizations for them.
Full-time VM engineers spend much of their careers in this loop.</p>
<p>But we can stop here for now. With this, clox now supports most of the features
of an object-oriented programming language, and with respectable performance.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>The hash table lookup to find a class&rsquo;s <code>init()</code> method is constant time,
but still fairly slow. Implement something faster. Write a benchmark and
measure the performance difference.</p>
</li>
<li>
<p>In a dynamically typed language like Lox, a single callsite may invoke a
variety of methods on a number of classes throughout a program&rsquo;s execution.
Even so, in practice, most of the time a callsite ends up calling the exact
same method on the exact same class for the duration of the run. Most calls
are actually not polymorphic even if the language says they can be.</p>
<p>How do advanced language implementations optimize based on that observation?</p>
</li>
<li>
<p>When interpreting an <code>OP_INVOKE</code> instruction, the VM has to do two hash
table lookups. First, it looks for a field that could shadow a method, and
only if that fails does it look for a method. The former check is rarely
useful<span class="em">&mdash;</span>most fields do not contain functions. But it is <em>necessary</em>
because the language says fields and methods are accessed using the same
syntax, and fields shadow methods.</p>
<p>That is a language <em>choice</em> that affects the performance of our
implementation. Was it the right choice? If Lox were your language, what
would you do?</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Novelty Budget</a></h2>
<p>I still remember the first time I wrote a tiny BASIC program on a TRS-80 and
made a computer do something it hadn&rsquo;t done before. It felt like a superpower.
The first time I cobbled together just enough of a parser and interpreter to let
me write a tiny program in <em>my own language</em> that made a computer do a thing was
like some sort of higher-order meta-superpower. It was and remains a wonderful
feeling.</p>
<p>I realized I could design a language that looked and behaved however I chose. It
was like I&rsquo;d been going to a private school that required uniforms my whole life
and then one day transferred to a public school where I could wear whatever I
wanted. I don&rsquo;t need to use curly braces for blocks? I can use something other
than an equals sign for assignment? I can do objects without classes? Multiple
inheritance <em>and</em> multimethods? A dynamic language that overloads statically, by
arity?</p>
<p>Naturally, I took that freedom and ran with it. I made the weirdest, most
arbitrary language design decisions. Apostrophes for generics. No commas between
arguments. Overload resolution that can fail at runtime. I did things
differently just for difference&rsquo;s sake.</p>
<p>This is a very fun experience that I highly recommend. We need more weird,
avant-garde programming languages. I want to see more art languages. I still
make oddball toy languages for fun sometimes.</p>
<p><em>However</em>, if your goal is success where &ldquo;success&rdquo; is defined as a large number
of users, then your priorities must be different. In that case, your primary
goal is to have your language loaded into the brains of as many people as
possible. That&rsquo;s <em>really hard</em>. It takes a lot of human effort to move a
language&rsquo;s syntax and semantics from a computer into trillions of neurons.</p>
<p>Programmers are naturally conservative with their time and cautious about what
languages are worth uploading into their wetware. They don&rsquo;t want to waste their
time on a language that ends up not being useful to them. As a language
designer, your goal is thus to give them as much language power as you can with
as little required learning as possible.</p>
<p>One natural approach is <em>simplicity</em>. The fewer concepts and features your
language has, the less total volume of stuff there is to learn. This is one of
the reasons minimal <span name="dynamic">scripting</span> languages often find
success even though they aren&rsquo;t as powerful as the big industrial languages<span class="em">&mdash;</span>they are easier to get started with, and once they are in someone&rsquo;s brain, the
user wants to keep using them.</p>
<aside name="dynamic">
<p>In particular, this is a big advantage of dynamically typed languages. A static
language requires you to learn <em>two</em> languages<span class="em">&mdash;</span>the runtime semantics and the
static type system<span class="em">&mdash;</span>before you can get to the point where you are making the
computer do stuff. Dynamic languages require you to learn only the former.</p>
<p>Eventually, programs get big enough that the value of static analysis pays for
the effort to learn that second static language, but the value proposition isn&rsquo;t
as obvious at the outset.</p>
</aside>
<p>The problem with simplicity is that simply cutting features often sacrifices
power and expressiveness. There is an art to finding features that punch above
their weight, but often minimal languages simply do less.</p>
<p>There is another path that avoids much of that problem. The trick is to realize
that a user doesn&rsquo;t have to load your entire language into their head, <em>just the
part they don&rsquo;t already have in there</em>. As I mentioned in an <a href="parsing-expressions.html#design-note">earlier design
note</a>, learning is about transferring the <em>delta</em> between what they
already know and what they need to know.</p>
<p>Many potential users of your language already know some other programming
language. Any features your language shares with that language are essentially
&ldquo;free&rdquo; when it comes to learning. It&rsquo;s already in their head, they just have to
recognize that your language does the same thing.</p>
<p>In other words, <em>familiarity</em> is another key tool to lower the adoption cost of
your language. Of course, if you fully maximize that attribute, the end result
is a language that is completely identical to some existing one. That&rsquo;s not a
recipe for success, because at that point there&rsquo;s no incentive for users to
switch to your language at all.</p>
<p>So you do need to provide some compelling differences. Some things your language
can do that other languages can&rsquo;t, or at least can&rsquo;t do as well. I believe this
is one of the fundamental balancing acts of language design: similarity to other
languages lowers learning cost, while divergence raises the compelling
advantages.</p>
<p>I think of this balancing act in terms of a <span name="idiosyncracy"><strong>novelty
budget</strong></span>, or as Steve Klabnik calls it, a &ldquo;<a href="https://words.steveklabnik.com/the-language-strangeness-budget">strangeness budget</a>&rdquo;. Users
have a low threshold for the total amount of new stuff they are willing to
accept to learn a new language. Exceed that, and they won&rsquo;t show up.</p>
<aside name="idiosyncracy">
<p>A related concept in psychology is <a href="https://en.wikipedia.org/wiki/Idiosyncrasy_credit"><strong>idiosyncrasy credit</strong></a>, the
idea that other people in society grant you a finite number of deviations from
social norms. You earn credit by fitting in and doing in-group things, which you
can then spend on oddball activities that might otherwise raise eyebrows. In
other words, demonstrating that you are &ldquo;one of the good ones&rdquo; gives you license
to raise your freak flag, but only so far.</p>
</aside>
<p>Anytime you add something new to your language that other languages don&rsquo;t have,
or anytime your language does something other languages do in a different way,
you spend some of that budget. That&rsquo;s OK<span class="em">&mdash;</span>you <em>need</em> to spend it to make your
language compelling. But your goal is to spend it <em>wisely</em>. For each feature or
difference, ask yourself how much compelling power it adds to your language and
then evaluate critically whether it pays its way. Is the change so valuable that
it is worth blowing some of your novelty budget?</p>
<p>In practice, I find this means that you end up being pretty conservative with
syntax and more adventurous with semantics. As fun as it is to put on a new
change of clothes, swapping out curly braces with some other block delimiter is
very unlikely to add much real power to the language, but it does spend some
novelty. It&rsquo;s hard for syntax differences to carry their weight.</p>
<p>On the other hand, new semantics can significantly increase the power of the
language. Multimethods, mixins, traits, reflection, dependent types, runtime
metaprogramming, etc. can radically level up what a user can do with the
language.</p>
<p>Alas, being conservative like this is not as fun as just changing everything.
But it&rsquo;s up to you to decide whether you want to chase mainstream success or not
in the first place. We don&rsquo;t all need to be radio-friendly pop bands. If you
want your language to be like free jazz or drone metal and are happy with the
proportionally smaller (but likely more devoted) audience size, go for it.</p>
</div>

<footer>
<a href="superclasses.html" class="next">
  Next Chapter: &ldquo;Superclasses&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/optimization.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Optimization &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Optimization<small>30</small></a></h3>

<ul>
    <li><a href="#measuring-performance"><small>30.1</small> Measuring Performance</a></li>
    <li><a href="#faster-hash-table-probing"><small>30.2</small> Faster Hash Table Probing</a></li>
    <li><a href="#nan-boxing"><small>30.3</small> NaN Boxing</a></li>
    <li><a href="#where-to-next"><small>30.4</small> Where to Next</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="superclasses.html" title="Superclasses" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="backmatter.html" title="Backmatter" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="superclasses.html" title="Superclasses" class="prev">←</a>
<a href="backmatter.html" title="Backmatter" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Optimization<small>30</small></a></h3>

<ul>
    <li><a href="#measuring-performance"><small>30.1</small> Measuring Performance</a></li>
    <li><a href="#faster-hash-table-probing"><small>30.2</small> Faster Hash Table Probing</a></li>
    <li><a href="#nan-boxing"><small>30.3</small> NaN Boxing</a></li>
    <li><a href="#where-to-next"><small>30.4</small> Where to Next</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="superclasses.html" title="Superclasses" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="backmatter.html" title="Backmatter" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">30</div>
  <h1>Optimization</h1>

<blockquote>
<p>The evening&rsquo;s the best part of the day. You&rsquo;ve done your day&rsquo;s work. Now you
can put your feet up and enjoy it.</p>
<p><cite>Kazuo Ishiguro, <em>The Remains of the Day</em></cite></p>
</blockquote>
<p>If I still lived in New Orleans, I&rsquo;d call this chapter a <em>lagniappe</em>, a little
something extra given for free to a customer. You&rsquo;ve got a whole book and a
complete virtual machine already, but I want you to have some more fun hacking
on clox. This time, we&rsquo;re going for pure performance. We&rsquo;ll apply two very
different optimizations to our virtual machine.  In the process, you&rsquo;ll get a
feel for measuring and improving the performance of a language implementation<span class="em">&mdash;</span>or any program, really.</p>
<h2><a href="#measuring-performance" id="measuring-performance"><small>30&#8202;.&#8202;1</small>Measuring Performance</a></h2>
<p><strong>Optimization</strong> means taking a working application and improving its
performance. An optimized program does the same thing, it just takes fewer
resources to do so. The resource we usually think of when optimizing is runtime
speed, but it can also be important to reduce memory usage, startup time,
persistent storage size, or network bandwidth. All physical resources have some
cost<span class="em">&mdash;</span>even if the cost is mostly in wasted human time<span class="em">&mdash;</span>so optimization work
often pays off.</p>
<p>There was a time in the early days of computing when a skilled programmer could
hold the entire hardware architecture and compiler pipeline in their head and
understand a program&rsquo;s performance just by thinking real hard. Those days are
long gone, separated from the present by microcode, cache lines, branch
prediction, deep compiler pipelines, and mammoth instruction sets. We like to
pretend C is a &ldquo;low-level&rdquo; language, but the stack of technology between</p>
<div class="codehilite"><pre><span class="i">printf</span>(<span class="s">&quot;Hello, world!&quot;</span>);
</pre></div>
<p>and a greeting appearing on screen is now perilously tall.</p>
<p>Optimization today is an empirical science. Our program is a border collie
sprinting through the hardware&rsquo;s obstacle course. If we want her to reach the
end faster, we can&rsquo;t just sit and ruminate on canine physiology until
enlightenment strikes. Instead, we need to <em>observe</em> her performance, see where
she stumbles, and then find faster paths for her to take.</p>
<p>Much like agility training is particular to one dog and one obstacle course, we
can&rsquo;t assume that our virtual machine optimizations will make <em>all</em> Lox programs
run faster on <em>all</em> hardware. Different Lox programs stress different areas of
the VM, and different architectures have their own strengths and weaknesses.</p>
<h3><a href="#benchmarks" id="benchmarks"><small>30&#8202;.&#8202;1&#8202;.&#8202;1</small>Benchmarks</a></h3>
<p>When we add new functionality, we validate correctness by writing tests<span class="em">&mdash;</span>Lox
programs that use a feature and validate the VM&rsquo;s behavior. Tests pin down
semantics and ensure we don&rsquo;t break existing features when we add new ones. We
have similar needs when it comes to performance:</p>
<ol>
<li>
<p>How do we validate that an optimization <em>does</em> improve performance, and by
how much?</p>
</li>
<li>
<p>How do we ensure that other unrelated changes don&rsquo;t <em>regress</em> performance?</p>
</li>
</ol>
<p>The Lox programs we write to accomplish those goals are <strong>benchmarks</strong>. These
are carefully crafted programs that stress some part of the language
implementation. They measure not <em>what</em> the program does, but how <span
name="much"><em>long</em></span> it takes to do it.</p>
<aside name="much">
<p>Most benchmarks measure running time. But, of course, you&rsquo;ll eventually find
yourself needing to write benchmarks that measure memory allocation, how much
time is spent in the garbage collector, startup time, etc.</p>
</aside>
<p>By measuring the performance of a benchmark before and after a change, you can
see what your change does. When you land an optimization, all of the tests
should behave exactly the same as they did before, but hopefully the benchmarks
run faster.</p>
<p>Once you have an entire <span name="js"><em>suite</em></span> of benchmarks, you can
measure not just <em>that</em> an optimization changes performance, but on which
<em>kinds</em> of code. Often you&rsquo;ll find that some benchmarks get faster while others
get slower. Then you have to make hard decisions about what kinds of code your
language implementation optimizes for.</p>
<p>The suite of benchmarks you choose to write is a key part of that decision. In
the same way that your tests encode your choices around what correct behavior
looks like, your benchmarks are the embodiment of your priorities when it comes
to performance. They will guide which optimizations you implement, so choose
your benchmarks carefully, and don&rsquo;t forget to periodically reflect on whether
they are helping you reach your larger goals.</p>
<aside name="js">
<p>In the early proliferation of JavaScript VMs, the first widely used benchmark
suite was SunSpider from WebKit. During the browser wars, marketing folks used
SunSpider results to claim their browser was fastest. That highly incentivized
VM hackers to optimize to those benchmarks.</p>
<p>Unfortunately, SunSpider programs often didn&rsquo;t match real-world JavaScript. They
were mostly microbenchmarks<span class="em">&mdash;</span>tiny toy programs that completed quickly. Those
benchmarks penalize complex just-in-time compilers that start off slower but get
<em>much</em> faster once the JIT has had enough time to optimize and re-compile hot
code paths. This put VM hackers in the unfortunate position of having to choose
between making the SunSpider numbers get better, or actually optimizing the
kinds of programs real users ran.</p>
<p>Google&rsquo;s V8 team responded by sharing their Octane benchmark suite, which was
closer to real-world code at the time. Years later, as JavaScript use patterns
continued to evolve, even Octane outlived its usefulness. Expect that your
benchmarks will evolve as your language&rsquo;s ecosystem does.</p>
<p>Remember, the ultimate goal is to make <em>user programs</em> faster, and benchmarks
are only a proxy for that.</p>
</aside>
<p>Benchmarking is a subtle art. As with tests, you need to balance not overfitting to
your implementation while ensuring that the benchmark does actually tickle the
code paths that you care about. When you measure performance, you need to
compensate for variance caused by CPU throttling, caching, and other weird
hardware and operating system quirks. I won&rsquo;t give you a whole sermon here,
but treat benchmarking as its own skill that improves with practice.</p>
<h3><a href="#profiling" id="profiling"><small>30&#8202;.&#8202;1&#8202;.&#8202;2</small>Profiling</a></h3>
<p>OK, so you&rsquo;ve got a few benchmarks now. You want to make them go faster. Now
what? First of all, let&rsquo;s assume you&rsquo;ve done all the obvious, easy work. You are
using the right algorithms and data structures<span class="em">&mdash;</span>or, at least, you aren&rsquo;t using
ones that are aggressively wrong. I don&rsquo;t consider using a hash table instead of
a linear search through a huge unsorted array &ldquo;optimization&rdquo; so much as &ldquo;good
software engineering&rdquo;.</p>
<p>Since the hardware is too complex to reason about our program&rsquo;s performance from
first principles, we have to go out into the field. That means <em>profiling</em>. A
<strong>profiler</strong>, if you&rsquo;ve never used one, is a tool that runs your <span
name="program">program</span> and tracks hardware resource use as the code
executes. Simple ones show you how much time was spent in each function in your
program. Sophisticated ones log data cache misses, instruction cache misses,
branch mispredictions, memory allocations, and all sorts of other metrics.</p>
<aside name="program">
<p>&ldquo;Your program&rdquo; here means the Lox VM itself running some <em>other</em> Lox program. We
are trying to optimize clox, not the user&rsquo;s Lox script. Of course, the choice of
which Lox program to load into our VM will greatly affect which parts of clox get
stressed, which is why benchmarks are so important.</p>
<p>A profiler <em>won&rsquo;t</em> show us how much time is spent in each <em>Lox</em> function in the
script being run. We&rsquo;d have to write our own &ldquo;Lox profiler&rdquo; to do that, which is
slightly out of scope for this book.</p>
</aside>
<p>There are many profilers out there for various operating systems and languages.
On whatever platform you program, it&rsquo;s worth getting familiar with a decent
profiler. You don&rsquo;t need to be a master. I have learned things within minutes of
throwing a program at a profiler that would have taken me <em>days</em> to discover on
my own through trial and error. Profilers are wonderful, magical tools.</p>
<h2><a href="#faster-hash-table-probing" id="faster-hash-table-probing"><small>30&#8202;.&#8202;2</small>Faster Hash Table Probing</a></h2>
<p>Enough pontificating, let&rsquo;s get some performance charts going up and to the
right. The first optimization we&rsquo;ll do, it turns out, is about the <em>tiniest</em>
possible change we could make to our VM.</p>
<p>When I first got the bytecode virtual machine that clox is descended from
working, I did what any self-respecting VM hacker would do. I cobbled together a
couple of benchmarks, fired up a profiler, and ran those scripts through my
interpreter. In a dynamically typed language like Lox, a large fraction of user
code is field accesses and method calls, so one of my benchmarks looked
something like this:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Zoo</span> {
  <span class="i">init</span>() {
    <span class="k">this</span>.<span class="i">aardvark</span> = <span class="n">1</span>;
    <span class="k">this</span>.<span class="i">baboon</span>   = <span class="n">1</span>;
    <span class="k">this</span>.<span class="i">cat</span>      = <span class="n">1</span>;
    <span class="k">this</span>.<span class="i">donkey</span>   = <span class="n">1</span>;
    <span class="k">this</span>.<span class="i">elephant</span> = <span class="n">1</span>;
    <span class="k">this</span>.<span class="i">fox</span>      = <span class="n">1</span>;
  }
  <span class="i">ant</span>()    { <span class="k">return</span> <span class="k">this</span>.<span class="i">aardvark</span>; }
  <span class="i">banana</span>() { <span class="k">return</span> <span class="k">this</span>.<span class="i">baboon</span>; }
  <span class="i">tuna</span>()   { <span class="k">return</span> <span class="k">this</span>.<span class="i">cat</span>; }
  <span class="i">hay</span>()    { <span class="k">return</span> <span class="k">this</span>.<span class="i">donkey</span>; }
  <span class="i">grass</span>()  { <span class="k">return</span> <span class="k">this</span>.<span class="i">elephant</span>; }
  <span class="i">mouse</span>()  { <span class="k">return</span> <span class="k">this</span>.<span class="i">fox</span>; }
}

<span class="k">var</span> <span class="i">zoo</span> = <span class="t">Zoo</span>();
<span class="k">var</span> <span class="i">sum</span> = <span class="n">0</span>;
<span class="k">var</span> <span class="i">start</span> = <span class="i">clock</span>();
<span class="k">while</span> (<span class="i">sum</span> &lt; <span class="n">100000000</span>) {
  <span class="i">sum</span> = <span class="i">sum</span> + <span class="i">zoo</span>.<span class="i">ant</span>()
            + <span class="i">zoo</span>.<span class="i">banana</span>()
            + <span class="i">zoo</span>.<span class="i">tuna</span>()
            + <span class="i">zoo</span>.<span class="i">hay</span>()
            + <span class="i">zoo</span>.<span class="i">grass</span>()
            + <span class="i">zoo</span>.<span class="i">mouse</span>();
}

<span class="k">print</span> <span class="i">clock</span>() - <span class="i">start</span>;
<span class="k">print</span> <span class="i">sum</span>;
</pre></div>
<aside name="sum" class="bottom">
<p>Another thing this benchmark is careful to do is <em>use</em> the result of the code it
executes. By calculating a rolling sum and printing the result, we ensure the VM
<em>must</em> execute all that Lox code. This is an important habit. Unlike our simple
Lox VM, many compilers do aggressive dead code elimination and are smart enough
to discard a computation whose result is never used.</p>
<p>Many a programming language hacker has been impressed by the blazing performance
of a VM on some benchmark, only to realize that it&rsquo;s because the compiler
optimized the entire benchmark program away to nothing.</p>
</aside>
<p>If you&rsquo;ve never seen a benchmark before, this might seem ludicrous. <em>What</em> is
going on here? The program itself doesn&rsquo;t intend to <span name="sum">do</span>
anything useful. What it does do is call a bunch of methods and access a bunch
of fields since those are the parts of the language we&rsquo;re interested in. Fields
and methods live in hash tables, so it takes care to populate at least a <span
name="more"><em>few</em></span> interesting keys in those tables. That is all wrapped
in a big loop to ensure our profiler has enough execution time to dig in and see
where the cycles are going.</p>
<aside name="more">
<p>If you really want to benchmark hash table performance, you should use many
tables of different sizes. The six keys we add to each table here aren&rsquo;t even
enough to get over our hash table&rsquo;s eight-element minimum threshold. But I
didn&rsquo;t want to throw an enormous benchmark script at you. Feel free to add more
critters and treats if you like.</p>
</aside>
<p>Before I tell you what my profiler showed me, spend a minute taking a few
guesses. Where in clox&rsquo;s codebase do you think the VM spent most of its time? Is
there any code we&rsquo;ve written in previous chapters that you suspect is
particularly slow?</p>
<p>Here&rsquo;s what I found: Naturally, the function with the greatest inclusive time is
<code>run()</code>. (<strong>Inclusive time</strong> means the total time spent in some function and all
other functions it calls<span class="em">&mdash;</span>the total time between when you enter the function
and when it returns.) Since <code>run()</code> is the main bytecode execution loop, it
drives everything.</p>
<p>Inside <code>run()</code>, there are small chunks of time sprinkled in various cases in the
bytecode switch for common instructions like <code>OP_POP</code>, <code>OP_RETURN</code>, and
<code>OP_ADD</code>. The big heavy instructions are <code>OP_GET_GLOBAL</code> with 17% of the
execution time, <code>OP_GET_PROPERTY</code> at 12%, and <code>OP_INVOKE</code>, which takes a whopping
42% of the total running time.</p>
<p>So we&rsquo;ve got three hotspots to optimize? Actually, no. Because it turns out
those three instructions spend almost all of their time inside calls to the same
function: <code>tableGet()</code>. That function claims a whole 72% of the execution time
(again, inclusive). Now, in a dynamically typed language, we expect to spend a
fair bit of time looking stuff up in hash tables<span class="em">&mdash;</span>it&rsquo;s sort of the price of
dynamism. But, still, <em>wow.</em></p>
<h3><a href="#slow-key-wrapping" id="slow-key-wrapping"><small>30&#8202;.&#8202;2&#8202;.&#8202;1</small>Slow key wrapping</a></h3>
<p>If you take a look at <code>tableGet()</code>, you&rsquo;ll see it&rsquo;s mostly a wrapper around a
call to <code>findEntry()</code> where the actual hash table lookup happens. To refresh
your memory, here it is in full:</p>
<div class="codehilite"><pre><span class="k">static</span> <span class="t">Entry</span>* <span class="i">findEntry</span>(<span class="t">Entry</span>* <span class="i">entries</span>, <span class="t">int</span> <span class="i">capacity</span>,
                        <span class="t">ObjString</span>* <span class="i">key</span>) {
  <span class="t">uint32_t</span> <span class="i">index</span> = <span class="i">key</span>-&gt;<span class="i">hash</span> % <span class="i">capacity</span>;
  <span class="t">Entry</span>* <span class="i">tombstone</span> = <span class="a">NULL</span>;

  <span class="k">for</span> (;;) {
    <span class="t">Entry</span>* <span class="i">entry</span> = &amp;<span class="i">entries</span>[<span class="i">index</span>];
    <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="a">NULL</span>) {
      <span class="k">if</span> (<span class="a">IS_NIL</span>(<span class="i">entry</span>-&gt;<span class="i">value</span>)) {
        <span class="c">// Empty entry.</span>
        <span class="k">return</span> <span class="i">tombstone</span> != <span class="a">NULL</span> ? <span class="i">tombstone</span> : <span class="i">entry</span>;
      } <span class="k">else</span> {
        <span class="c">// We found a tombstone.</span>
        <span class="k">if</span> (<span class="i">tombstone</span> == <span class="a">NULL</span>) <span class="i">tombstone</span> = <span class="i">entry</span>;
      }
    } <span class="k">else</span> <span class="k">if</span> (<span class="i">entry</span>-&gt;<span class="i">key</span> == <span class="i">key</span>) {
      <span class="c">// We found the key.</span>
      <span class="k">return</span> <span class="i">entry</span>;
    }

    <span class="i">index</span> = (<span class="i">index</span> + <span class="n">1</span>) % <span class="i">capacity</span>;
  }
}
</pre></div>
<p>When running that previous benchmark<span class="em">&mdash;</span>on my machine, at least<span class="em">&mdash;</span>the VM spends
70% of the total execution time on <em>one line</em> in this function. Any guesses as
to which one? No? It&rsquo;s this:</p>
<div class="codehilite"><pre>  <span class="t">uint32_t</span> <span class="i">index</span> = <span class="i">key</span>-&gt;<span class="i">hash</span> % <span class="i">capacity</span>;
</pre></div>
<p>That pointer dereference isn&rsquo;t the problem. It&rsquo;s the little <code>%</code>. It turns out
the modulo operator is <em>really</em> slow. Much slower than other <span
name="division">arithmetic</span> operators. Can we do something better?</p>
<aside name="division">
<p>Pipelining makes it hard to talk about the performance of an individual CPU
instruction, but to give you a feel for things, division and modulo are about
30-50 <em>times</em> slower than addition and subtraction on x86.</p>
</aside>
<p>In the general case, it&rsquo;s really hard to re-implement a fundamental arithmetic
operator in user code in a way that&rsquo;s faster than what the CPU itself can do.
After all, our C code ultimately compiles down to the CPU&rsquo;s own arithmetic
operations. If there were tricks we could use to go faster, the chip would
already be using them.</p>
<p>However, we can take advantage of the fact that we know more about our problem
than the CPU does. We use modulo here to take a key string&rsquo;s hash code and
wrap it to fit within the bounds of the table&rsquo;s entry array. That array starts
out at eight elements and grows by a factor of two each time. We know<span class="em">&mdash;</span>and the
CPU and C compiler do not<span class="em">&mdash;</span>that our table&rsquo;s size is always a power of two.</p>
<p>Because we&rsquo;re clever bit twiddlers, we know a faster way to calculate the
remainder of a number modulo a power of two: <strong>bit masking</strong>. Let&rsquo;s say we want
to calculate 229 modulo 64. The answer is 37, which is not particularly apparent
in decimal, but is clearer when you view those numbers in binary:</p><img src="image/optimization/mask.png" alt="The bit patterns resulting from 229 % 64 = 37 and 229 &amp; 63 = 37." />
<p>On the left side of the illustration, notice how the result (37) is simply the
dividend (229) with the highest two bits shaved off? Those two highest bits are
the bits at or to the left of the divisor&rsquo;s single 1 bit.</p>
<p>On the right side, we get the same result by taking 229 and bitwise <span
class="small-caps">AND</span>-ing it with 63, which is one less than our
original power of two divisor. Subtracting one from a power of two gives you a
series of 1 bits. That is exactly the mask we need in order to strip out those
two leftmost bits.</p>
<p>In other words, you can calculate a number modulo any power of two by simply
<span class="small-caps">AND</span>-ing it with that power of two minus one. I&rsquo;m
not enough of a mathematician to <em>prove</em> to you that this works, but if you
think it through, it should make sense. We can replace that slow modulo operator
with a very fast decrement and bitwise <span class="small-caps">AND</span>. We
simply change the offending line of code to this:</p>
<div class="codehilite"><pre class="insert-before">static Entry* findEntry(Entry* entries, int capacity,
                        ObjString* key) {
</pre><div class="source-file"><em>table.c</em><br>
in <em>findEntry</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">uint32_t</span> <span class="i">index</span> = <span class="i">key</span>-&gt;<span class="i">hash</span> &amp; (<span class="i">capacity</span> - <span class="n">1</span>);
</pre><pre class="insert-after">  Entry* tombstone = NULL;
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>findEntry</em>(), replace 1 line</div>

<p>CPUs love bitwise operators, so it&rsquo;s hard to <span name="sub">improve</span> on that.</p>
<aside name="sub">
<p>Another potential improvement is to eliminate the decrement by storing the bit
mask directly instead of the capacity. In my tests, that didn&rsquo;t make a
difference. Instruction pipelining makes some operations essentially free if the
CPU is bottlenecked elsewhere.</p>
</aside>
<p>Our linear probing search may need to wrap around the end of the array, so there
is another modulo in <code>findEntry()</code> to update.</p>
<div class="codehilite"><pre class="insert-before">      // We found the key.
      return entry;
    }

</pre><div class="source-file"><em>table.c</em><br>
in <em>findEntry</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">index</span> = (<span class="i">index</span> + <span class="n">1</span>) &amp; (<span class="i">capacity</span> - <span class="n">1</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>findEntry</em>(), replace 1 line</div>

<p>This line didn&rsquo;t show up in the profiler since most searches don&rsquo;t wrap.</p>
<p>The <code>findEntry()</code> function has a sister function, <code>tableFindString()</code>, that does
a hash table lookup for interning strings. We may as well apply the same
optimizations there too. This function is called only when interning strings,
which wasn&rsquo;t heavily stressed by our benchmark. But a Lox program that created
lots of strings might noticeably benefit from this change.</p>
<div class="codehilite"><pre class="insert-before">  if (table-&gt;count == 0) return NULL;

</pre><div class="source-file"><em>table.c</em><br>
in <em>tableFindString</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="t">uint32_t</span> <span class="i">index</span> = <span class="i">hash</span> &amp; (<span class="i">table</span>-&gt;<span class="i">capacity</span> - <span class="n">1</span>);
</pre><pre class="insert-after">  for (;;) {
    Entry* entry = &amp;table-&gt;entries[index];
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>tableFindString</em>(), replace 1 line</div>

<p>And also when the linear probing wraps around.</p>
<div class="codehilite"><pre class="insert-before">      return entry-&gt;key;
    }

</pre><div class="source-file"><em>table.c</em><br>
in <em>tableFindString</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">index</span> = (<span class="i">index</span> + <span class="n">1</span>) &amp; (<span class="i">table</span>-&gt;<span class="i">capacity</span> - <span class="n">1</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>table.c</em>, in <em>tableFindString</em>(), replace 1 line</div>

<p>Let&rsquo;s see if our fixes were worth it. I tweaked that zoological benchmark to
count how many <span name="batch">batches</span> of 10,000 calls it can run in
ten seconds. More batches equals faster performance. On my machine using the
unoptimized code, the benchmark gets through 3,192 batches. After this
optimization, that jumps to 6,249.</p><img src="image/optimization/hash-chart.png" alt="Bar chart comparing the performance before and after the optimization." />
<p>That&rsquo;s almost exactly twice as much work in the same amount of time. We made the
VM twice as fast (usual caveat: on this benchmark). That is a massive win when
it comes to optimization. Usually you feel good if you can claw a few percentage
points here or there. Since methods, fields, and global variables are so
prevalent in Lox programs, this tiny optimization improves performance across
the board. Almost every Lox program benefits.</p>
<aside name="batch">
<p>Our original benchmark fixed the amount of <em>work</em> and then measured the <em>time</em>.
Changing the script to count how many batches of calls it can do in ten seconds
fixes the time and measures the work. For performance comparisons, I like the
latter measure because the reported number represents <em>speed</em>. You can directly
compare the numbers before and after an optimization. When measuring execution
time, you have to do a little arithmetic to get to a good relative measure of
performance.</p>
</aside>
<p>Now, the point of this section is <em>not</em> that the modulo operator is profoundly
evil and you should stamp it out of every program you ever write. Nor is it that
micro-optimization is a vital engineering skill. It&rsquo;s rare that a performance
problem has such a narrow, effective solution. We got lucky.</p>
<p>The point is that we didn&rsquo;t <em>know</em> that the modulo operator was a performance
drain until our profiler told us so. If we had wandered around our VM&rsquo;s codebase
blindly guessing at hotspots, we likely wouldn&rsquo;t have noticed it. What I want
you to take away from this is how important it is to have a profiler in your
toolbox.</p>
<p>To reinforce that point, let&rsquo;s go ahead and run the original benchmark in our
now-optimized VM and see what the profiler shows us. On my machine, <code>tableGet()</code>
is still a fairly large chunk of execution time. That&rsquo;s to be expected for a
dynamically typed language. But it has dropped from 72% of the total execution
time down to 35%. That&rsquo;s much more in line with what we&rsquo;d like to see and shows
that our optimization didn&rsquo;t just make the program faster, but made it faster
<em>in the way we expected</em>. Profilers are as useful for verifying solutions as
they are for discovering problems.</p>
<h2><a href="#nan-boxing" id="nan-boxing"><small>30&#8202;.&#8202;3</small>NaN Boxing</a></h2>
<p>This next optimization has a very different feel. Thankfully, despite the odd
name, it does not involve punching your grandmother. It&rsquo;s different, but not,
like, <em>that</em> different. With our previous optimization, the profiler told us
where the problem was, and we merely had to use some ingenuity to come up with a
solution.</p>
<p>This optimization is more subtle, and its performance effects more scattered
across the virtual machine. The profiler won&rsquo;t help us come up with this.
Instead, it was invented by <span name="someone">someone</span> thinking deeply
about the lowest levels of machine architecture.</p>
<aside name="someone">
<p>I&rsquo;m not sure who first came up with this trick. The earliest source I can find
is David Gudeman&rsquo;s 1993 paper &ldquo;Representing Type Information in Dynamically
Typed Languages&rdquo;. Everyone else cites that. But Gudeman himself says the paper
isn&rsquo;t novel work, but instead &ldquo;gathers together a body of folklore&rdquo;.</p>
<p>Maybe the inventor has been lost to the mists of time, or maybe it&rsquo;s been
reinvented a number of times. Anyone who ruminates on IEEE 754 long enough
probably starts thinking about trying to stuff something useful into all those
unused NaN bits.</p>
</aside>
<p>Like the heading says, this optimization is called <strong>NaN boxing</strong> or sometimes
<strong>NaN tagging</strong>. Personally I like the latter name because &ldquo;boxing&rdquo; tends to imply
some kind of heap-allocated representation, but the former seems to be the more
widely used term. This technique changes how we represent values in the VM.</p>
<p>On a 64-bit machine, our Value type takes up 16 bytes. The struct has two
fields, a type tag and a union for the payload. The largest fields in the union
are an Obj pointer and a double, which are both 8 bytes. To keep the union field
aligned to an 8-byte boundary, the compiler adds padding after the tag too:</p><img src="image/optimization/union.png" alt="Byte layout of the 16-byte tagged union Value." />
<p>That&rsquo;s pretty big. If we could cut that down, then the VM could pack more values
into the same amount of memory. Most computers have plenty of RAM these days, so
the direct memory savings aren&rsquo;t a huge deal. But a smaller representation means
more Values fit in a cache line. That means fewer cache misses, which affects
<em>speed</em>.</p>
<p>If Values need to be aligned to their largest payload size, and a Lox number or
Obj pointer needs a full 8 bytes, how can we get any smaller? In a dynamically
typed language like Lox, each value needs to carry not just its payload, but
enough additional information to determine the value&rsquo;s type at runtime. If a Lox
number is already using the full 8 bytes, where could we squirrel away a couple
of extra bits to tell the runtime &ldquo;this is a number&rdquo;?</p>
<p>This is one of the perennial problems for dynamic language hackers. It
particularly bugs them because statically typed languages don&rsquo;t generally have
this problem. The type of each value is known at compile time, so no extra
memory is needed at runtime to track it. When your C compiler compiles a 32-bit
int, the resulting variable gets <em>exactly</em> 32 bits of storage.</p>
<p>Dynamic language folks hate losing ground to the static camp, so they&rsquo;ve come up
with a number of very clever ways to pack type information and a payload into a
small number of bits. NaN boxing is one of those. It&rsquo;s a particularly good fit
for languages like JavaScript and Lua, where all numbers are double-precision
floating point. Lox is in that same boat.</p>
<h3><a href="#what-is-and-is-not-a-number" id="what-is-and-is-not-a-number"><small>30&#8202;.&#8202;3&#8202;.&#8202;1</small>What is (and is not) a number?</a></h3>
<p>Before we start optimizing, we need to really understand how our friend the CPU
represents floating-point numbers. Almost all machines today use the same
scheme, encoded in the venerable scroll <a href="https://en.wikipedia.org/wiki/IEEE_754">IEEE 754</a>, known to mortals as the
&ldquo;IEEE Standard for Floating-Point Arithmetic&rdquo;.</p>
<p>In the eyes of your computer, a <span name="hyphen">64-bit</span>,
double-precision, IEEE floating-point number looks like this:</p>
<aside name="hyphen">
<p>That&rsquo;s a lot of hyphens for one sentence.</p>
</aside><img src="image/optimization/double.png" alt="Bit representation of an IEEE 754 double." />
<ul>
<li>
<p>Starting from the right, the first 52 bits are the <strong>fraction</strong>,
<strong>mantissa</strong>, or <strong>significand</strong> bits. They represent the significant digits
of the number, as a binary integer.</p>
</li>
<li>
<p>Next to that are 11 <strong>exponent</strong> bits. These tell you how far the mantissa
is shifted away from the decimal (well, binary) point.</p>
</li>
<li>
<p>The highest bit is the <span name="sign"><strong>sign bit</strong></span>, which
indicates whether the number is positive or negative.</p>
</li>
</ul>
<p>I know that&rsquo;s a little vague, but this chapter isn&rsquo;t a deep dive on
floating point representation. If you want to know how the exponent and mantissa
play together, there are already better explanations out there than I could
write.</p>
<aside name="sign">
<p>Since the sign bit is always present, even if the number is zero, that implies
that &ldquo;positive zero&rdquo; and &ldquo;negative zero&rdquo; have different bit representations, and
indeed, IEEE 754 does distinguish those.</p>
</aside>
<p>The important part for our purposes is that the spec carves out a special case
exponent. When all of the exponent bits are set, then instead of just
representing a really big number, the value has a different meaning. If the
mantissa bits are all zero, the value is an infinity. Otherwise, it is a
&ldquo;Not a Number&rdquo; (hence, <strong>NaN</strong>) value. NaNs represent concepts like the
result of dividing zero by zero.</p>
<p><em>Any</em> double whose exponent bits are all set and whose mantissa is nonzero is a
NaN, regardless of which mantissa bits are set. That means there&rsquo;s lots and lots
of <em>different</em> NaN bit patterns.
IEEE 754 divides those into two categories. Values where the highest mantissa
bit is 0 are called <strong>signalling NaNs</strong>, and the others are <strong>quiet NaNs</strong>.
Signalling NaNs are intended to be the result of erroneous computations, like
division by zero. A chip <span name="abort">may</span> detect when one of these
values is produced and abort a program completely. They may self-destruct if you
try to read one.</p>
<aside name="abort">
<p>I don&rsquo;t know if any CPUs actually <em>do</em> trap signalling NaNs and abort. The spec
just says they <em>could</em>.</p>
</aside>
<p>Quiet NaNs are supposed to be safer to use. They don&rsquo;t represent useful numeric
values, but they should at least not set your hand on fire if you touch them.</p>
<p>Every double with all of its exponent bits set and its highest mantissa bit set
is a quiet NaN. That leaves 52 bits unaccounted for. We&rsquo;ll avoid one of those so
that we don&rsquo;t step on Intel&rsquo;s &ldquo;QNaN Floating-Point Indefinite&rdquo; value, leaving us
51 bits. Those remaining bits can be anything. We&rsquo;re talking
2,251,799,813,685,248 unique quiet NaN bit patterns.</p><img src="image/optimization/nan.png" alt="The bits in a double that make it a quiet NaN." />
<p>This means a 64-bit double has enough room to store all of the various different
numeric floating-point values and <em>also</em> has room for another 51 bits of data
that we can use however we want. That&rsquo;s plenty of room to set aside a couple of
bit patterns to represent Lox&rsquo;s <code>nil</code>, <code>true</code>, and <code>false</code> values. But what
about Obj pointers? Don&rsquo;t pointers need a full 64 bits too?</p>
<p>Fortunately, we have another trick up our other sleeve. Yes, technically
pointers on a 64-bit architecture are 64 bits. But, no architecture I know of
actually uses that entire address space. Instead, most widely used chips today
only ever use the low <span name="48">48</span> bits. The remaining 16 bits are
either unspecified or always zero.</p>
<aside name="48">
<p>48 bits is enough to address 262,144 gigabytes of memory. Modern operating
systems also give each process its own address space, so that should be plenty.</p>
</aside>
<p>If we&rsquo;ve got 51 bits, we can stuff a 48-bit pointer in there with three bits to
spare. Those three bits are just enough to store tiny type tags to distinguish
between <code>nil</code>, Booleans, and Obj pointers.</p>
<p>That&rsquo;s NaN boxing. Within a single 64-bit double, you can store all of the
different floating-point numeric values, a pointer, or any of a couple of other
special sentinel values. Half the memory usage of our current Value struct,
while retaining all of the fidelity.</p>
<p>What&rsquo;s particularly nice about this representation is that there is no need to
<em>convert</em> a numeric double value into a &ldquo;boxed&rdquo; form. Lox numbers <em>are</em> just
normal, 64-bit doubles. We still need to <em>check</em> their type before we use them,
since Lox is dynamically typed, but we don&rsquo;t need to do any bit shifting or
pointer indirection to go from &ldquo;value&rdquo; to &ldquo;number&rdquo;.</p>
<p>For the other value types, there is a conversion step, of course. But,
fortunately, our VM hides all of the mechanism to go from values to raw types
behind a handful of macros. Rewrite those to implement NaN boxing, and the rest
of the VM should just work.</p>
<h3><a href="#conditional-support" id="conditional-support"><small>30&#8202;.&#8202;3&#8202;.&#8202;2</small>Conditional support</a></h3>
<p>I know the details of this new representation aren&rsquo;t clear in your head yet.
Don&rsquo;t worry, they will crystallize as we work through the implementation. Before
we get to that, we&rsquo;re going to put some compile-time scaffolding in place.</p>
<p>For our previous optimization, we rewrote the old slow code and called it
done. This one is a little different. NaN boxing relies on some very low-level
details of how a chip represents floating-point numbers and pointers. It
<em>probably</em> works on most CPUs you&rsquo;re likely to encounter, but you can never be
totally sure.</p>
<p>It would suck if our VM completely lost support for an architecture just because
of its value representation. To avoid that, we&rsquo;ll maintain support for <em>both</em>
the old tagged union implementation of Value and the new NaN-boxed form. We
select which representation we want at compile time using this flag:</p>
<div class="codehilite"><pre class="insert-before">#include &lt;stdint.h&gt;

</pre><div class="source-file"><em>common.h</em></div>
<pre class="insert"><span class="a">#define NAN_BOXING</span>
</pre><pre class="insert-after">#define DEBUG_PRINT_CODE
</pre></div>
<div class="source-file-narrow"><em>common.h</em></div>

<p>If that&rsquo;s defined, the VM uses the new form. Otherwise, it reverts to the old
style. The few pieces of code that care about the details of the value
representation<span class="em">&mdash;</span>mainly the handful of macros for wrapping and unwrapping
Values<span class="em">&mdash;</span>vary based on whether this flag is set. The rest of the VM can
continue along its merry way.</p>
<p>Most of the work happens in the &ldquo;value&rdquo; module where we add a section for the
new type.</p>
<div class="codehilite"><pre class="insert-before">typedef struct ObjString ObjString;

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#ifdef NAN_BOXING</span>

<span class="k">typedef</span> <span class="t">uint64_t</span> <span class="t">Value</span>;

<span class="a">#else</span>

</pre><pre class="insert-after">typedef enum {
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>When NaN boxing is enabled, the actual type of a Value is a flat, unsigned
64-bit integer. We could use double instead, which would make the macros for
dealing with Lox numbers a little simpler. But all of the other macros need to
do bitwise operations and uint64_t is a much friendlier type for that. Outside
of this module, the rest of the VM doesn&rsquo;t really care one way or the other.</p>
<p>Before we start re-implementing those macros, we close the <code>#else</code> branch of the
<code>#ifdef</code> at the end of the definitions for the old representation.</p>
<div class="codehilite"><pre class="insert-before">#define OBJ_VAL(object)   ((Value){VAL_OBJ, {.obj = (Obj*)object}})
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="a">#endif</span>
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>Our remaining task is simply to fill in that first <code>#ifdef</code> section with new
implementations of all the stuff already in the <code>#else</code> side. We&rsquo;ll work through
it one value type at a time, from easiest to hardest.</p>
<h3><a href="#numbers" id="numbers"><small>30&#8202;.&#8202;3&#8202;.&#8202;3</small>Numbers</a></h3>
<p>We&rsquo;ll start with numbers since they have the most direct representation under
NaN boxing. To &ldquo;convert&rdquo; a C double to a NaN-boxed clox Value, we don&rsquo;t need to
touch a single bit<span class="em">&mdash;</span>the representation is exactly the same. But we do need to
convince our C compiler of that fact, which we made harder by defining Value to
be uint64_t.</p>
<p>We need to get the compiler to take a set of bits that it thinks are a double
and use those same bits as a uint64_t, or vice versa. This is called <strong>type
punning</strong>. C and C++ programmers have been doing this since the days of bell
bottoms and 8-tracks, but the language specifications have <span
name="hesitate">hesitated</span> to say which of the many ways to do this is
officially sanctioned.</p>
<aside name="hesitate" class="bottom">
<p>Spec authors don&rsquo;t like type punning because it makes optimization harder. A key
optimization technique is reordering instructions to fill the CPU&rsquo;s execution
pipelines. A compiler can reorder code only when doing so doesn&rsquo;t have a
user-visible effect, obviously.</p>
<p>Pointers make that harder. If two pointers point to the same value, then a write
through one and a read through the other cannot be reordered. But what about two
pointers of <em>different</em> types? If those could point to the same object, then
basically <em>any</em> two pointers could be aliases to the same value. That
drastically limits the amount of code the compiler is free to rearrange.</p>
<p>To avoid that, compilers want to assume <strong>strict aliasing</strong><span class="em">&mdash;</span>pointers of
incompatible types cannot point to the same value. Type punning, by nature,
breaks that assumption.</p>
</aside>
<p>I know one way to convert a <code>double</code> to <code>Value</code> and back that I believe is
supported by both the C and C++ specs. Unfortunately, it doesn&rsquo;t fit in a single
expression, so the conversion macros have to call out to helper functions.
Here&rsquo;s the first macro:</p>
<div class="codehilite"><pre class="insert-before">typedef uint64_t Value;
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="a">#define NUMBER_VAL(num) numToValue(num)</span>
</pre><pre class="insert-after">

#else
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>That macro passes the double here:</p>
<div class="codehilite"><pre class="insert-before">#define NUMBER_VAL(num) numToValue(num)
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="k">static</span> <span class="k">inline</span> <span class="t">Value</span> <span class="i">numToValue</span>(<span class="t">double</span> <span class="i">num</span>) {
  <span class="t">Value</span> <span class="i">value</span>;
  <span class="i">memcpy</span>(&amp;<span class="i">value</span>, &amp;<span class="i">num</span>, <span class="k">sizeof</span>(<span class="t">double</span>));
  <span class="k">return</span> <span class="i">value</span>;
}
</pre><pre class="insert-after">

#else
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>I know, weird, right? The way to treat a series of bytes as having a different
type without changing their value at all is <code>memcpy()</code>? This looks horrendously
slow: Create a local variable. Pass its address to a C standard library function
to copy a few bytes. Then return the result, which is the exact same
bytes as the input. Thankfully, because this <em>is</em> the supported idiom for type
punning, most compilers recognize the pattern and optimize away the <code>memcpy()</code>
entirely.</p>
<p>&ldquo;Unwrapping&rdquo; a Lox number is the mirror image.</p>
<div class="codehilite"><pre class="insert-before">typedef uint64_t Value;
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="a">#define AS_NUMBER(value)    valueToNum(value)</span>
</pre><pre class="insert-after">

#define NUMBER_VAL(num) numToValue(num)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>That macro calls this function:</p>
<div class="codehilite"><pre class="insert-before">#define NUMBER_VAL(num) numToValue(num)
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="k">static</span> <span class="k">inline</span> <span class="t">double</span> <span class="i">valueToNum</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="t">double</span> <span class="i">num</span>;
  <span class="i">memcpy</span>(&amp;<span class="i">num</span>, &amp;<span class="i">value</span>, <span class="k">sizeof</span>(<span class="t">Value</span>));
  <span class="k">return</span> <span class="i">num</span>;
}
</pre><pre class="insert-after">

static inline Value numToValue(double num) {
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>It works exactly the same except we swap the types. Again, the compiler will
eliminate all of it. Even though those calls to
<code>memcpy()</code> will disappear, we still need to show the compiler <em>which</em> <code>memcpy()</code>
we&rsquo;re calling, so we also need an <span name="union">include</span>.</p>
<aside name="union" class="bottom">
<p>If you find yourself with a compiler that does not optimize the <code>memcpy()</code> away,
try this instead:</p>
<div class="codehilite"><pre><span class="t">double</span> <span class="i">valueToNum</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="k">union</span> {
    <span class="t">uint64_t</span> <span class="i">bits</span>;
    <span class="t">double</span> <span class="i">num</span>;
  } <span class="i">data</span>;
  <span class="i">data</span>.<span class="i">bits</span> = <span class="i">value</span>;
  <span class="k">return</span> <span class="i">data</span>.<span class="i">num</span>;
}
</pre></div>
</aside>
<div class="codehilite"><pre class="insert-before">#define clox_value_h
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="a">#include &lt;string.h&gt;</span>
</pre><pre class="insert-after">

#include &quot;common.h&quot;
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>That was a lot of code to ultimately do nothing but silence the C type checker.
Doing a runtime type <em>test</em> on a Lox number is a little more interesting. If all
we have are exactly the bits for a double, how do we tell that it <em>is</em> a double?
It&rsquo;s time to get bit twiddling.</p>
<div class="codehilite"><pre class="insert-before">typedef uint64_t Value;
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="a">#define IS_NUMBER(value)    (((value) &amp; QNAN) != QNAN)</span>
</pre><pre class="insert-after">

#define AS_NUMBER(value)    valueToNum(value)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>We know that every Value that is <em>not</em> a number will use a special quiet NaN
representation. And we presume we have correctly avoided any of the meaningful
NaN representations that may actually be produced by doing arithmetic on
numbers.</p>
<p>If the double has all of its NaN bits set, and the quiet NaN bit set, and one
more for good measure, we can be <span name="certain">pretty certain</span> it
is one of the bit patterns we ourselves have set aside for other types. To check
that, we mask out all of the bits except for our set of quiet NaN bits. If <em>all</em>
of those bits are set, it must be a NaN-boxed value of some other Lox type.
Otherwise, it is actually a number.</p>
<aside name="certain">
<p>Pretty certain, but not strictly guaranteed. As far as I know, there is nothing
preventing a CPU from producing a NaN value as the result of some operation
whose bit representation collides with ones we have claimed. But in my tests
across a number of architectures, I haven&rsquo;t seen it happen.</p>
</aside>
<p>The set of quiet NaN bits is declared like this:</p>
<div class="codehilite"><pre class="insert-before">#ifdef NAN_BOXING
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="a">#define QNAN     ((uint64_t)0x7ffc000000000000)</span>
</pre><pre class="insert-after">

typedef uint64_t Value;
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>It would be nice if C supported binary literals. But if you do the conversion,
you&rsquo;ll see that value is the same as this:</p><img src="image/optimization/qnan.png" alt="The quiet NaN bits." />
<p>This is exactly all of the exponent bits, plus the quiet NaN bit, plus one extra
to dodge that Intel value.</p>
<h3><a href="#nil-true-and-false" id="nil-true-and-false"><small>30&#8202;.&#8202;3&#8202;.&#8202;4</small>Nil, true, and false</a></h3>
<p>The next type to handle is <code>nil</code>. That&rsquo;s pretty simple since there&rsquo;s only one
<code>nil</code> value and thus we need only a single bit pattern to represent it. There
are two other singleton values, the two Booleans, <code>true</code> and <code>false</code>. This calls
for three total unique bit patterns.</p>
<p>Two bits give us four different combinations, which is plenty. We claim the two
lowest bits of our unused mantissa space as a &ldquo;type tag&rdquo; to determine which of
these three singleton values we&rsquo;re looking at. The three type tags are defined
like so:</p>
<div class="codehilite"><pre class="insert-before">#define QNAN     ((uint64_t)0x7ffc000000000000)
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert">

<span class="a">#define TAG_NIL   1 </span><span class="c">// 01.</span>
<span class="a">#define TAG_FALSE 2 </span><span class="c">// 10.</span>
<span class="a">#define TAG_TRUE  3 </span><span class="c">// 11.</span>
</pre><pre class="insert-after">

typedef uint64_t Value;
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>Our representation of <code>nil</code> is thus all of the bits required to define our
quiet NaN representation along with the <code>nil</code> type tag bits:</p><img src="image/optimization/nil.png" alt="The bit representation of the nil value." />
<p>In code, we define that value like so:</p>
<div class="codehilite"><pre class="insert-before">#define AS_NUMBER(value)    valueToNum(value)

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define NIL_VAL         ((Value)(uint64_t)(QNAN | TAG_NIL))</span>
</pre><pre class="insert-after">#define NUMBER_VAL(num) numToValue(num)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>We simply bitwise <span class="small-caps">OR</span> the quiet NaN bits and the
type tag, and then do a little cast dance to teach the C compiler what we want
those bits to mean.</p>
<p>Since <code>nil</code> has only a single bit representation, we can use equality on
uint64_t to see if a Value is <code>nil</code>.</p>
<p><span name="equal"></span></p>
<div class="codehilite"><pre class="insert-before">typedef uint64_t Value;

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define IS_NIL(value)       ((value) == NIL_VAL)</span>
</pre><pre class="insert-after">#define IS_NUMBER(value)    (((value) &amp; QNAN) != QNAN)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>You can guess how we define the <code>true</code> and <code>false</code> values.</p>
<div class="codehilite"><pre class="insert-before">#define AS_NUMBER(value)    valueToNum(value)

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define FALSE_VAL       ((Value)(uint64_t)(QNAN | TAG_FALSE))</span>
<span class="a">#define TRUE_VAL        ((Value)(uint64_t)(QNAN | TAG_TRUE))</span>
</pre><pre class="insert-after">#define NIL_VAL         ((Value)(uint64_t)(QNAN | TAG_NIL))
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>The bits look like this:</p><img src="image/optimization/bools.png" alt="The bit representation of the true and false values." />
<p>To convert a C bool into a Lox Boolean, we rely on these two singleton values
and the good old conditional operator.</p>
<div class="codehilite"><pre class="insert-before">#define AS_NUMBER(value)    valueToNum(value)

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define BOOL_VAL(b)     ((b) ? TRUE_VAL : FALSE_VAL)</span>
</pre><pre class="insert-after">#define FALSE_VAL       ((Value)(uint64_t)(QNAN | TAG_FALSE))
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>There&rsquo;s probably a cleverer bitwise way to do this, but my hunch is that the
compiler can figure one out faster than I can. Going the other direction is
simpler.</p>
<div class="codehilite"><pre class="insert-before">#define IS_NUMBER(value)    (((value) &amp; QNAN) != QNAN)

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define AS_BOOL(value)      ((value) == TRUE_VAL)</span>
</pre><pre class="insert-after">#define AS_NUMBER(value)    valueToNum(value)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>Since we know there are exactly two Boolean bit representations in Lox<span class="em">&mdash;</span>unlike
in C where any non-zero value can be considered &ldquo;true&rdquo;<span class="em">&mdash;</span>if it ain&rsquo;t <code>true</code>, it
must be <code>false</code>. This macro does assume you call it only on a Value that you
know <em>is</em> a Lox Boolean. To check that, there&rsquo;s one more macro.</p>
<div class="codehilite"><pre class="insert-before">typedef uint64_t Value;

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define IS_BOOL(value)      (((value) | 1) == TRUE_VAL)</span>
</pre><pre class="insert-after">#define IS_NIL(value)       ((value) == NIL_VAL)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>That looks a little strange. A more obvious macro would look like this:</p>
<div class="codehilite"><pre><span class="a">#define IS_BOOL(v) ((v) == TRUE_VAL || (v) == FALSE_VAL)</span>
</pre></div>
<p>Unfortunately, that&rsquo;s not safe. The expansion mentions <code>v</code> twice, which means if
that expression has any side effects, they will be executed twice. We could have
the macro call out to a separate function, but, ugh, what a chore.</p>
<p>Instead, we bitwise <span class="small-caps">OR</span> a 1 onto the value to
merge the only two valid Boolean bit patterns. That leaves three potential
states the value can be in:</p>
<ol>
<li>
<p>It was <code>FALSE_VAL</code> and has now been converted to <code>TRUE_VAL</code>.</p>
</li>
<li>
<p>It was <code>TRUE_VAL</code> and the <code>| 1</code> did nothing and it&rsquo;s still <code>TRUE_VAL</code>.</p>
</li>
<li>
<p>It&rsquo;s some other, non-Boolean value.</p>
</li>
</ol>
<p>At that point, we can simply compare the result to <code>TRUE_VAL</code> to see if we&rsquo;re
in the first two states or the third.</p>
<h3><a href="#objects" id="objects"><small>30&#8202;.&#8202;3&#8202;.&#8202;5</small>Objects</a></h3>
<p>The last value type is the hardest. Unlike the singleton values, there are
billions of different pointer values we need to box inside a NaN. This means we
need both some kind of tag to indicate that these particular NaNs <em>are</em> Obj
pointers, and room for the addresses themselves.</p>
<p>The tag bits we used for the singleton values are in the region where I decided
to store the pointer itself, so we can&rsquo;t easily use a different <span
name="ptr">bit</span> there to indicate that the value is an object reference.
However, there is another bit we aren&rsquo;t using. Since all our NaN values are not
numbers<span class="em">&mdash;</span>it&rsquo;s right there in the name<span class="em">&mdash;</span>the sign bit isn&rsquo;t used for anything.
We&rsquo;ll go ahead and use that as the type tag for objects. If one of our quiet
NaNs has its sign bit set, then it&rsquo;s an Obj pointer. Otherwise, it must be one
of the previous singleton values.</p>
<aside name="ptr">
<p>We actually <em>could</em> use the lowest bits to store the type tag even when the
value is an Obj pointer. That&rsquo;s because Obj pointers are always aligned to an
8-byte boundary since Obj contains a 64-bit field. That, in turn, implies that
the three lowest bits of an Obj pointer will always be zero. We could store
whatever we wanted in there and just mask it off before dereferencing the
pointer.</p>
<p>This is another value representation optimization called <strong>pointer tagging</strong>.</p>
</aside>
<p>If the sign bit is set, then the remaining low bits store the pointer to the
Obj:</p><img src="image/optimization/obj.png" alt="Bit representation of an Obj* stored in a Value." />
<p>To convert a raw Obj pointer to a Value, we take the pointer and set all of the
quiet NaN bits and the sign bit.</p>
<div class="codehilite"><pre class="insert-before">#define NUMBER_VAL(num) numToValue(num)
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define OBJ_VAL(obj) \</span>
<span class="a">    (Value)(SIGN_BIT | QNAN | (uint64_t)(uintptr_t)(obj))</span>
</pre><pre class="insert-after">

static inline double valueToNum(Value value) {
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>The pointer itself is a full 64 bits, and in <span name="safe">principle</span>,
it could thus overlap with some of those quiet NaN and sign bits. But in
practice, at least on the architectures I&rsquo;ve tested, everything above the 48th
bit in a pointer is always zero. There&rsquo;s a lot of casting going on here, which
I&rsquo;ve found is necessary to satisfy some of the pickiest C compilers, but the
end result is just jamming some bits together.</p>
<aside name="safe">
<p>I try to follow the letter of the law when it comes to the code in this book, so
this paragraph is dubious. There comes a point when optimizing where you push
the boundary of not just what the <em>spec says</em> you can do, but what a real
compiler and chip let you get away with.</p>
<p>There are risks when stepping outside of the spec, but there are rewards in that
lawless territory too. It&rsquo;s up to you to decide if the gains are worth it.</p>
</aside>
<p>We define the sign bit like so:</p>
<div class="codehilite"><pre class="insert-before">#ifdef NAN_BOXING

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define SIGN_BIT ((uint64_t)0x8000000000000000)</span>
</pre><pre class="insert-after">#define QNAN     ((uint64_t)0x7ffc000000000000)

</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>To get the Obj pointer back out, we simply mask off all of those extra bits.</p>
<div class="codehilite"><pre class="insert-before">#define AS_NUMBER(value)    valueToNum(value)
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define AS_OBJ(value) \</span>
<span class="a">    ((Obj*)(uintptr_t)((value) &amp; ~(SIGN_BIT | QNAN)))</span>
</pre><pre class="insert-after">

#define BOOL_VAL(b)     ((b) ? TRUE_VAL : FALSE_VAL)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>The tilde (<code>~</code>), if you haven&rsquo;t done enough bit manipulation to encounter it
before, is bitwise <span class="small-caps">NOT</span>. It toggles all ones and
zeroes in its operand. By masking the value with the bitwise negation of the
quiet NaN and sign bits, we <em>clear</em> those bits and let the pointer bits remain.</p>
<p>One last macro:</p>
<div class="codehilite"><pre class="insert-before">#define IS_NUMBER(value)    (((value) &amp; QNAN) != QNAN)
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define IS_OBJ(value) \</span>
<span class="a">    (((value) &amp; (QNAN | SIGN_BIT)) == (QNAN | SIGN_BIT))</span>
</pre><pre class="insert-after">

#define AS_BOOL(value)      ((value) == TRUE_VAL)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>A Value storing an Obj pointer has its sign bit set, but so does any negative
number. To tell if a Value is an Obj pointer, we need to check that both the
sign bit and all of the quiet NaN bits are set. This is similar to how we detect
the type of the singleton values, except this time we use the sign bit as the
tag.</p>
<h3><a href="#value-functions" id="value-functions"><small>30&#8202;.&#8202;3&#8202;.&#8202;6</small>Value functions</a></h3>
<p>The rest of the VM usually goes through the macros when working with Values, so
we are almost done. However, there are a couple of functions in the &ldquo;value&rdquo;
module that peek inside the otherwise black box of Value and work with its
encoding directly. We need to fix those too.</p>
<p>The first is <code>printValue()</code>. It has separate code for each value type. We no
longer have an explicit type enum we can switch on, so instead we use a series
of type tests to handle each kind of value.</p>
<div class="codehilite"><pre class="insert-before">void printValue(Value value) {
</pre><div class="source-file"><em>value.c</em><br>
in <em>printValue</em>()</div>
<pre class="insert"><span class="a">#ifdef NAN_BOXING</span>
  <span class="k">if</span> (<span class="a">IS_BOOL</span>(<span class="i">value</span>)) {
    <span class="i">printf</span>(<span class="a">AS_BOOL</span>(<span class="i">value</span>) ? <span class="s">&quot;true&quot;</span> : <span class="s">&quot;false&quot;</span>);
  } <span class="k">else</span> <span class="k">if</span> (<span class="a">IS_NIL</span>(<span class="i">value</span>)) {
    <span class="i">printf</span>(<span class="s">&quot;nil&quot;</span>);
  } <span class="k">else</span> <span class="k">if</span> (<span class="a">IS_NUMBER</span>(<span class="i">value</span>)) {
    <span class="i">printf</span>(<span class="s">&quot;%g&quot;</span>, <span class="a">AS_NUMBER</span>(<span class="i">value</span>));
  } <span class="k">else</span> <span class="k">if</span> (<span class="a">IS_OBJ</span>(<span class="i">value</span>)) {
    <span class="i">printObject</span>(<span class="i">value</span>);
  }
<span class="a">#else</span>
</pre><pre class="insert-after">  switch (value.type) {
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>printValue</em>()</div>

<p>This is technically a tiny bit slower than a switch, but compared to the
overhead of actually writing to a stream, it&rsquo;s negligible.</p>
<p>We still support the original tagged union representation, so we keep the old
code and enclose it in the <code>#else</code> conditional section.</p>
<div class="codehilite"><pre class="insert-before">  }
</pre><div class="source-file"><em>value.c</em><br>
in <em>printValue</em>()</div>
<pre class="insert"><span class="a">#endif</span>
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>printValue</em>()</div>

<p>The other operation is testing two values for equality.</p>
<div class="codehilite"><pre class="insert-before">bool valuesEqual(Value a, Value b) {
</pre><div class="source-file"><em>value.c</em><br>
in <em>valuesEqual</em>()</div>
<pre class="insert"><span class="a">#ifdef NAN_BOXING</span>
  <span class="k">return</span> <span class="i">a</span> == <span class="i">b</span>;
<span class="a">#else</span>
</pre><pre class="insert-after">  if (a.type != b.type) return false;
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>valuesEqual</em>()</div>

<p>It doesn&rsquo;t get much simpler than that! If the two bit representations are
identical, the values are equal. That does the right thing for the singleton
values since each has a unique bit representation and they are only equal to
themselves. It also does the right thing for Obj pointers, since objects use
identity for equality<span class="em">&mdash;</span>two Obj references are equal only if they point to the
exact same object.</p>
<p>It&rsquo;s <em>mostly</em> correct for numbers too. Most floating-point numbers with
different bit representations are distinct numeric values. Alas, IEEE 754
contains a pothole to trip us up. For reasons that aren&rsquo;t entirely clear to me,
the spec mandates that NaN values are <em>not</em> equal to <em>themselves</em>. This isn&rsquo;t a
problem for the special quiet NaNs that we are using for our own purposes. But
it&rsquo;s possible to produce a &ldquo;real&rdquo; arithmetic NaN in Lox, and if we want to
correctly implement IEEE 754 numbers, then the resulting value is not supposed
to be equal to itself. More concretely:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">nan</span> = <span class="n">0</span>/<span class="n">0</span>;
<span class="k">print</span> <span class="i">nan</span> == <span class="i">nan</span>;
</pre></div>
<p>IEEE 754 says this program is supposed to print &ldquo;false&rdquo;. It does the right thing
with our old tagged union representation because the <code>VAL_NUMBER</code> case applies
<code>==</code> to two values that the C compiler knows are doubles. Thus the compiler
generates the right CPU instruction to perform an IEEE floating-point equality.</p>
<p>Our new representation breaks that by defining Value to be a uint64_t. If we
want to be <em>fully</em> compliant with IEEE 754, we need to handle this case.</p>
<div class="codehilite"><pre class="insert-before">#ifdef NAN_BOXING
</pre><div class="source-file"><em>value.c</em><br>
in <em>valuesEqual</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="a">IS_NUMBER</span>(<span class="i">a</span>) &amp;&amp; <span class="a">IS_NUMBER</span>(<span class="i">b</span>)) {
    <span class="k">return</span> <span class="a">AS_NUMBER</span>(<span class="i">a</span>) == <span class="a">AS_NUMBER</span>(<span class="i">b</span>);
  }
</pre><pre class="insert-after">  return a == b;
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>valuesEqual</em>()</div>

<p>I know, it&rsquo;s weird. And there is a performance cost to doing this type test
every time we check two Lox values for equality. If we are willing to sacrifice
a little <span name="java">compatibility</span><span class="em">&mdash;</span>who <em>really</em> cares if NaN is
not equal to itself?<span class="em">&mdash;</span>we could leave this off. I&rsquo;ll leave it up to you to
decide how pedantic you want to be.</p>
<aside name="java">
<p>In fact, jlox gets NaN equality wrong. Java does the right thing when you
compare primitive doubles using <code>==</code>, but not if you box those to Double or
Object and compare them using <code>equals()</code>, which is how jlox implements equality.</p>
</aside>
<p>Finally, we close the conditional compilation section around the old
implementation.</p>
<div class="codehilite"><pre class="insert-before">  }
</pre><div class="source-file"><em>value.c</em><br>
in <em>valuesEqual</em>()</div>
<pre class="insert"><span class="a">#endif</span>
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>valuesEqual</em>()</div>

<p>And that&rsquo;s it. This optimization is complete, as is our clox virtual machine.
That was the last line of new code in the book.</p>
<h3><a href="#evaluating-performance" id="evaluating-performance"><small>30&#8202;.&#8202;3&#8202;.&#8202;7</small>Evaluating performance</a></h3>
<p>The code is done, but we still need to figure out if we actually made anything
better with these changes. Evaluating an optimization like this is very
different from the previous one. There, we had a clear hotspot visible in the
profiler. We fixed that part of the code and could instantly see the hotspot
get faster.</p>
<p>The effects of changing the value representation are more diffuse. The macros
are expanded in place wherever they are used, so the performance changes are
spread across the codebase in a way that&rsquo;s hard for many profilers to track
well, especially in an <span name="opt">optimized</span> build.</p>
<aside name="opt">
<p>When doing profiling work, you almost always want to profile an optimized
&ldquo;release&rdquo; build of your program since that reflects the performance story your
end users experience. Compiler optimizations, like inlining, can dramatically
affect which parts of the code are performance hotspots. Hand-optimizing a debug
build risks sending you off &ldquo;fixing&rdquo; problems that the optimizing compiler will
already solve for you.</p>
<p>Make sure you don&rsquo;t accidentally benchmark and optimize your debug build. I seem
to make that mistake at least once a year.</p>
</aside>
<p>We also can&rsquo;t easily <em>reason</em> about the effects of our change. We&rsquo;ve made values
smaller, which reduces cache misses all across the VM. But the actual real-world
performance effect of that change is highly dependent on the memory use of the
Lox program being run. A tiny Lox microbenchmark may not have enough values
scattered around in memory for the effect to be noticeable, and even things like
the addresses handed out to us by the C memory allocator can impact the results.</p>
<p>If we did our job right, basically everything gets a little faster, especially
on larger, more complex Lox programs. But it is possible that the extra bitwise
operations we do when NaN-boxing values nullify the gains from the better
memory use. Doing performance work like this is unnerving because you can&rsquo;t
easily <em>prove</em> that you&rsquo;ve made the VM better. You can&rsquo;t point to a single
surgically targeted microbenchmark and say, &ldquo;There, see?&rdquo;</p>
<p>Instead, what we really need is a <em>suite</em> of larger benchmarks. Ideally, they
would be distilled from real-world applications<span class="em">&mdash;</span>not that such a thing exists
for a toy language like Lox. Then we can measure the aggregate performance
changes across all of those. I did my best to cobble together a handful of
larger Lox programs. On my machine, the new value representation seems to make
everything roughly 10% faster across the board.</p>
<p>That&rsquo;s not a huge improvement, especially compared to the profound effect of
making hash table lookups faster. I added this optimization in large part
because it&rsquo;s a good example of a certain <em>kind</em> of performance work you may
experience, and honestly, because I think it&rsquo;s technically really cool. It might
not be the first thing I would reach for if I were seriously trying to make clox
faster. There is probably other, lower-hanging fruit.</p>
<p>But, if you find yourself working on a program where all of the easy wins have
been taken, then at some point you may want to think about tuning your value
representation. I hope this chapter has shined a light on some of the options
you have in that area.</p>
<h2><a href="#where-to-next" id="where-to-next"><small>30&#8202;.&#8202;4</small>Where to Next</a></h2>
<p>We&rsquo;ll stop here with the Lox language and our two interpreters. We could tinker
on it forever, adding new language features and clever speed improvements. But,
for this book, I think we&rsquo;ve reached a natural place to call our work complete.
I won&rsquo;t rehash everything we&rsquo;ve learned in the past many pages. You were there
with me and you remember. Instead, I&rsquo;d like to take a minute to talk about where
you might go from here. What is the next step in your programming language
journey?</p>
<p>Most of you probably won&rsquo;t spend a significant part of your career working in
compilers or interpreters. It&rsquo;s a pretty small slice of the computer science
academia pie, and an even smaller segment of software engineering in industry.
That&rsquo;s OK. Even if you never work on a compiler again in your life, you will
certainly <em>use</em> one, and I hope this book has equipped you with a better
understanding of how the programming languages you use are designed and
implemented.</p>
<p>You have also learned a handful of important, fundamental data structures and
gotten some practice doing low-level profiling and optimization work. That kind
of expertise is helpful no matter what domain you program in.</p>
<p>I also hope I gave you a new way of <span name="domain">looking</span> at and
solving problems. Even if you never work on a language again, you may be
surprised to discover how many programming problems can be seen as
language-<em>like</em>. Maybe that report generator you need to write can be modeled as
a series of stack-based &ldquo;instructions&rdquo; that the generator &ldquo;executes&rdquo;. That user
interface you need to render looks an awful lot like traversing an AST.</p>
<aside name="domain">
<p>This goes for other domains too. I don&rsquo;t think there&rsquo;s a single topic I&rsquo;ve
learned in programming<span class="em">&mdash;</span>or even outside of programming<span class="em">&mdash;</span>that I haven&rsquo;t ended
up finding useful in other areas. One of my favorite aspects of software
engineering is how much it rewards those with eclectic interests.</p>
</aside>
<p>If you do want to go further down the programming language rabbit hole, here
are some suggestions for which branches in the tunnel to explore:</p>
<ul>
<li>
<p>Our simple, single-pass bytecode compiler pushed us towards mostly runtime
optimization. In a mature language implementation, compile-time optimization
is generally more important, and the field of compiler optimizations is
incredibly rich. Grab a classic <span name="cooper">compilers</span> book,
and rebuild the front end of clox or jlox to be a sophisticated compilation
pipeline with some interesting intermediate representations and optimization
passes.</p>
<p>Dynamic typing will place some restrictions on how far you can go, but there
is still a lot you can do. Or maybe you want to take a big leap and add
static types and a type checker to Lox. That will certainly give your front
end a lot more to chew on.</p>
<aside name="cooper">
<p>I like Cooper and Torczon&rsquo;s <em>Engineering a Compiler</em> for this. Appel&rsquo;s
<em>Modern Compiler Implementation</em> books are also well regarded.</p>
</aside></li>
<li>
<p>In this book, I aim to be correct, but not particularly rigorous. My goal is
mostly to give you an <em>intuition</em> and a feel for doing language work. If you
like more precision, then the whole world of programming language academia
is waiting for you. Languages and compilers have been studied formally since
before we even had computers, so there is no shortage of books and papers on
parser theory, type systems, semantics, and formal logic. Going down this
path will also teach you how to read CS papers, which is a valuable skill in
its own right.</p>
</li>
<li>
<p>Or, if you just really enjoy hacking on and making languages, you can take
Lox and turn it into your own <span name="license">plaything</span>. Change
the syntax to something that delights your eye. Add missing features or
remove ones you don&rsquo;t like. Jam new optimizations in there.</p>
<aside name="license">
<p>The <em>text</em> of this book is copyrighted to me, but the <em>code</em> and the
implementations of jlox and clox use the very permissive <a href="https://en.wikipedia.org/wiki/MIT_License">MIT license</a>.
You are more than welcome to <a href="https://github.com/munificent/craftinginterpreters">take either of those interpreters</a> and
do whatever you want with them. Go to town.</p>
<p>If you make significant changes to the language, it would be good to also
change the name, mostly to avoid confusing people about what the name &ldquo;Lox&rdquo;
represents.</p>
</aside>
<p>Eventually you may get to a point where you have something you think others
could use as well. That gets you into the very distinct world of programming
language <em>popularity</em>. Expect to spend a ton of time writing documentation,
example programs, tools, and useful libraries. The field is crowded with
languages vying for users. To thrive in that space you&rsquo;ll have to put on
your marketing hat and <em>sell</em>. Not everyone enjoys that kind of
public-facing work, but if you do, it can be incredibly gratifying to see
people use your language to express themselves.</p>
</li>
</ul>
<p>Or maybe this book has satisfied your craving and you&rsquo;ll stop here. Whichever
way you go, or don&rsquo;t go, there is one lesson I hope to lodge in your heart. Like
I was, you may have initially been intimidated by programming languages. But in
these chapters, you&rsquo;ve seen that even really challenging material can be tackled
by us mortals if we get our hands dirty and take it a step at a time. If you can
handle compilers and interpreters, you can do anything you put your mind to.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<p>Assigning homework on the last day of school seems cruel, but if you really want
something to do during your summer vacation:</p>
<ol>
<li>
<p>Fire up your profiler, run a couple of benchmarks, and look for other
hotspots in the VM. Do you see anything in the runtime that you can improve?</p>
</li>
<li>
<p>Many strings in real-world user programs are small, often only a character
or two. This is less of a concern in clox because we intern strings, but
most VMs don&rsquo;t. For those that don&rsquo;t, heap allocating a tiny character array
for each of those little strings and then representing the value as a
pointer to that array is wasteful. Often, the pointer is larger than the
string&rsquo;s characters. A classic trick is to have a separate value
representation for small strings that stores the characters inline in the
value.</p>
<p>Starting from clox&rsquo;s original tagged union representation, implement that
optimization. Write a couple of relevant benchmarks and see if it helps.</p>
</li>
<li>
<p>Reflect back on your experience with this book. What parts of it worked well
for you? What didn&rsquo;t? Was it easier for you to learn bottom-up or top-down?
Did the illustrations help or distract? Did the analogies clarify or
confuse?</p>
<p>The more you understand your personal learning style, the more effectively
you can upload knowledge into your head. You can specifically target
material that teaches you the way you learn best.</p>
</li>
</ol>
</div>

<footer>
<a href="backmatter.html" class="next">
  Next Part: &ldquo;Backmatter&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/parsing-expressions.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Parsing Expressions &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Parsing Expressions<small>6</small></a></h3>

<ul>
    <li><a href="#ambiguity-and-the-parsing-game"><small>6.1</small> Ambiguity and the Parsing Game</a></li>
    <li><a href="#recursive-descent-parsing"><small>6.2</small> Recursive Descent Parsing</a></li>
    <li><a href="#syntax-errors"><small>6.3</small> Syntax Errors</a></li>
    <li><a href="#wiring-up-the-parser"><small>6.4</small> Wiring up the Parser</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Logic Versus History</a></li>
</ul>


<div class="prev-next">
    <a href="representing-code.html" title="Representing Code" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="evaluating-expressions.html" title="Evaluating Expressions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="representing-code.html" title="Representing Code" class="prev">←</a>
<a href="evaluating-expressions.html" title="Evaluating Expressions" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Parsing Expressions<small>6</small></a></h3>

<ul>
    <li><a href="#ambiguity-and-the-parsing-game"><small>6.1</small> Ambiguity and the Parsing Game</a></li>
    <li><a href="#recursive-descent-parsing"><small>6.2</small> Recursive Descent Parsing</a></li>
    <li><a href="#syntax-errors"><small>6.3</small> Syntax Errors</a></li>
    <li><a href="#wiring-up-the-parser"><small>6.4</small> Wiring up the Parser</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Logic Versus History</a></li>
</ul>


<div class="prev-next">
    <a href="representing-code.html" title="Representing Code" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="evaluating-expressions.html" title="Evaluating Expressions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">6</div>
  <h1>Parsing Expressions</h1>

<blockquote>
<p>Grammar, which knows how to control even kings.
<cite>Molière</cite></p>
</blockquote>
<p><span name="parse">This</span> chapter marks the first major milestone of the
book. Many of us have cobbled together a mishmash of regular expressions and
substring operations to extract some sense out of a pile of text. The code was
probably riddled with bugs and a beast to maintain. Writing a <em>real</em> parser<span class="em">&mdash;</span>one with decent error handling, a coherent internal structure, and the ability
to robustly chew through a sophisticated syntax<span class="em">&mdash;</span>is considered a rare,
impressive skill. In this chapter, you will <span name="attain">attain</span>
it.</p>
<aside name="parse">
<p>&ldquo;Parse&rdquo; comes to English from the Old French &ldquo;pars&rdquo; for &ldquo;part of speech&rdquo;. It
means to take a text and map each word to the grammar of the language. We use it
here in the same sense, except that our language is a little more modern than
Old French.</p>
</aside>
<aside name="attain">
<p>Like many rites of passage, you&rsquo;ll probably find it looks a little smaller, a
little less daunting when it&rsquo;s behind you than when it loomed ahead.</p>
</aside>
<p>It&rsquo;s easier than you think, partially because we front-loaded a lot of the hard
work in the <a href="representing-code.html">last chapter</a>. You already know your way around a formal grammar.
You&rsquo;re familiar with syntax trees, and we have some Java classes to represent
them. The only remaining piece is parsing<span class="em">&mdash;</span>transmogrifying a sequence of
tokens into one of those syntax trees.</p>
<p>Some CS textbooks make a big deal out of parsers. In the &rsquo;60s, computer
scientists<span class="em">&mdash;</span>understandably tired of programming in assembly language<span class="em">&mdash;</span>started designing more sophisticated, <span name="human">human</span>-friendly
languages like Fortran and ALGOL. Alas, they weren&rsquo;t very <em>machine</em>-friendly
for the primitive computers of the time.</p>
<aside name="human">
<p>Imagine how harrowing assembly programming on those old machines must have been
that they considered <em>Fortran</em> to be an improvement.</p>
</aside>
<p>These pioneers designed languages that they honestly weren&rsquo;t even sure how to
write compilers for, and then did groundbreaking work inventing parsing and
compiling techniques that could handle these new, big languages on those old, tiny
machines.</p>
<p>Classic compiler books read like fawning hagiographies of these heroes and their
tools. The cover of <em>Compilers: Principles, Techniques, and Tools</em> literally has
a dragon labeled &ldquo;complexity of compiler design&rdquo; being slain by a knight bearing
a sword and shield branded &ldquo;LALR parser generator&rdquo; and &ldquo;syntax directed
translation&rdquo;. They laid it on thick.</p>
<p>A little self-congratulation is well-deserved, but the truth is you don&rsquo;t need
to know most of that stuff to bang out a high-quality parser for a modern
machine. As always, I encourage you to broaden your education and take it in
later, but this book omits the trophy case.</p>
<h2><a href="#ambiguity-and-the-parsing-game" id="ambiguity-and-the-parsing-game"><small>6&#8202;.&#8202;1</small>Ambiguity and the Parsing Game</a></h2>
<p>In the last chapter, I said you can &ldquo;play&rdquo; a context-free grammar like a game in
order to <em>generate</em> strings. Parsers play that game in reverse. Given a string<span class="em">&mdash;</span>a series of tokens<span class="em">&mdash;</span>we map those tokens to terminals in the grammar to
figure out which rules could have generated that string.</p>
<p>The &ldquo;could have&rdquo; part is interesting. It&rsquo;s entirely possible to create a grammar
that is <em>ambiguous</em>, where different choices of productions can lead to the same
string. When you&rsquo;re using the grammar to <em>generate</em> strings, that doesn&rsquo;t matter
much. Once you have the string, who cares how you got to it?</p>
<p>When parsing, ambiguity means the parser may misunderstand the user&rsquo;s code. As
we parse, we aren&rsquo;t just determining if the string is valid Lox code, we&rsquo;re
also tracking which rules match which parts of it so that we know what part of
the language each token belongs to. Here&rsquo;s the Lox expression grammar we put
together in the last chapter:</p>
<div class="codehilite"><pre><span class="i">expression</span>     → <span class="i">literal</span>
               | <span class="i">unary</span>
               | <span class="i">binary</span>
               | <span class="i">grouping</span> ;

<span class="i">literal</span>        → <span class="t">NUMBER</span> | <span class="t">STRING</span> | <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span> ;
<span class="i">grouping</span>       → <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> ;
<span class="i">unary</span>          → ( <span class="s">&quot;-&quot;</span> | <span class="s">&quot;!&quot;</span> ) <span class="i">expression</span> ;
<span class="i">binary</span>         → <span class="i">expression</span> <span class="i">operator</span> <span class="i">expression</span> ;
<span class="i">operator</span>       → <span class="s">&quot;==&quot;</span> | <span class="s">&quot;!=&quot;</span> | <span class="s">&quot;&lt;&quot;</span> | <span class="s">&quot;&lt;=&quot;</span> | <span class="s">&quot;&gt;&quot;</span> | <span class="s">&quot;&gt;=&quot;</span>
               | <span class="s">&quot;+&quot;</span>  | <span class="s">&quot;-&quot;</span>  | <span class="s">&quot;*&quot;</span> | <span class="s">&quot;/&quot;</span> ;
</pre></div>
<p>This is a valid string in that grammar:</p><img src="image/parsing-expressions/tokens.png" alt="6 / 3 - 1" />
<p>But there are two ways we could have generated it. One way is:</p>
<ol>
<li>Starting at <code>expression</code>, pick <code>binary</code>.</li>
<li>For the left-hand <code>expression</code>, pick <code>NUMBER</code>, and use <code>6</code>.</li>
<li>For the operator, pick <code>"/"</code>.</li>
<li>For the right-hand <code>expression</code>, pick <code>binary</code> again.</li>
<li>In that nested <code>binary</code> expression, pick <code>3 - 1</code>.</li>
</ol>
<p>Another is:</p>
<ol>
<li>Starting at <code>expression</code>, pick <code>binary</code>.</li>
<li>For the left-hand <code>expression</code>, pick <code>binary</code> again.</li>
<li>In that nested <code>binary</code> expression, pick <code>6 / 3</code>.</li>
<li>Back at the outer <code>binary</code>, for the operator, pick <code>"-"</code>.</li>
<li>For the right-hand <code>expression</code>, pick <code>NUMBER</code>, and use <code>1</code>.</li>
</ol>
<p>Those produce the same <em>strings</em>, but not the same <em>syntax trees</em>:</p><img src="image/parsing-expressions/syntax-trees.png" alt="Two valid syntax trees: (6 / 3) - 1 and 6 / (3 - 1)" />
<p>In other words, the grammar allows seeing the expression as <code>(6 / 3) - 1</code> or <code>6 / (3 - 1)</code>. The <code>binary</code> rule lets operands nest any which way you want. That in
turn affects the result of evaluating the parsed tree. The way mathematicians
have addressed this ambiguity since blackboards were first invented is by
defining rules for precedence and associativity.</p>
<ul>
<li>
<p><span name="nonassociative"><strong>Precedence</strong></span> determines which operator
is evaluated first in an expression containing a mixture of different
operators. Precedence rules tell us that we evaluate the <code>/</code> before the <code>-</code>
in the above example. Operators with higher precedence are evaluated
before operators with lower precedence. Equivalently, higher precedence
operators are said to &ldquo;bind tighter&rdquo;.</p>
</li>
<li>
<p><strong>Associativity</strong> determines which operator is evaluated first in a series
of the <em>same</em> operator. When an operator is <strong>left-associative</strong> (think
&ldquo;left-to-right&rdquo;), operators on the left evaluate before those on the right.
Since <code>-</code> is left-associative, this expression:</p>
<div class="codehilite"><pre><span class="n">5</span> - <span class="n">3</span> - <span class="n">1</span>
</pre></div>
<p>is equivalent to:</p>
<div class="codehilite"><pre>(<span class="n">5</span> - <span class="n">3</span>) - <span class="n">1</span>
</pre></div>
<p>Assignment, on the other hand, is <strong>right-associative</strong>. This:</p>
<div class="codehilite"><pre><span class="i">a</span> = <span class="i">b</span> = <span class="i">c</span>
</pre></div>
<p>is equivalent to:</p>
<div class="codehilite"><pre><span class="i">a</span> = (<span class="i">b</span> = <span class="i">c</span>)
</pre></div>
</li>
</ul>
<aside name="nonassociative">
<p>While not common these days, some languages specify that certain pairs of
operators have <em>no</em> relative precedence. That makes it a syntax error to mix
those operators in an expression without using explicit grouping.</p>
<p>Likewise, some operators are <strong>non-associative</strong>. That means it&rsquo;s an error to
use that operator more than once in a sequence. For example, Perl&rsquo;s range
operator isn&rsquo;t associative, so <code>a .. b</code> is OK, but <code>a .. b .. c</code> is an error.</p>
</aside>
<p>Without well-defined precedence and associativity, an expression that uses
multiple operators is ambiguous<span class="em">&mdash;</span>it can be parsed into different syntax trees,
which could in turn evaluate to different results. We&rsquo;ll fix that in Lox by
applying the same precedence rules as C, going from lowest to highest.</p><table>
<thead>
<tr>
  <td>Name</td>
  <td>Operators</td>
  <td>Associates</td>
</tr>
</thead>
<tbody>
<tr>
  <td>Equality</td>
  <td><code>==</code> <code>!=</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Comparison</td>
  <td><code>&gt;</code> <code>&gt;=</code>
      <code>&lt;</code> <code>&lt;=</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Term</td>
  <td><code>-</code> <code>+</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Factor</td>
  <td><code>/</code> <code>*</code></td>
  <td>Left</td>
</tr>
<tr>
  <td>Unary</td>
  <td><code>!</code> <code>-</code></td>
  <td>Right</td>
</tr>
</tbody>
</table>
<p>Right now, the grammar stuffs all expression types into a single <code>expression</code>
rule. That same rule is used as the non-terminal for operands, which lets the
grammar accept any kind of expression as a subexpression, regardless of whether
the precedence rules allow it.</p>
<p>We fix that by <span name="massage">stratifying</span> the grammar. We define a
separate rule for each precedence level.</p>
<div class="codehilite"><pre><span class="i">expression</span>     → ...
<span class="i">equality</span>       → ...
<span class="i">comparison</span>     → ...
<span class="i">term</span>           → ...
<span class="i">factor</span>         → ...
<span class="i">unary</span>          → ...
<span class="i">primary</span>        → ...
</pre></div>
<aside name="massage">
<p>Instead of baking precedence right into the grammar rules, some parser
generators let you keep the same ambiguous-but-simple grammar and then add in a
little explicit operator precedence metadata on the side in order to
disambiguate.</p>
</aside>
<p>Each rule here only matches expressions at its precedence level or higher. For
example, <code>unary</code> matches a unary expression like <code>!negated</code> or a primary
expression like <code>1234</code>. And <code>term</code> can match <code>1 + 2</code> but also <code>3 * 4 / 5</code>. The
final <code>primary</code> rule covers the highest-precedence forms<span class="em">&mdash;</span>literals and
parenthesized expressions.</p>
<p>We just need to fill in the productions for each of those rules. We&rsquo;ll do the
easy ones first. The top <code>expression</code> rule matches any expression at any
precedence level. Since <span name="equality"><code>equality</code></span> has the lowest
precedence, if we match that, then it covers everything.</p>
<aside name="equality">
<p>We could eliminate <code>expression</code> and simply use <code>equality</code> in the other rules
that contain expressions, but using <code>expression</code> makes those other rules read a
little better.</p>
<p>Also, in later chapters when we expand the grammar to include assignment and
logical operators, we&rsquo;ll only need to change the production for <code>expression</code>
instead of touching every rule that contains an expression.</p>
</aside>
<div class="codehilite"><pre><span class="i">expression</span>     → <span class="i">equality</span> ;
</pre></div>
<p>Over at the other end of the precedence table, a primary expression contains
all the literals and grouping expressions.</p>
<div class="codehilite"><pre><span class="i">primary</span>        → <span class="t">NUMBER</span> | <span class="t">STRING</span> | <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span>
               | <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> ;
</pre></div>
<p>A unary expression starts with a unary operator followed by the operand. Since
unary operators can nest<span class="em">&mdash;</span><code>!!true</code> is a valid if weird expression<span class="em">&mdash;</span>the
operand can itself be a unary expression. A recursive rule handles that nicely.</p>
<div class="codehilite"><pre><span class="i">unary</span>          → ( <span class="s">&quot;!&quot;</span> | <span class="s">&quot;-&quot;</span> ) <span class="i">unary</span> ;
</pre></div>
<p>But this rule has a problem. It never terminates.</p>
<p>Remember, each rule needs to match expressions at that precedence level <em>or
higher</em>, so we also need to let this match a primary expression.</p>
<div class="codehilite"><pre><span class="i">unary</span>          → ( <span class="s">&quot;!&quot;</span> | <span class="s">&quot;-&quot;</span> ) <span class="i">unary</span>
               | <span class="i">primary</span> ;
</pre></div>
<p>That works.</p>
<p>The remaining rules are all binary operators. We&rsquo;ll start with the rule for
multiplication and division. Here&rsquo;s a first try:</p>
<div class="codehilite"><pre><span class="i">factor</span>         → <span class="i">factor</span> ( <span class="s">&quot;/&quot;</span> | <span class="s">&quot;*&quot;</span> ) <span class="i">unary</span>
               | <span class="i">unary</span> ;
</pre></div>
<p>The rule recurses to match the left operand. That enables the rule to match a
series of multiplication and division expressions like <code>1 * 2 / 3</code>. Putting the
recursive production on the left side and <code>unary</code> on the right makes the rule
<span name="mult">left-associative</span> and unambiguous.</p>
<aside name="mult">
<p>In principle, it doesn&rsquo;t matter whether you treat multiplication as left- or
right-associative<span class="em">&mdash;</span>you get the same result either way. Alas, in the real world
with limited precision, roundoff and overflow mean that associativity can affect
the result of a sequence of multiplications. Consider:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="n">0.1</span> * (<span class="n">0.2</span> * <span class="n">0.3</span>);
<span class="k">print</span> (<span class="n">0.1</span> * <span class="n">0.2</span>) * <span class="n">0.3</span>;
</pre></div>
<p>In languages like Lox that use <a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">IEEE 754</a> double-precision floating-point
numbers, the first evaluates to <code>0.006</code>, while the second yields
<code>0.006000000000000001</code>. Sometimes that tiny difference matters.
<a href="https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html">This</a> is a good place to learn more.</p>
</aside>
<p>All of this is correct, but the fact that the first symbol in the body of the
rule is the same as the head of the rule means this production is
<strong>left-recursive</strong>. Some parsing techniques, including the one we&rsquo;re going to
use, have trouble with left recursion. (Recursion elsewhere, like the direct recursion in
<code>unary</code> and the indirect recursion for grouping in <code>primary</code>, is not a problem.)</p>
<p>There are many grammars you can define that match the same language. The choice
for how to model a particular language is partially a matter of taste and
partially a pragmatic one. This rule is correct, but not optimal for how we
intend to parse it. Instead of a left-recursive rule, we&rsquo;ll use a different one.</p>
<div class="codehilite"><pre><span class="i">factor</span>         → <span class="i">unary</span> ( ( <span class="s">&quot;/&quot;</span> | <span class="s">&quot;*&quot;</span> ) <span class="i">unary</span> )* ;
</pre></div>
<p>We define a factor expression as a flat <em>sequence</em> of multiplications
and divisions. This matches the same syntax as the previous rule, but better
mirrors the code we&rsquo;ll write to parse Lox. We use the same structure for all of
the other binary operator precedence levels, giving us this complete expression
grammar:</p>
<div class="codehilite"><pre><span class="i">expression</span>     → <span class="i">equality</span> ;
<span class="i">equality</span>       → <span class="i">comparison</span> ( ( <span class="s">&quot;!=&quot;</span> | <span class="s">&quot;==&quot;</span> ) <span class="i">comparison</span> )* ;
<span class="i">comparison</span>     → <span class="i">term</span> ( ( <span class="s">&quot;&gt;&quot;</span> | <span class="s">&quot;&gt;=&quot;</span> | <span class="s">&quot;&lt;&quot;</span> | <span class="s">&quot;&lt;=&quot;</span> ) <span class="i">term</span> )* ;
<span class="i">term</span>           → <span class="i">factor</span> ( ( <span class="s">&quot;-&quot;</span> | <span class="s">&quot;+&quot;</span> ) <span class="i">factor</span> )* ;
<span class="i">factor</span>         → <span class="i">unary</span> ( ( <span class="s">&quot;/&quot;</span> | <span class="s">&quot;*&quot;</span> ) <span class="i">unary</span> )* ;
<span class="i">unary</span>          → ( <span class="s">&quot;!&quot;</span> | <span class="s">&quot;-&quot;</span> ) <span class="i">unary</span>
               | <span class="i">primary</span> ;
<span class="i">primary</span>        → <span class="t">NUMBER</span> | <span class="t">STRING</span> | <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span>
               | <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> ;
</pre></div>
<p>This grammar is more complex than the one we had before, but in return we have
eliminated the previous one&rsquo;s ambiguity. It&rsquo;s just what we need to make a
parser.</p>
<h2><a href="#recursive-descent-parsing" id="recursive-descent-parsing"><small>6&#8202;.&#8202;2</small>Recursive Descent Parsing</a></h2>
<p>There is a whole pack of parsing techniques whose names are mostly combinations
of &ldquo;L&rdquo; and &ldquo;R&rdquo;<span class="em">&mdash;</span><a href="https://en.wikipedia.org/wiki/LL_parser">LL(k)</a>, <a href="https://en.wikipedia.org/wiki/LR_parser">LR(1)</a>, <a href="https://en.wikipedia.org/wiki/LALR_parser">LALR</a><span class="em">&mdash;</span>along with more exotic
beasts like <a href="https://en.wikipedia.org/wiki/Parser_combinator">parser combinators</a>, <a href="https://en.wikipedia.org/wiki/Earley_parser">Earley parsers</a>, <a href="https://en.wikipedia.org/wiki/Shunting-yard_algorithm">the shunting yard
algorithm</a>, and <a href="https://en.wikipedia.org/wiki/Parsing_expression_grammar">packrat parsing</a>. For our first interpreter, one
technique is more than sufficient: <strong>recursive descent</strong>.</p>
<p>Recursive descent is the simplest way to build a parser, and doesn&rsquo;t require
using complex parser generator tools like Yacc, Bison or ANTLR. All you need is
straightforward handwritten code. Don&rsquo;t be fooled by its simplicity, though.
Recursive descent parsers are fast, robust, and can support sophisticated
error handling. In fact, GCC, V8 (the JavaScript VM in Chrome), Roslyn (the C#
compiler written in C#) and many other heavyweight production language
implementations use recursive descent. It rocks.</p>
<p>Recursive descent is considered a <strong>top-down parser</strong> because it starts from the
top or outermost grammar rule (here <code>expression</code>) and works its way <span
name="descent">down</span> into the nested subexpressions before finally
reaching the leaves of the syntax tree. This is in contrast with bottom-up
parsers like LR that start with primary expressions and compose them into larger
and larger chunks of syntax.</p>
<aside name="descent">
<p>It&rsquo;s called &ldquo;recursive <em>descent</em>&rdquo; because it walks <em>down</em> the grammar.
Confusingly, we also use direction metaphorically when talking about &ldquo;high&rdquo; and
&ldquo;low&rdquo; precedence, but the orientation is reversed. In a top-down parser, you
reach the lowest-precedence expressions first because they may in turn contain
subexpressions of higher precedence.</p><img src="image/parsing-expressions/direction.png" alt="Top-down grammar rules in order of increasing precedence." />
<p>CS people really need to get together and straighten out their metaphors. Don&rsquo;t
even get me started on which direction a stack grows or why trees have their
roots on top.</p>
</aside>
<p>A recursive descent parser is a literal translation of the grammar&rsquo;s rules
straight into imperative code. Each rule becomes a function. The body of the
rule translates to code roughly like:</p><table>
<thead>
<tr>
  <td>Grammar notation</td>
  <td>Code representation</td>
</tr>
</thead>
<tbody>
  <tr><td>Terminal</td><td>Code to match and consume a token</td></tr>
  <tr><td>Nonterminal</td><td>Call to that rule&rsquo;s function</td></tr>
  <tr><td><code>|</code></td><td><code>if</code> or <code>switch</code> statement</td></tr>
  <tr><td><code>*</code> or <code>+</code></td><td><code>while</code> or <code>for</code> loop</td></tr>
  <tr><td><code>?</code></td><td><code>if</code> statement</td></tr>
</tbody>
</table>
<p>The descent is described as &ldquo;recursive&rdquo; because when a grammar rule refers to
itself<span class="em">&mdash;</span>directly or indirectly<span class="em">&mdash;</span>that translates to a recursive function
call.</p>
<h3><a href="#the-parser-class" id="the-parser-class"><small>6&#8202;.&#8202;2&#8202;.&#8202;1</small>The parser class</a></h3>
<p>Each grammar rule becomes a method inside this new class:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.List</span>;

<span class="k">import static</span> <span class="i">com.craftinginterpreters.lox.TokenType.*</span>;

<span class="k">class</span> <span class="t">Parser</span> {
  <span class="k">private</span> <span class="k">final</span> <span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">tokens</span>;
  <span class="k">private</span> <span class="t">int</span> <span class="i">current</span> = <span class="n">0</span>;

  <span class="t">Parser</span>(<span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">tokens</span>) {
    <span class="k">this</span>.<span class="i">tokens</span> = <span class="i">tokens</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, create new file</div>

<p>Like the scanner, the parser consumes a flat input sequence, only now we&rsquo;re
reading tokens instead of characters. We store the list of tokens and use
<code>current</code> to point to the next token eagerly waiting to be parsed.</p>
<p>We&rsquo;re going to run straight through the expression grammar now and translate
each rule to Java code. The first rule, <code>expression</code>, simply expands to the
<code>equality</code> rule, so that&rsquo;s straightforward.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>Parser</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">expression</span>() {
    <span class="k">return</span> <span class="i">equality</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>Parser</em>()</div>

<p>Each method for parsing a grammar rule produces a syntax tree for that rule and
returns it to the caller. When the body of the rule contains a nonterminal<span class="em">&mdash;</span>a
reference to another rule<span class="em">&mdash;</span>we <span name="left">call</span> that other rule&rsquo;s
method.</p>
<aside name="left">
<p>This is why left recursion is problematic for recursive descent. The function
for a left-recursive rule immediately calls itself, which calls itself again,
and so on, until the parser hits a stack overflow and dies.</p>
</aside>
<p>The rule for equality is a little more complex.</p>
<div class="codehilite"><pre><span class="i">equality</span>       → <span class="i">comparison</span> ( ( <span class="s">&quot;!=&quot;</span> | <span class="s">&quot;==&quot;</span> ) <span class="i">comparison</span> )* ;
</pre></div>
<p>In Java, that becomes:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>expression</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">equality</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">comparison</span>();

    <span class="k">while</span> (<span class="i">match</span>(<span class="i">BANG_EQUAL</span>, <span class="i">EQUAL_EQUAL</span>)) {
      <span class="t">Token</span> <span class="i">operator</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">right</span> = <span class="i">comparison</span>();
      <span class="i">expr</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Binary</span>(<span class="i">expr</span>, <span class="i">operator</span>, <span class="i">right</span>);
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>expression</em>()</div>

<p>Let&rsquo;s step through it. The first <code>comparison</code> nonterminal in the body translates
to the first call to <code>comparison()</code> in the method. We take that result and store
it in a local variable.</p>
<p>Then, the <code>( ... )*</code> loop in the rule maps to a <code>while</code> loop. We need to know
when to exit that loop. We can see that inside the rule, we must first find
either a <code>!=</code> or <code>==</code> token. So, if we <em>don&rsquo;t</em> see one of those, we must be done
with the sequence of equality operators. We express that check using a handy
<code>match()</code> method.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>equality</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">match</span>(<span class="t">TokenType</span>... <span class="i">types</span>) {
    <span class="k">for</span> (<span class="t">TokenType</span> <span class="i">type</span> : <span class="i">types</span>) {
      <span class="k">if</span> (<span class="i">check</span>(<span class="i">type</span>)) {
        <span class="i">advance</span>();
        <span class="k">return</span> <span class="k">true</span>;
      }
    }

    <span class="k">return</span> <span class="k">false</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>equality</em>()</div>

<p>This checks to see if the current token has any of the given types. If so, it
consumes the token and returns <code>true</code>. Otherwise, it returns <code>false</code> and leaves
the current token alone. The <code>match()</code> method is defined in terms of two more
fundamental operations.</p>
<p>The <code>check()</code> method returns <code>true</code> if the current token is of the given type.
Unlike <code>match()</code>, it never consumes the token; it only looks at it.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>match</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">check</span>(<span class="t">TokenType</span> <span class="i">type</span>) {
    <span class="k">if</span> (<span class="i">isAtEnd</span>()) <span class="k">return</span> <span class="k">false</span>;
    <span class="k">return</span> <span class="i">peek</span>().<span class="i">type</span> == <span class="i">type</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>match</em>()</div>

<p>The <code>advance()</code> method consumes the current token and returns it, similar to how
our scanner&rsquo;s corresponding method crawled through characters.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>check</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Token</span> <span class="i">advance</span>() {
    <span class="k">if</span> (!<span class="i">isAtEnd</span>()) <span class="i">current</span>++;
    <span class="k">return</span> <span class="i">previous</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>check</em>()</div>

<p>These methods bottom out on the last handful of primitive operations.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>advance</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">isAtEnd</span>() {
    <span class="k">return</span> <span class="i">peek</span>().<span class="i">type</span> == <span class="i">EOF</span>;
  }

  <span class="k">private</span> <span class="t">Token</span> <span class="i">peek</span>() {
    <span class="k">return</span> <span class="i">tokens</span>.<span class="i">get</span>(<span class="i">current</span>);
  }

  <span class="k">private</span> <span class="t">Token</span> <span class="i">previous</span>() {
    <span class="k">return</span> <span class="i">tokens</span>.<span class="i">get</span>(<span class="i">current</span> - <span class="n">1</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>advance</em>()</div>

<p><code>isAtEnd()</code> checks if we&rsquo;ve run out of tokens to parse. <code>peek()</code> returns the
current token we have yet to consume, and <code>previous()</code> returns the most recently
consumed token. The latter makes it easier to use <code>match()</code> and then access the
just-matched token.</p>
<p>That&rsquo;s most of the parsing infrastructure we need. Where were we? Right, so if
we are inside the <code>while</code> loop in <code>equality()</code>, then we know we have found a
<code>!=</code> or <code>==</code> operator and must be parsing an equality expression.</p>
<p>We grab the matched operator token so we can track which kind of equality
expression we have. Then we call <code>comparison()</code> again to parse the right-hand
operand. We combine the operator and its two operands into a new <code>Expr.Binary</code>
syntax tree node, and then loop around. For each iteration, we store the
resulting expression back in the same <code>expr</code> local variable. As we zip through a
sequence of equality expressions, that creates a left-associative nested tree of
binary operator nodes.</p>
<p><span name="sequence"></span></p><img src="image/parsing-expressions/sequence.png" alt="The syntax tree created by parsing 'a == b == c == d == e'" />
<aside name="sequence">
<p>Parsing <code>a == b == c == d == e</code>. For each iteration, we create a new binary
expression using the previous one as the left operand.</p>
</aside>
<p>The parser falls out of the loop once it hits a token that&rsquo;s not an equality
operator. Finally, it returns the expression. Note that if the parser never
encounters an equality operator, then it never enters the loop. In that case,
the <code>equality()</code> method effectively calls and returns <code>comparison()</code>. In that
way, this method matches an equality operator <em>or anything of higher
precedence</em>.</p>
<p>Moving on to the next rule<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<div class="codehilite"><pre><span class="i">comparison</span>     → <span class="i">term</span> ( ( <span class="s">&quot;&gt;&quot;</span> | <span class="s">&quot;&gt;=&quot;</span> | <span class="s">&quot;&lt;&quot;</span> | <span class="s">&quot;&lt;=&quot;</span> ) <span class="i">term</span> )* ;
</pre></div>
<p>Translated to Java:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>equality</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">comparison</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">term</span>();

    <span class="k">while</span> (<span class="i">match</span>(<span class="i">GREATER</span>, <span class="i">GREATER_EQUAL</span>, <span class="i">LESS</span>, <span class="i">LESS_EQUAL</span>)) {
      <span class="t">Token</span> <span class="i">operator</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">right</span> = <span class="i">term</span>();
      <span class="i">expr</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Binary</span>(<span class="i">expr</span>, <span class="i">operator</span>, <span class="i">right</span>);
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>equality</em>()</div>

<p>The grammar rule is virtually <span name="handle">identical</span> to <code>equality</code>
and so is the corresponding code. The only differences are the token types for
the operators we match, and the method we call for the operands<span class="em">&mdash;</span>now
<code>term()</code> instead of <code>comparison()</code>. The remaining two binary operator rules
follow the same pattern.</p>
<p>In order of precedence, first addition and subtraction:</p>
<aside name="handle">
<p>If you wanted to do some clever Java 8, you could create a helper method for
parsing a left-associative series of binary operators given a list of token
types, and an operand method handle to simplify this redundant code.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>comparison</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">term</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">factor</span>();

    <span class="k">while</span> (<span class="i">match</span>(<span class="i">MINUS</span>, <span class="i">PLUS</span>)) {
      <span class="t">Token</span> <span class="i">operator</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">right</span> = <span class="i">factor</span>();
      <span class="i">expr</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Binary</span>(<span class="i">expr</span>, <span class="i">operator</span>, <span class="i">right</span>);
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>comparison</em>()</div>

<p>And finally, multiplication and division:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>term</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">factor</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">unary</span>();

    <span class="k">while</span> (<span class="i">match</span>(<span class="i">SLASH</span>, <span class="i">STAR</span>)) {
      <span class="t">Token</span> <span class="i">operator</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">right</span> = <span class="i">unary</span>();
      <span class="i">expr</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Binary</span>(<span class="i">expr</span>, <span class="i">operator</span>, <span class="i">right</span>);
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>term</em>()</div>

<p>That&rsquo;s all of the binary operators, parsed with the correct precedence and
associativity. We&rsquo;re crawling up the precedence hierarchy and now we&rsquo;ve reached
the unary operators.</p>
<div class="codehilite"><pre><span class="i">unary</span>          → ( <span class="s">&quot;!&quot;</span> | <span class="s">&quot;-&quot;</span> ) <span class="i">unary</span>
               | <span class="i">primary</span> ;
</pre></div>
<p>The code for this is a little different.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>factor</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">unary</span>() {
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">BANG</span>, <span class="i">MINUS</span>)) {
      <span class="t">Token</span> <span class="i">operator</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">right</span> = <span class="i">unary</span>();
      <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Unary</span>(<span class="i">operator</span>, <span class="i">right</span>);
    }

    <span class="k">return</span> <span class="i">primary</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>factor</em>()</div>

<p>Again, we look at the <span name="current">current</span> token to see how to
parse. If it&rsquo;s a <code>!</code> or <code>-</code>, we must have a unary expression. In that case, we
grab the token and then recursively call <code>unary()</code> again to parse the operand.
Wrap that all up in a unary expression syntax tree and we&rsquo;re done.</p>
<aside name="current">
<p>The fact that the parser looks ahead at upcoming tokens to decide how to parse
puts recursive descent into the category of <strong>predictive parsers</strong>.</p>
</aside>
<p>Otherwise, we must have reached the highest level of precedence, primary
expressions.</p>
<div class="codehilite"><pre><span class="i">primary</span>        → <span class="t">NUMBER</span> | <span class="t">STRING</span> | <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span>
               | <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> ;
</pre></div>
<p>Most of the cases for the rule are single terminals, so parsing is
straightforward.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>unary</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">primary</span>() {
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">FALSE</span>)) <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Literal</span>(<span class="k">false</span>);
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">TRUE</span>)) <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Literal</span>(<span class="k">true</span>);
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">NIL</span>)) <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Literal</span>(<span class="k">null</span>);

    <span class="k">if</span> (<span class="i">match</span>(<span class="i">NUMBER</span>, <span class="i">STRING</span>)) {
      <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Literal</span>(<span class="i">previous</span>().<span class="i">literal</span>);
    }

    <span class="k">if</span> (<span class="i">match</span>(<span class="i">LEFT_PAREN</span>)) {
      <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">expression</span>();
      <span class="i">consume</span>(<span class="i">RIGHT_PAREN</span>, <span class="s">&quot;Expect &#39;)&#39; after expression.&quot;</span>);
      <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Grouping</span>(<span class="i">expr</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>unary</em>()</div>

<p>The interesting branch is the one for handling parentheses. After we match an
opening <code>(</code> and parse the expression inside it, we <em>must</em> find a <code>)</code> token. If
we don&rsquo;t, that&rsquo;s an error.</p>
<h2><a href="#syntax-errors" id="syntax-errors"><small>6&#8202;.&#8202;3</small>Syntax Errors</a></h2>
<p>A parser really has two jobs:</p>
<ol>
<li>
<p>Given a valid sequence of tokens, produce a corresponding syntax tree.</p>
</li>
<li>
<p>Given an <em>invalid</em> sequence of tokens, detect any errors and tell the
user about their mistakes.</p>
</li>
</ol>
<p>Don&rsquo;t underestimate how important the second job is! In modern IDEs and editors,
the parser is constantly reparsing code<span class="em">&mdash;</span>often while the user is still editing
it<span class="em">&mdash;</span>in order to syntax highlight and support things like auto-complete. That
means it will encounter code in incomplete, half-wrong states <em>all the time.</em></p>
<p>When the user doesn&rsquo;t realize the syntax is wrong, it is up to the parser to
help guide them back onto the right path. The way it reports errors is a large
part of your language&rsquo;s user interface. Good syntax error handling is hard. By
definition, the code isn&rsquo;t in a well-defined state, so there&rsquo;s no infallible way
to know what the user <em>meant</em> to write. The parser can&rsquo;t read your <span
name="telepathy">mind</span>.</p>
<aside name="telepathy">
<p>Not yet at least. With the way things are going in machine learning these days,
who knows what the future will bring?</p>
</aside>
<p>There are a couple of hard requirements for when the parser runs into a syntax
error. A parser must:</p>
<ul>
<li>
<p><strong>Detect and report the error.</strong> If it doesn&rsquo;t detect the <span
name="error">error</span> and passes the resulting malformed syntax tree on
to the interpreter, all manner of horrors may be summoned.</p>
<aside name="error">
<p>Philosophically speaking, if an error isn&rsquo;t detected and the interpreter
runs the code, is it <em>really</em> an error?</p>
</aside></li>
<li>
<p><strong>Avoid crashing or hanging.</strong> Syntax errors are a fact of life, and
language tools have to be robust in the face of them. Segfaulting or getting
stuck in an infinite loop isn&rsquo;t allowed. While the source may not be valid
<em>code</em>, it&rsquo;s still a valid <em>input to the parser</em> because users use the
parser to learn what syntax is allowed.</p>
</li>
</ul>
<p>Those are the table stakes if you want to get in the parser game at all, but you
really want to raise the ante beyond that. A decent parser should:</p>
<ul>
<li>
<p><strong>Be fast.</strong> Computers are thousands of times faster than they were when
parser technology was first invented. The days of needing to optimize your
parser so that it could get through an entire source file during a coffee
break are over. But programmer expectations have risen as quickly, if not
faster. They expect their editors to reparse files in milliseconds after
every keystroke.</p>
</li>
<li>
<p><strong>Report as many distinct errors as there are.</strong> Aborting after the first
error is easy to implement, but it&rsquo;s annoying for users if every time they
fix what they think is the one error in a file, a new one appears. They
want to see them all.</p>
</li>
<li>
<p><strong>Minimize <em>cascaded</em> errors.</strong> Once a single error is found, the parser no
longer really knows what&rsquo;s going on. It tries to get itself back on track
and keep going, but if it gets confused, it may report a slew of ghost
errors that don&rsquo;t indicate other real problems in the code. When the first
error is fixed, those phantoms disappear, because they reflect only the
parser&rsquo;s own confusion. Cascaded errors are annoying because they can scare
the user into thinking their code is in a worse state than it is.</p>
</li>
</ul>
<p>The last two points are in tension. We want to report as many separate errors as
we can, but we don&rsquo;t want to report ones that are merely side effects of an
earlier one.</p>
<p>The way a parser responds to an error and keeps going to look for later errors
is called <strong>error recovery</strong>. This was a hot research topic in the &rsquo;60s. Back
then, you&rsquo;d hand a stack of punch cards to the secretary and come back the next
day to see if the compiler succeeded. With an iteration loop that slow, you
<em>really</em> wanted to find every single error in your code in one pass.</p>
<p>Today, when parsers complete before you&rsquo;ve even finished typing, it&rsquo;s less of an
issue. Simple, fast error recovery is fine.</p>
<h3><a href="#panic-mode-error-recovery" id="panic-mode-error-recovery"><small>6&#8202;.&#8202;3&#8202;.&#8202;1</small>Panic mode error recovery</a></h3>
<aside name="panic">
<p>You know you want to push it.</p><img src="image/parsing-expressions/panic.png" alt="A big shiny 'PANIC' button." />
</aside>
<p>Of all the recovery techniques devised in yesteryear, the one that best stood
the test of time is called<span class="em">&mdash;</span>somewhat alarmingly<span class="em">&mdash;</span><span name="panic"><strong>panic
mode</strong></span>. As soon as the parser detects an error, it enters panic mode. It
knows at least one token doesn&rsquo;t make sense given its current state in the
middle of some stack of grammar productions.</p>
<p>Before it can get back to parsing, it needs to get its state and the sequence of
forthcoming tokens aligned such that the next token does match the rule being
parsed. This process is called <strong>synchronization</strong>.</p>
<p>To do that, we select some rule in the grammar that will mark the
synchronization point. The parser fixes its parsing state by jumping out of any
nested productions until it gets back to that rule. Then it synchronizes the
token stream by discarding tokens until it reaches one that can appear at that
point in the rule.</p>
<p>Any additional real syntax errors hiding in those discarded tokens aren&rsquo;t
reported, but it also means that any mistaken cascaded errors that are side
effects of the initial error aren&rsquo;t <em>falsely</em> reported either, which is a decent
trade-off.</p>
<p>The traditional place in the grammar to synchronize is between statements. We
don&rsquo;t have those yet, so we won&rsquo;t actually synchronize in this chapter, but
we&rsquo;ll get the machinery in place for later.</p>
<h3><a href="#entering-panic-mode" id="entering-panic-mode"><small>6&#8202;.&#8202;3&#8202;.&#8202;2</small>Entering panic mode</a></h3>
<p>Back before we went on this side trip around error recovery, we were writing the
code to parse a parenthesized expression. After parsing the expression, the
parser looks for the closing <code>)</code> by calling <code>consume()</code>. Here, finally, is that
method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>match</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Token</span> <span class="i">consume</span>(<span class="t">TokenType</span> <span class="i">type</span>, <span class="t">String</span> <span class="i">message</span>) {
    <span class="k">if</span> (<span class="i">check</span>(<span class="i">type</span>)) <span class="k">return</span> <span class="i">advance</span>();

    <span class="k">throw</span> <span class="i">error</span>(<span class="i">peek</span>(), <span class="i">message</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>match</em>()</div>

<p>It&rsquo;s similar to <code>match()</code> in that it checks to see if the next token is of the
expected type. If so, it consumes the token and everything is groovy. If some
other token is there, then we&rsquo;ve hit an error. We report it by calling this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>previous</em>()</div>
<pre>  <span class="k">private</span> <span class="t">ParseError</span> <span class="i">error</span>(<span class="t">Token</span> <span class="i">token</span>, <span class="t">String</span> <span class="i">message</span>) {
    <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">token</span>, <span class="i">message</span>);
    <span class="k">return</span> <span class="k">new</span> <span class="t">ParseError</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>previous</em>()</div>

<p>First, that shows the error to the user by calling:</p>
<div class="codehilite"><div class="source-file"><em>lox/Lox.java</em><br>
add after <em>report</em>()</div>
<pre>  <span class="k">static</span> <span class="t">void</span> <span class="i">error</span>(<span class="t">Token</span> <span class="i">token</span>, <span class="t">String</span> <span class="i">message</span>) {
    <span class="k">if</span> (<span class="i">token</span>.<span class="i">type</span> == <span class="t">TokenType</span>.<span class="i">EOF</span>) {
      <span class="i">report</span>(<span class="i">token</span>.<span class="i">line</span>, <span class="s">&quot; at end&quot;</span>, <span class="i">message</span>);
    } <span class="k">else</span> {
      <span class="i">report</span>(<span class="i">token</span>.<span class="i">line</span>, <span class="s">&quot; at &#39;&quot;</span> + <span class="i">token</span>.<span class="i">lexeme</span> + <span class="s">&quot;&#39;&quot;</span>, <span class="i">message</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, add after <em>report</em>()</div>

<p>This reports an error at a given token. It shows the token&rsquo;s location and the
token itself. This will come in handy later since we use tokens throughout the
interpreter to track locations in code.</p>
<p>After we report the error, the user knows about their mistake, but what does the
<em>parser</em> do next? Back in <code>error()</code>, we create and return a ParseError, an
instance of this new class:</p>
<div class="codehilite"><pre class="insert-before">class Parser {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
nest inside class <em>Parser</em></div>
<pre class="insert">  <span class="k">private</span> <span class="k">static</span> <span class="k">class</span> <span class="t">ParseError</span> <span class="k">extends</span> <span class="t">RuntimeException</span> {}

</pre><pre class="insert-after">  private final List&lt;Token&gt; tokens;
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, nest inside class <em>Parser</em></div>

<p>This is a simple sentinel class we use to unwind the parser. The <code>error()</code>
method <em>returns</em> the error instead of <em>throwing</em> it because we want to let the
calling method inside the parser decide whether to unwind or not. Some parse
errors occur in places where the parser isn&rsquo;t likely to get into a weird state
and we don&rsquo;t need to <span name="production">synchronize</span>. In those
places, we simply report the error and keep on truckin&rsquo;.</p>
<p>For example, Lox limits the number of arguments you can pass to a function. If
you pass too many, the parser needs to report that error, but it can and should
simply keep on parsing the extra arguments instead of freaking out and going
into panic mode.</p>
<aside name="production">
<p>Another way to handle common syntax errors is with <strong>error productions</strong>. You
augment the grammar with a rule that <em>successfully</em> matches the <em>erroneous</em>
syntax. The parser safely parses it but then reports it as an error instead of
producing a syntax tree.</p>
<p>For example, some languages have a unary <code>+</code> operator, like <code>+123</code>, but Lox does
not. Instead of getting confused when the parser stumbles onto a <code>+</code> at the
beginning of an expression, we could extend the unary rule to allow it.</p>
<div class="codehilite"><pre><span class="i">unary</span> → ( <span class="s">&quot;!&quot;</span> | <span class="s">&quot;-&quot;</span> | <span class="s">&quot;+&quot;</span> ) <span class="i">unary</span>
      | <span class="i">primary</span> ;
</pre></div>
<p>This lets the parser consume <code>+</code> without going into panic mode or leaving the
parser in a weird state.</p>
<p>Error productions work well because you, the parser author, know <em>how</em> the code
is wrong and what the user was likely trying to do. That means you can give a
more helpful message to get the user back on track, like, &ldquo;Unary &lsquo;+&rsquo; expressions
are not supported.&rdquo; Mature parsers tend to accumulate error productions like
barnacles since they help users fix common mistakes.</p>
</aside>
<p>In our case, though, the syntax error is nasty enough that we want to panic and
synchronize. Discarding tokens is pretty easy, but how do we synchronize the
parser&rsquo;s own state?</p>
<h3><a href="#synchronizing-a-recursive-descent-parser" id="synchronizing-a-recursive-descent-parser"><small>6&#8202;.&#8202;3&#8202;.&#8202;3</small>Synchronizing a recursive descent parser</a></h3>
<p>With recursive descent, the parser&rsquo;s state<span class="em">&mdash;</span>which rules it is in the middle of
recognizing<span class="em">&mdash;</span>is not stored explicitly in fields. Instead, we use Java&rsquo;s
own call stack to track what the parser is doing. Each rule in the middle of
being parsed is a call frame on the stack. In order to reset that state, we need
to clear out those call frames.</p>
<p>The natural way to do that in Java is exceptions. When we want to synchronize,
we <em>throw</em> that ParseError object. Higher up in the method for the grammar rule
we are synchronizing to, we&rsquo;ll catch it. Since we synchronize on statement
boundaries, we&rsquo;ll catch the exception there. After the exception is caught, the
parser is in the right state. All that&rsquo;s left is to synchronize the tokens.</p>
<p>We want to discard tokens until we&rsquo;re right at the beginning of the next
statement. That boundary is pretty easy to spot<span class="em">&mdash;</span>it&rsquo;s one of the main reasons
we picked it. <em>After</em> a semicolon, we&rsquo;re <span name="semicolon">probably</span>
finished with a statement. Most statements start with a keyword<span class="em">&mdash;</span><code>for</code>, <code>if</code>,
<code>return</code>, <code>var</code>, etc. When the <em>next</em> token is any of those, we&rsquo;re probably
about to start a statement.</p>
<aside name="semicolon">
<p>I say &ldquo;probably&rdquo; because we could hit a semicolon separating clauses in a <code>for</code>
loop. Our synchronization isn&rsquo;t perfect, but that&rsquo;s OK. We&rsquo;ve already reported
the first error precisely, so everything after that is kind of &ldquo;best effort.&rdquo;</p>
</aside>
<p>This method encapsulates that logic:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>error</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">synchronize</span>() {
    <span class="i">advance</span>();

    <span class="k">while</span> (!<span class="i">isAtEnd</span>()) {
      <span class="k">if</span> (<span class="i">previous</span>().<span class="i">type</span> == <span class="i">SEMICOLON</span>) <span class="k">return</span>;

      <span class="k">switch</span> (<span class="i">peek</span>().<span class="i">type</span>) {
        <span class="k">case</span> <span class="i">CLASS</span>:
        <span class="k">case</span> <span class="i">FUN</span>:
        <span class="k">case</span> <span class="i">VAR</span>:
        <span class="k">case</span> <span class="i">FOR</span>:
        <span class="k">case</span> <span class="i">IF</span>:
        <span class="k">case</span> <span class="i">WHILE</span>:
        <span class="k">case</span> <span class="i">PRINT</span>:
        <span class="k">case</span> <span class="i">RETURN</span>:
          <span class="k">return</span>;
      }

      <span class="i">advance</span>();
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>error</em>()</div>

<p>It discards tokens until it thinks it has found a statement boundary. After
catching a ParseError, we&rsquo;ll call this and then we are hopefully back in sync.
When it works well, we have discarded tokens that would have likely caused
cascaded errors anyway, and now we can parse the rest of the file starting at
the next statement.</p>
<p>Alas, we don&rsquo;t get to see this method in action, since we don&rsquo;t have statements
yet. We&rsquo;ll get to that <a href="statements-and-state.html">in a couple of chapters</a>. For now, if an
error occurs, we&rsquo;ll panic and unwind all the way to the top and stop parsing.
Since we can parse only a single expression anyway, that&rsquo;s no big loss.</p>
<h2><a href="#wiring-up-the-parser" id="wiring-up-the-parser"><small>6&#8202;.&#8202;4</small>Wiring up the Parser</a></h2>
<p>We are mostly done parsing expressions now. There is one other place where we
need to add a little error handling. As the parser descends through the parsing
methods for each grammar rule, it eventually hits <code>primary()</code>. If none of the
cases in there match, it means we are sitting on a token that can&rsquo;t start an
expression. We need to handle that error too.</p>
<div class="codehilite"><pre class="insert-before">    if (match(LEFT_PAREN)) {
      Expr expr = expression();
      consume(RIGHT_PAREN, &quot;Expect ')' after expression.&quot;);
      return new Expr.Grouping(expr);
    }
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>primary</em>()</div>
<pre class="insert">

    <span class="k">throw</span> <span class="i">error</span>(<span class="i">peek</span>(), <span class="s">&quot;Expect expression.&quot;</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>primary</em>()</div>

<p>With that, all that remains in the parser is to define an initial method to kick
it off. That method is called, naturally enough, <code>parse()</code>.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>Parser</em>()</div>
<pre>  <span class="t">Expr</span> <span class="i">parse</span>() {
    <span class="k">try</span> {
      <span class="k">return</span> <span class="i">expression</span>();
    } <span class="k">catch</span> (<span class="t">ParseError</span> <span class="i">error</span>) {
      <span class="k">return</span> <span class="k">null</span>;
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>Parser</em>()</div>

<p>We&rsquo;ll revisit this method later when we add statements to the language. For now,
it parses a single expression and returns it. We also have some temporary code
to exit out of panic mode. Syntax error recovery is the parser&rsquo;s job, so we
don&rsquo;t want the ParseError exception to escape into the rest of the interpreter.</p>
<p>When a syntax error does occur, this method returns <code>null</code>. That&rsquo;s OK. The
parser promises not to crash or hang on invalid syntax, but it doesn&rsquo;t promise
to return a <em>usable syntax tree</em> if an error is found. As soon as the parser
reports an error, <code>hadError</code> gets set, and subsequent phases are skipped.</p>
<p>Finally, we can hook up our brand new parser to the main Lox class and try it
out. We still don&rsquo;t have an interpreter, so for now, we&rsquo;ll parse to a syntax
tree and then use the AstPrinter class from the <a href="representing-code.html#a-not-very-pretty-printer">last chapter</a> to
display it.</p>
<p>Delete the old code to print the scanned tokens and replace it with this:</p>
<div class="codehilite"><pre class="insert-before">    List&lt;Token&gt; tokens = scanner.scanTokens();
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>run</em>()<br>
replace 5 lines</div>
<pre class="insert">    <span class="t">Parser</span> <span class="i">parser</span> = <span class="k">new</span> <span class="t">Parser</span>(<span class="i">tokens</span>);
    <span class="t">Expr</span> <span class="i">expression</span> = <span class="i">parser</span>.<span class="i">parse</span>();

    <span class="c">// Stop if there was a syntax error.</span>
    <span class="k">if</span> (<span class="i">hadError</span>) <span class="k">return</span>;

    <span class="t">System</span>.<span class="i">out</span>.<span class="i">println</span>(<span class="k">new</span> <span class="t">AstPrinter</span>().<span class="i">print</span>(<span class="i">expression</span>));
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>run</em>(), replace 5 lines</div>

<p>Congratulations, you have crossed the <span name="harder">threshold</span>! That
really is all there is to handwriting a parser. We&rsquo;ll extend the grammar in
later chapters with assignment, statements, and other stuff, but none of that is
any more complex than the binary operators we tackled here.</p>
<aside name="harder">
<p>It is possible to define a more complex grammar than Lox&rsquo;s that&rsquo;s difficult to
parse using recursive descent. Predictive parsing gets tricky when you may need
to look ahead a large number of tokens to figure out what you&rsquo;re sitting on.</p>
<p>In practice, most languages are designed to avoid that. Even in cases where they
aren&rsquo;t, you can usually hack around it without too much pain. If you can parse
C++ using recursive descent<span class="em">&mdash;</span>which many C++ compilers do<span class="em">&mdash;</span>you can parse
anything.</p>
</aside>
<p>Fire up the interpreter and type in some expressions. See how it handles
precedence and associativity correctly? Not bad for fewer than 200 lines of code.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>In C, a block is a statement form that allows you to pack a series of
statements where a single one is expected. The <a href="https://en.wikipedia.org/wiki/Comma_operator">comma operator</a> is an
analogous syntax for expressions. A comma-separated series of expressions
can be given where a single expression is expected (except inside a function
call&rsquo;s argument list). At runtime, the comma operator evaluates the left
operand and discards the result. Then it evaluates and returns the right
operand.</p>
<p>Add support for comma expressions. Give them the same precedence and
associativity as in C. Write the grammar, and then implement the necessary
parsing code.</p>
</li>
<li>
<p>Likewise, add support for the C-style conditional or &ldquo;ternary&rdquo; operator
<code>?:</code>. What precedence level is allowed between the <code>?</code> and <code>:</code>? Is the whole
operator left-associative or right-associative?</p>
</li>
<li>
<p>Add error productions to handle each binary operator appearing without a
left-hand operand. In other words, detect a binary operator appearing at the
beginning of an expression. Report that as an error, but also parse and
discard a right-hand operand with the appropriate precedence.</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Logic Versus History</a></h2>
<p>Let&rsquo;s say we decide to add bitwise <code>&amp;</code> and <code>|</code> operators to Lox. Where should we
put them in the precedence hierarchy? C<span class="em">&mdash;</span>and most languages that follow in C&rsquo;s
footsteps<span class="em">&mdash;</span>place them below <code>==</code>. This is widely considered a mistake because
it means common operations like testing a flag require parentheses.</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">flags</span> &amp; <span class="a">FLAG_MASK</span> == <span class="a">SOME_FLAG</span>) { ... } <span class="c">// Wrong.</span>
<span class="k">if</span> ((<span class="i">flags</span> &amp; <span class="a">FLAG_MASK</span>) == <span class="a">SOME_FLAG</span>) { ... } <span class="c">// Right.</span>
</pre></div>
<p>Should we fix this for Lox and put bitwise operators higher up the precedence
table than C does? There are two strategies we can take.</p>
<p>You almost never want to use the result of an <code>==</code> expression as the operand to
a bitwise operator. By making bitwise bind tighter, users don&rsquo;t need to
parenthesize as often. So if we do that, and users assume the precedence is
chosen logically to minimize parentheses, they&rsquo;re likely to infer it correctly.</p>
<p>This kind of internal consistency makes the language easier to learn because
there are fewer edge cases and exceptions users have to stumble into and then
correct. That&rsquo;s good, because before users can use our language, they have to
load all of that syntax and semantics into their heads. A simpler, more rational
language <em>makes sense</em>.</p>
<p>But, for many users there is an even faster shortcut to getting our language&rsquo;s
ideas into their wetware<span class="em">&mdash;</span><em>use concepts they already know</em>. Many newcomers to
our language will be coming from some other language or languages. If our
language uses some of the same syntax or semantics as those, there is much less
for the user to learn (and <em>unlearn</em>).</p>
<p>This is particularly helpful with syntax. You may not remember it well today,
but way back when you learned your very first programming language, code
probably looked alien and unapproachable. Only through painstaking effort did
you learn to read and accept it. If you design a novel syntax for your new
language, you force users to start that process all over again.</p>
<p>Taking advantage of what users already know is one of the most powerful tools
you can use to ease adoption of your language. It&rsquo;s almost impossible to
overestimate how valuable this is. But it confronts you with a nasty problem: What
happens when the thing the users all know <em>kind of sucks</em>? C&rsquo;s bitwise operator
precedence is a mistake that doesn&rsquo;t make sense. But it&rsquo;s a <em>familiar</em> mistake
that millions have already gotten used to and learned to live with.</p>
<p>Do you stay true to your language&rsquo;s own internal logic and ignore history? Do
you start from a blank slate and first principles? Or do you weave your language
into the rich tapestry of programming history and give your users a leg up by
starting from something they already know?</p>
<p>There is no perfect answer here, only trade-offs. You and I are obviously biased
towards liking novel languages, so our natural inclination is to burn the
history books and start our own story.</p>
<p>In practice, it&rsquo;s often better to make the most of what users already know.
Getting them to come to your language requires a big leap. The smaller you can
make that chasm, the more people will be willing to cross it. But you can&rsquo;t
<em>always</em> stick to history, or your language won&rsquo;t have anything new and
compelling to give people a <em>reason</em> to jump over.</p>
</div>

<footer>
<a href="evaluating-expressions.html" class="next">
  Next Chapter: &ldquo;Evaluating Expressions&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/representing-code.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Representing Code &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Representing Code<small>5</small></a></h3>

<ul>
    <li><a href="#context-free-grammars"><small>5.1</small> Context-Free Grammars</a></li>
    <li><a href="#implementing-syntax-trees"><small>5.2</small> Implementing Syntax Trees</a></li>
    <li><a href="#working-with-trees"><small>5.3</small> Working with Trees</a></li>
    <li><a href="#a-not-very-pretty-printer"><small>5.4</small> A (Not Very) Pretty Printer</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="scanning.html" title="Scanning" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="parsing-expressions.html" title="Parsing Expressions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="scanning.html" title="Scanning" class="prev">←</a>
<a href="parsing-expressions.html" title="Parsing Expressions" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Representing Code<small>5</small></a></h3>

<ul>
    <li><a href="#context-free-grammars"><small>5.1</small> Context-Free Grammars</a></li>
    <li><a href="#implementing-syntax-trees"><small>5.2</small> Implementing Syntax Trees</a></li>
    <li><a href="#working-with-trees"><small>5.3</small> Working with Trees</a></li>
    <li><a href="#a-not-very-pretty-printer"><small>5.4</small> A (Not Very) Pretty Printer</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="scanning.html" title="Scanning" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="parsing-expressions.html" title="Parsing Expressions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">5</div>
  <h1>Representing Code</h1>

<blockquote>
<p>To dwellers in a wood, almost every species of tree has its voice as well as
its feature.
<cite>Thomas Hardy, <em>Under the Greenwood Tree</em></cite></p>
</blockquote>
<p>In the <a href="scanning.html">last chapter</a>, we took the raw source code as a string and
transformed it into a slightly higher-level representation: a series of tokens.
The parser we&rsquo;ll write in the <a href="parsing-expressions.html">next chapter</a> takes those tokens and
transforms them yet again, into an even richer, more complex representation.</p>
<p>Before we can produce that representation, we need to define it. That&rsquo;s the
subject of this chapter. Along the way, we&rsquo;ll <span name="boring">cover</span>
some theory around formal grammars, feel the difference between functional and
object-oriented programming, go over a couple of design patterns, and do some
metaprogramming.</p>
<aside name="boring">
<p>I was so worried about this being one of the most boring chapters in the book
that I kept stuffing more fun ideas into it until I ran out of room.</p>
</aside>
<p>Before we do all that, let&rsquo;s focus on the main goal<span class="em">&mdash;</span>a representation for
code. It should be simple for the parser to produce and easy for the
interpreter to consume. If you haven&rsquo;t written a parser or interpreter yet,
those requirements aren&rsquo;t exactly illuminating. Maybe your intuition can help.
What is your brain doing when you play the part of a <em>human</em> interpreter? How do
you mentally evaluate an arithmetic expression like this:</p>
<div class="codehilite"><pre><span class="n">1</span> + <span class="n">2</span> * <span class="n">3</span> - <span class="n">4</span>
</pre></div>
<p>Because you understand the order of operations<span class="em">&mdash;</span>the old &ldquo;<a href="https://en.wikipedia.org/wiki/Order_of_operations#Mnemonics">Please Excuse My
Dear Aunt Sally</a>&rdquo; stuff<span class="em">&mdash;</span>you know that the multiplication is evaluated
before the addition or subtraction. One way to visualize that precedence is
using a tree. Leaf nodes are numbers, and interior nodes are operators with
branches for each of their operands.</p>
<p>In order to evaluate an arithmetic node, you need to know the numeric values of
its subtrees, so you have to evaluate those first. That means working your way
from the leaves up to the root<span class="em">&mdash;</span>a <em>post-order</em> traversal:</p>
<p><span name="tree-steps"></span></p><img src="image/representing-code/tree-evaluate.png" alt="Evaluating the tree from the bottom up." />
<aside name="tree-steps">
<p>A. Starting with the full tree, evaluate the bottom-most operation, <code>2 * 3</code>.</p>
<p>B. Now we can evaluate the <code>+</code>.</p>
<p>C. Next, the <code>-</code>.</p>
<p>D. The final answer.</p>
</aside>
<p>If I gave you an arithmetic expression, you could draw one of these trees pretty
easily. Given a tree, you can evaluate it without breaking a sweat. So it
intuitively seems like a workable representation of our code is a <span
name="only">tree</span> that matches the grammatical structure<span class="em">&mdash;</span>the operator
nesting<span class="em">&mdash;</span>of the language.</p>
<aside name="only">
<p>That&rsquo;s not to say a tree is the <em>only</em> possible representation of our code. In
<a href="a-bytecode-virtual-machine.html">Part III</a>, we&rsquo;ll generate bytecode, another representation that isn&rsquo;t as
human friendly but is closer to the machine.</p>
</aside>
<p>We need to get more precise about what that grammar is then. Like lexical
grammars in the last chapter, there is a long ton of theory around syntactic
grammars. We&rsquo;re going into that theory a little more than we did when scanning
because it turns out to be a useful tool throughout much of the interpreter.
We start by moving one level up the <a href="https://en.wikipedia.org/wiki/Chomsky_hierarchy">Chomsky hierarchy</a><span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h2><a href="#context-free-grammars" id="context-free-grammars"><small>5&#8202;.&#8202;1</small>Context-Free Grammars</a></h2>
<p>In the last chapter, the formalism we used for defining the lexical grammar<span class="em">&mdash;</span>the rules for how characters get grouped into tokens<span class="em">&mdash;</span>was called a <em>regular
language</em>. That was fine for our scanner, which emits a flat sequence of tokens.
But regular languages aren&rsquo;t powerful enough to handle expressions which can
nest arbitrarily deeply.</p>
<p>We need a bigger hammer, and that hammer is a <strong>context-free grammar</strong>
(<strong>CFG</strong>). It&rsquo;s the next heaviest tool in the toolbox of
<strong><a href="https://en.wikipedia.org/wiki/Formal_grammar">formal grammars</a></strong>. A formal grammar takes a set of atomic pieces it calls
its &ldquo;alphabet&rdquo;. Then it defines a (usually infinite) set of &ldquo;strings&rdquo; that are
&ldquo;in&rdquo; the grammar. Each string is a sequence of &ldquo;letters&rdquo; in the alphabet.</p>
<p>I&rsquo;m using all those quotes because the terms get a little confusing as you move
from lexical to syntactic grammars. In our scanner&rsquo;s grammar, the alphabet
consists of individual characters and the strings are the valid lexemes<span class="em">&mdash;</span>roughly &ldquo;words&rdquo;. In the syntactic grammar we&rsquo;re talking about now, we&rsquo;re at a
different level of granularity. Now each &ldquo;letter&rdquo; in the alphabet is an entire
token and a &ldquo;string&rdquo; is a sequence of <em>tokens</em><span class="em">&mdash;</span>an entire expression.</p>
<p>Oof. Maybe a table will help:</p><table>
<thead>
<tr>
  <td>Terminology</td>
  <td></td>
  <td>Lexical grammar</td>
  <td>Syntactic grammar</td>
</tr>
</thead>
<tbody>
<tr>
  <td>The &ldquo;alphabet&rdquo; is<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.</span></td>
  <td>&rarr;&ensp;</td>
  <td>Characters</td>
  <td>Tokens</td>
</tr>
<tr>
  <td>A &ldquo;string&rdquo; is<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.</span></td>
  <td>&rarr;&ensp;</td>
  <td>Lexeme or token</td>
  <td>Expression</td>
</tr>
<tr>
  <td>It&rsquo;s implemented by the<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.</span></td>
  <td>&rarr;&ensp;</td>
  <td>Scanner</td>
  <td>Parser</td>
</tr>
</tbody>
</table>
<p>A formal grammar&rsquo;s job is to specify which strings are valid and which aren&rsquo;t.
If we were defining a grammar for English sentences, &ldquo;eggs are tasty for
breakfast&rdquo; would be in the grammar, but &ldquo;tasty breakfast for are eggs&rdquo; would
probably not.</p>
<h3><a href="#rules-for-grammars" id="rules-for-grammars"><small>5&#8202;.&#8202;1&#8202;.&#8202;1</small>Rules for grammars</a></h3>
<p>How do we write down a grammar that contains an infinite number of valid
strings? We obviously can&rsquo;t list them all out. Instead, we create a finite set
of rules. You can think of them as a game that you can &ldquo;play&rdquo; in one of two
directions.</p>
<p>If you start with the rules, you can use them to <em>generate</em> strings that are in
the grammar. Strings created this way are called <strong>derivations</strong> because each is
<em>derived</em> from the rules of the grammar. In each step of the game, you pick a
rule and follow what it tells you to do. Most of the lingo around formal
grammars comes from playing them in this direction. Rules are called
<strong>productions</strong> because they <em>produce</em> strings in the grammar.</p>
<p>Each production in a context-free grammar has a <strong>head</strong><span class="em">&mdash;</span>its <span
name="name">name</span><span class="em">&mdash;</span>and a <strong>body</strong>, which describes what it generates. In
its pure form, the body is simply a list of symbols. Symbols come in two
delectable flavors:</p>
<aside name="name">
<p>Restricting heads to a single symbol is a defining feature of context-free
grammars. More powerful formalisms like <strong><a href="https://en.wikipedia.org/wiki/Unrestricted_grammar">unrestricted grammars</a></strong> allow a
sequence of symbols in the head as well as in the body.</p>
</aside>
<ul>
<li>
<p>A <strong>terminal</strong> is a letter from the grammar&rsquo;s alphabet. You can think of it
like a literal value. In the syntactic grammar we&rsquo;re defining, the terminals
are individual lexemes<span class="em">&mdash;</span>tokens coming from the scanner like <code>if</code> or
<code>1234</code>.</p>
<p>These are called &ldquo;terminals&rdquo;, in the sense of an &ldquo;end point&rdquo; because they
don&rsquo;t lead to any further &ldquo;moves&rdquo; in the game. You simply produce that one
symbol.</p>
</li>
<li>
<p>A <strong>nonterminal</strong> is a named reference to another rule in the grammar. It
means &ldquo;play that rule and insert whatever it produces here&rdquo;. In this way,
the grammar composes.</p>
</li>
</ul>
<p>There is one last refinement: you may have multiple rules with the same name.
When you reach a nonterminal with that name, you are allowed to pick any of the
rules for it, whichever floats your boat.</p>
<p>To make this concrete, we need a <span name="turtles">way</span> to write down
these production rules. People have been trying to crystallize grammar all the
way back to Pāṇini&rsquo;s <em>Ashtadhyayi</em>, which codified Sanskrit grammar a mere
couple thousand years ago. Not much progress happened until John Backus and
company needed a notation for specifying ALGOL 58 and came up with
<a href="https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form"><strong>Backus-Naur form</strong></a> (<strong>BNF</strong>). Since then, nearly everyone uses some
flavor of BNF, tweaked to their own tastes.</p>
<p>I tried to come up with something clean. Each rule is a name, followed by an
arrow (<code>→</code>), followed by a sequence of symbols, and finally ending with a
semicolon (<code>;</code>). Terminals are quoted strings, and nonterminals are lowercase
words.</p>
<aside name="turtles">
<p>Yes, we need to define a syntax to use for the rules that define our syntax.
Should we specify that <em>metasyntax</em> too? What notation do we use for <em>it?</em> It&rsquo;s
languages all the way down!</p>
</aside>
<p>Using that, here&rsquo;s a grammar for <span name="breakfast">breakfast</span> menus:</p>
<aside name="breakfast">
<p>Yes, I really am going to be using breakfast examples throughout this entire
book. Sorry.</p>
</aside>
<div class="codehilite"><pre><span class="i">breakfast</span>  → <span class="i">protein</span> <span class="s">&quot;with&quot;</span> <span class="i">breakfast</span> <span class="s">&quot;on the side&quot;</span> ;
<span class="i">breakfast</span>  → <span class="i">protein</span> ;
<span class="i">breakfast</span>  → <span class="i">bread</span> ;

<span class="i">protein</span>    → <span class="i">crispiness</span> <span class="s">&quot;crispy&quot;</span> <span class="s">&quot;bacon&quot;</span> ;
<span class="i">protein</span>    → <span class="s">&quot;sausage&quot;</span> ;
<span class="i">protein</span>    → <span class="i">cooked</span> <span class="s">&quot;eggs&quot;</span> ;

<span class="i">crispiness</span> → <span class="s">&quot;really&quot;</span> ;
<span class="i">crispiness</span> → <span class="s">&quot;really&quot;</span> <span class="i">crispiness</span> ;

<span class="i">cooked</span>     → <span class="s">&quot;scrambled&quot;</span> ;
<span class="i">cooked</span>     → <span class="s">&quot;poached&quot;</span> ;
<span class="i">cooked</span>     → <span class="s">&quot;fried&quot;</span> ;

<span class="i">bread</span>      → <span class="s">&quot;toast&quot;</span> ;
<span class="i">bread</span>      → <span class="s">&quot;biscuits&quot;</span> ;
<span class="i">bread</span>      → <span class="s">&quot;English muffin&quot;</span> ;
</pre></div>
<p>We can use this grammar to generate random breakfasts. Let&rsquo;s play a round and
see how it works. By age-old convention, the game starts with the first rule in
the grammar, here <code>breakfast</code>. There are three productions for that, and we
randomly pick the first one. Our resulting string looks like:</p>
<div class="codehilite"><pre>protein &quot;with&quot; breakfast &quot;on the side&quot;
</pre></div>
<p>We need to expand that first nonterminal, <code>protein</code>, so we pick a production for
that. Let&rsquo;s pick:</p>
<div class="codehilite"><pre><span class="i">protein</span> → <span class="i">cooked</span> <span class="s">&quot;eggs&quot;</span> ;
</pre></div>
<p>Next, we need a production for <code>cooked</code>, and so we pick <code>"poached"</code>. That&rsquo;s a
terminal, so we add that. Now our string looks like:</p>
<div class="codehilite"><pre>&quot;poached&quot; &quot;eggs&quot; &quot;with&quot; breakfast &quot;on the side&quot;
</pre></div>
<p>The next nonterminal is <code>breakfast</code> again. The first <code>breakfast</code> production we
chose recursively refers back to the <code>breakfast</code> rule. Recursion in the grammar
is a good sign that the language being defined is context-free instead of
regular. In particular, recursion where the recursive nonterminal has
symbols on <span name="nest">both</span> sides of it implies that the language is
not regular.</p>
<aside name="nest">
<p>Imagine that we&rsquo;ve recursively expanded the <code>breakfast</code> rule here several times,
like &ldquo;bacon with bacon with bacon with<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>&rdquo; In order to complete the string
correctly, we need to add an <em>equal</em> number of &ldquo;on the side&rdquo; bits to the end.
Tracking the number of required trailing parts is beyond the capabilities of a
regular grammar. Regular grammars can express <em>repetition</em>, but they can&rsquo;t <em>keep
count</em> of how many repetitions there are, which is necessary to ensure that the
string has the same number of <code>with</code> and <code>on the side</code> parts.</p>
</aside>
<p>We could keep picking the first production for <code>breakfast</code> over and over again
yielding all manner of breakfasts like &ldquo;bacon with sausage with scrambled eggs
with bacon<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>&rdquo; We won&rsquo;t though. This time we&rsquo;ll pick <code>bread</code>. There are three
rules for that, each of which contains only a terminal. We&rsquo;ll pick &ldquo;English
muffin&rdquo;.</p>
<p>With that, every nonterminal in the string has been expanded until it finally
contains only terminals and we&rsquo;re left with:</p><img src="image/representing-code/breakfast.png" alt='"Playing" the grammar to generate a string.' />
<p>Throw in some ham and Hollandaise, and you&rsquo;ve got eggs Benedict.</p>
<p>Any time we hit a rule that had multiple productions, we just picked one
arbitrarily. It is this flexibility that allows a small number of grammar rules
to encode a combinatorially larger set of strings. The fact that a rule can
refer to itself<span class="em">&mdash;</span>directly or indirectly<span class="em">&mdash;</span>kicks it up even more, letting us
pack an infinite number of strings into a finite grammar.</p>
<h3><a href="#enhancing-our-notation" id="enhancing-our-notation"><small>5&#8202;.&#8202;1&#8202;.&#8202;2</small>Enhancing our notation</a></h3>
<p>Stuffing an infinite set of strings in a handful of rules is pretty fantastic,
but let&rsquo;s take it further. Our notation works, but it&rsquo;s tedious. So, like any
good language designer, we&rsquo;ll sprinkle a little syntactic sugar on top<span class="em">&mdash;</span>some
extra convenience notation. In addition to terminals and nonterminals, we&rsquo;ll
allow a few other kinds of expressions in the body of a rule:</p>
<ul>
<li>
<p>Instead of repeating the rule name each time we want to add another
production for it, we&rsquo;ll allow a series of productions separated by a pipe
(<code>|</code>).</p>
<div class="codehilite"><pre><span class="i">bread</span> → <span class="s">&quot;toast&quot;</span> | <span class="s">&quot;biscuits&quot;</span> | <span class="s">&quot;English muffin&quot;</span> ;
</pre></div>
</li>
<li>
<p>Further, we&rsquo;ll allow parentheses for grouping and then allow <code>|</code> within that
to select one from a series of options within the middle of a production.</p>
<div class="codehilite"><pre><span class="i">protein</span> → ( <span class="s">&quot;scrambled&quot;</span> | <span class="s">&quot;poached&quot;</span> | <span class="s">&quot;fried&quot;</span> ) <span class="s">&quot;eggs&quot;</span> ;
</pre></div>
</li>
<li>
<p>Using recursion to support repeated sequences of symbols has a certain
appealing <span name="purity">purity</span>, but it&rsquo;s kind of a chore to
make a separate named sub-rule each time we want to loop. So, we also use a
postfix <code>*</code> to allow the previous symbol or group to be repeated zero or
more times.</p>
<div class="codehilite"><pre><span class="i">crispiness</span> → <span class="s">&quot;really&quot;</span> <span class="s">&quot;really&quot;</span>* ;
</pre></div>
</li>
</ul>
<aside name="purity">
<p>This is how the Scheme programming language works. It has no built-in looping
functionality at all. Instead, <em>all</em> repetition is expressed in terms of
recursion.</p>
</aside>
<ul>
<li>
<p>A postfix <code>+</code> is similar, but requires the preceding production to appear
at least once.</p>
<div class="codehilite"><pre><span class="i">crispiness</span> → <span class="s">&quot;really&quot;</span>+ ;
</pre></div>
</li>
<li>
<p>A postfix <code>?</code> is for an optional production. The thing before it can appear
zero or one time, but not more.</p>
<div class="codehilite"><pre><span class="i">breakfast</span> → <span class="i">protein</span> ( <span class="s">&quot;with&quot;</span> <span class="i">breakfast</span> <span class="s">&quot;on the side&quot;</span> )? ;
</pre></div>
</li>
</ul>
<p>With all of those syntactic niceties, our breakfast grammar condenses down to:</p>
<div class="codehilite"><pre><span class="i">breakfast</span> → <span class="i">protein</span> ( <span class="s">&quot;with&quot;</span> <span class="i">breakfast</span> <span class="s">&quot;on the side&quot;</span> )?
          | <span class="i">bread</span> ;

<span class="i">protein</span>   → <span class="s">&quot;really&quot;</span>+ <span class="s">&quot;crispy&quot;</span> <span class="s">&quot;bacon&quot;</span>
          | <span class="s">&quot;sausage&quot;</span>
          | ( <span class="s">&quot;scrambled&quot;</span> | <span class="s">&quot;poached&quot;</span> | <span class="s">&quot;fried&quot;</span> ) <span class="s">&quot;eggs&quot;</span> ;

<span class="i">bread</span>     → <span class="s">&quot;toast&quot;</span> | <span class="s">&quot;biscuits&quot;</span> | <span class="s">&quot;English muffin&quot;</span> ;
</pre></div>
<p>Not too bad, I hope. If you&rsquo;re used to grep or using <a href="https://en.wikipedia.org/wiki/Regular_expression#Standards">regular
expressions</a> in your text editor, most of the punctuation should be
familiar. The main difference is that symbols here represent entire tokens, not
single characters.</p>
<p>We&rsquo;ll use this notation throughout the rest of the book to precisely describe
Lox&rsquo;s grammar. As you work on programming languages, you&rsquo;ll find that
context-free grammars (using this or <a href="https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form">EBNF</a> or some other notation) help you
crystallize your informal syntax design ideas. They are also a handy medium for
communicating with other language hackers about syntax.</p>
<p>The rules and productions we define for Lox are also our guide to the tree data
structure we&rsquo;re going to implement to represent code in memory. Before we can do
that, we need an actual grammar for Lox, or at least enough of one for us to get
started.</p>
<h3><a href="#a-grammar-for-lox-expressions" id="a-grammar-for-lox-expressions"><small>5&#8202;.&#8202;1&#8202;.&#8202;3</small>A Grammar for Lox expressions</a></h3>
<p>In the previous chapter, we did Lox&rsquo;s entire lexical grammar in one fell swoop.
Every keyword and bit of punctuation is there. The syntactic grammar is larger,
and it would be a real bore to grind through the entire thing before we actually
get our interpreter up and running.</p>
<p>Instead, we&rsquo;ll crank through a subset of the language in the next couple of
chapters. Once we have that mini-language represented, parsed, and interpreted,
then later chapters will progressively add new features to it, including the new
syntax. For now, we are going to worry about only a handful of expressions:</p>
<ul>
<li>
<p><strong>Literals.</strong> Numbers, strings, Booleans, and <code>nil</code>.</p>
</li>
<li>
<p><strong>Unary expressions.</strong> A prefix <code>!</code> to perform a logical not, and <code>-</code> to
negate a number.</p>
</li>
<li>
<p><strong>Binary expressions.</strong> The infix arithmetic (<code>+</code>, <code>-</code>, <code>*</code>, <code>/</code>) and comparison
operators (<code>==</code>, <code>!=</code>, <code>&lt;</code>, <code>&lt;=</code>, <code>&gt;</code>, <code>&gt;=</code>) we know and love.</p>
</li>
<li>
<p><strong>Parentheses.</strong> A pair of <code>(</code> and <code>)</code> wrapped around an expression.</p>
</li>
</ul>
<p>That gives us enough syntax for expressions like:</p>
<div class="codehilite"><pre><span class="n">1</span> - (<span class="n">2</span> * <span class="n">3</span>) &lt; <span class="n">4</span> == <span class="k">false</span>
</pre></div>
<p>Using our handy dandy new notation, here&rsquo;s a grammar for those:</p>
<div class="codehilite"><pre><span class="i">expression</span>     → <span class="i">literal</span>
               | <span class="i">unary</span>
               | <span class="i">binary</span>
               | <span class="i">grouping</span> ;

<span class="i">literal</span>        → <span class="t">NUMBER</span> | <span class="t">STRING</span> | <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span> ;
<span class="i">grouping</span>       → <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span> ;
<span class="i">unary</span>          → ( <span class="s">&quot;-&quot;</span> | <span class="s">&quot;!&quot;</span> ) <span class="i">expression</span> ;
<span class="i">binary</span>         → <span class="i">expression</span> <span class="i">operator</span> <span class="i">expression</span> ;
<span class="i">operator</span>       → <span class="s">&quot;==&quot;</span> | <span class="s">&quot;!=&quot;</span> | <span class="s">&quot;&lt;&quot;</span> | <span class="s">&quot;&lt;=&quot;</span> | <span class="s">&quot;&gt;&quot;</span> | <span class="s">&quot;&gt;=&quot;</span>
               | <span class="s">&quot;+&quot;</span>  | <span class="s">&quot;-&quot;</span>  | <span class="s">&quot;*&quot;</span> | <span class="s">&quot;/&quot;</span> ;
</pre></div>
<p>There&rsquo;s one bit of extra <span name="play">metasyntax</span> here. In addition
to quoted strings for terminals that match exact lexemes, we <code>CAPITALIZE</code>
terminals that are a single lexeme whose text representation may vary. <code>NUMBER</code>
is any number literal, and <code>STRING</code> is any string literal. Later, we&rsquo;ll do the
same for <code>IDENTIFIER</code>.</p>
<p>This grammar is actually ambiguous, which we&rsquo;ll see when we get to parsing it.
But it&rsquo;s good enough for now.</p>
<aside name="play">
<p>If you&rsquo;re so inclined, try using this grammar to generate a few expressions like
we did with the breakfast grammar before. Do the resulting expressions look
right to you? Can you make it generate anything wrong like <code>1 + / 3</code>?</p>
</aside>
<h2><a href="#implementing-syntax-trees" id="implementing-syntax-trees"><small>5&#8202;.&#8202;2</small>Implementing Syntax Trees</a></h2>
<p>Finally, we get to write some code. That little expression grammar is our
skeleton. Since the grammar is recursive<span class="em">&mdash;</span>note how <code>grouping</code>, <code>unary</code>, and
<code>binary</code> all refer back to <code>expression</code><span class="em">&mdash;</span>our data structure will form a tree.
Since this structure represents the syntax of our language, it&rsquo;s called a <span
name="ast"><strong>syntax tree</strong></span>.</p>
<aside name="ast">
<p>In particular, we&rsquo;re defining an <strong>abstract syntax tree</strong> (<strong>AST</strong>). In a
<strong>parse tree</strong>, every single grammar production becomes a node in the tree. An
AST elides productions that aren&rsquo;t needed by later phases.</p>
</aside>
<p>Our scanner used a single Token class to represent all kinds of lexemes. To
distinguish the different kinds<span class="em">&mdash;</span>think the number <code>123</code> versus the string
<code>"123"</code><span class="em">&mdash;</span>we included a simple TokenType enum. Syntax trees are not so <span
name="token-data">homogeneous</span>. Unary expressions have a single operand,
binary expressions have two, and literals have none.</p>
<p>We <em>could</em> mush that all together into a single Expression class with an
arbitrary list of children. Some compilers do. But I like getting the most out
of Java&rsquo;s type system. So we&rsquo;ll define a base class for expressions. Then, for
each kind of expression<span class="em">&mdash;</span>each production under <code>expression</code><span class="em">&mdash;</span>we create a
subclass that has fields for the nonterminals specific to that rule. This way,
we get a compile error if we, say, try to access the second operand of a unary
expression.</p>
<aside name="token-data">
<p>Tokens aren&rsquo;t entirely homogeneous either. Tokens for literals store the value,
but other kinds of lexemes don&rsquo;t need that state. I have seen scanners that use
different classes for literals and other kinds of lexemes, but I figured I&rsquo;d
keep things simpler.</p>
</aside>
<p>Something like this:</p>
<div class="codehilite"><pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">abstract</span> <span class="k">class</span> <span class="t">Expr</span> {<span name="expr"> </span>
  <span class="k">static</span> <span class="k">class</span> <span class="t">Binary</span> <span class="k">extends</span> <span class="t">Expr</span> {
    <span class="t">Binary</span>(<span class="t">Expr</span> <span class="i">left</span>, <span class="t">Token</span> <span class="i">operator</span>, <span class="t">Expr</span> <span class="i">right</span>) {
      <span class="k">this</span>.<span class="i">left</span> = <span class="i">left</span>;
      <span class="k">this</span>.<span class="i">operator</span> = <span class="i">operator</span>;
      <span class="k">this</span>.<span class="i">right</span> = <span class="i">right</span>;
    }

    <span class="k">final</span> <span class="t">Expr</span> <span class="i">left</span>;
    <span class="k">final</span> <span class="t">Token</span> <span class="i">operator</span>;
    <span class="k">final</span> <span class="t">Expr</span> <span class="i">right</span>;
  }

  <span class="c">// Other expressions...</span>
}
</pre></div>
<aside name="expr">
<p>I avoid abbreviations in my code because they trip up a reader who doesn&rsquo;t know
what they stand for. But in compilers I&rsquo;ve looked at, &ldquo;Expr&rdquo; and &ldquo;Stmt&rdquo; are so
ubiquitous that I may as well start getting you used to them now.</p>
</aside>
<p>Expr is the base class that all expression classes inherit from. As you can see
from <code>Binary</code>, the subclasses are nested inside of it. There&rsquo;s no technical need
for this, but it lets us cram all of the classes into a single Java file.</p>
<h3><a href="#disoriented-objects" id="disoriented-objects"><small>5&#8202;.&#8202;2&#8202;.&#8202;1</small>Disoriented objects</a></h3>
<p>You&rsquo;ll note that, much like the Token class, there aren&rsquo;t any methods here. It&rsquo;s
a dumb structure. Nicely typed, but merely a bag of data. This feels strange in
an object-oriented language like Java. Shouldn&rsquo;t the class <em>do stuff</em>?</p>
<p>The problem is that these tree classes aren&rsquo;t owned by any single domain. Should
they have methods for parsing since that&rsquo;s where the trees are created? Or
interpreting since that&rsquo;s where they are consumed? Trees span the border between
those territories, which means they are really owned by <em>neither</em>.</p>
<p>In fact, these types exist to enable the parser and interpreter to
<em>communicate</em>. That lends itself to types that are simply data with no
associated behavior. This style is very natural in functional languages like
Lisp and ML where <em>all</em> data is separate from behavior, but it feels odd in
Java.</p>
<p>Functional programming aficionados right now are jumping up to exclaim &ldquo;See!
Object-oriented languages are a bad fit for an interpreter!&rdquo; I won&rsquo;t go that
far. You&rsquo;ll recall that the scanner itself was admirably suited to
object-orientation. It had all of the mutable state to keep track of where it
was in the source code, a well-defined set of public methods, and a handful of
private helpers.</p>
<p>My feeling is that each phase or part of the interpreter works fine in an
object-oriented style. It is the data structures that flow between them that are
stripped of behavior.</p>
<h3><a href="#metaprogramming-the-trees" id="metaprogramming-the-trees"><small>5&#8202;.&#8202;2&#8202;.&#8202;2</small>Metaprogramming the trees</a></h3>
<p>Java can express behavior-less classes, but I wouldn&rsquo;t say that it&rsquo;s
particularly great at it. Eleven lines of code to stuff three fields in an
object is pretty tedious, and when we&rsquo;re all done, we&rsquo;re going to have 21 of
these classes.</p>
<p>I don&rsquo;t want to waste your time or my ink writing all that down. Really, what is
the essence of each subclass? A name, and a list of typed fields. That&rsquo;s it.
We&rsquo;re smart language hackers, right? Let&rsquo;s <span
name="automate">automate</span>.</p>
<aside name="automate">
<p>Picture me doing an awkward robot dance when you read that. &ldquo;AU-TO-MATE.&rdquo;</p>
</aside>
<p>Instead of tediously handwriting each class definition, field declaration,
constructor, and initializer, we&rsquo;ll hack together a <span
name="python">script</span> that does it for us. It has a description of each
tree type<span class="em">&mdash;</span>its name and fields<span class="em">&mdash;</span>and it prints out the Java code needed to
define a class with that name and state.</p>
<p>This script is a tiny Java command-line app that generates a file named
&ldquo;Expr.java&rdquo;:</p>
<aside name="python">
<p>I got the idea of scripting the syntax tree classes from Jim Hugunin, creator of
Jython and IronPython.</p>
<p>An actual scripting language would be a better fit for this than Java, but I&rsquo;m
trying not to throw too many languages at you.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>tool/GenerateAst.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.tool</span>;

<span class="k">import</span> <span class="i">java.io.IOException</span>;
<span class="k">import</span> <span class="i">java.io.PrintWriter</span>;
<span class="k">import</span> <span class="i">java.util.Arrays</span>;
<span class="k">import</span> <span class="i">java.util.List</span>;

<span class="k">public</span> <span class="k">class</span> <span class="t">GenerateAst</span> {
  <span class="k">public</span> <span class="k">static</span> <span class="t">void</span> <span class="i">main</span>(<span class="t">String</span>[] <span class="i">args</span>) <span class="k">throws</span> <span class="t">IOException</span> {
    <span class="k">if</span> (<span class="i">args</span>.<span class="i">length</span> != <span class="n">1</span>) {
      <span class="t">System</span>.<span class="i">err</span>.<span class="i">println</span>(<span class="s">&quot;Usage: generate_ast &lt;output directory&gt;&quot;</span>);
      <span class="t">System</span>.<span class="i">exit</span>(<span class="n">64</span>);
    }
    <span class="t">String</span> <span class="i">outputDir</span> = <span class="i">args</span>[<span class="n">0</span>];
  }
}
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, create new file</div>

<p>Note that this file is in a different package, <code>.tool</code> instead of <code>.lox</code>. This
script isn&rsquo;t part of the interpreter itself. It&rsquo;s a tool <em>we</em>, the people
hacking on the interpreter, run ourselves to generate the syntax tree classes.
When it&rsquo;s done, we treat &ldquo;Expr.java&rdquo; like any other file in the implementation.
We are merely automating how that file gets authored.</p>
<p>To generate the classes, it needs to have some description of each type and its
fields.</p>
<div class="codehilite"><pre class="insert-before">    String outputDir = args[0];
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">    <span class="i">defineAst</span>(<span class="i">outputDir</span>, <span class="s">&quot;Expr&quot;</span>, <span class="t">Arrays</span>.<span class="i">asList</span>(
      <span class="s">&quot;Binary   : Expr left, Token operator, Expr right&quot;</span>,
      <span class="s">&quot;Grouping : Expr expression&quot;</span>,
      <span class="s">&quot;Literal  : Object value&quot;</span>,
      <span class="s">&quot;Unary    : Token operator, Expr right&quot;</span>
    ));
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<p>For brevity&rsquo;s sake, I jammed the descriptions of the expression types into
strings. Each is the name of the class followed by <code>:</code> and the list of fields,
separated by commas. Each field has a type and a name.</p>
<p>The first thing <code>defineAst()</code> needs to do is output the base Expr class.</p>
<div class="codehilite"><div class="source-file"><em>tool/GenerateAst.java</em><br>
add after <em>main</em>()</div>
<pre>  <span class="k">private</span> <span class="k">static</span> <span class="t">void</span> <span class="i">defineAst</span>(
      <span class="t">String</span> <span class="i">outputDir</span>, <span class="t">String</span> <span class="i">baseName</span>, <span class="t">List</span>&lt;<span class="t">String</span>&gt; <span class="i">types</span>)
      <span class="k">throws</span> <span class="t">IOException</span> {
    <span class="t">String</span> <span class="i">path</span> = <span class="i">outputDir</span> + <span class="s">&quot;/&quot;</span> + <span class="i">baseName</span> + <span class="s">&quot;.java&quot;</span>;
    <span class="t">PrintWriter</span> <span class="i">writer</span> = <span class="k">new</span> <span class="t">PrintWriter</span>(<span class="i">path</span>, <span class="s">&quot;UTF-8&quot;</span>);

    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;package com.craftinginterpreters.lox;&quot;</span>);
    <span class="i">writer</span>.<span class="i">println</span>();
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;import java.util.List;&quot;</span>);
    <span class="i">writer</span>.<span class="i">println</span>();
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;abstract class &quot;</span> + <span class="i">baseName</span> + <span class="s">&quot; {&quot;</span>);

    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;}&quot;</span>);
    <span class="i">writer</span>.<span class="i">close</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, add after <em>main</em>()</div>

<p>When we call this, <code>baseName</code> is &ldquo;Expr&rdquo;, which is both the name of the class and
the name of the file it outputs. We pass this as an argument instead of
hardcoding the name because we&rsquo;ll add a separate family of classes later for
statements.</p>
<p>Inside the base class, we define each subclass.</p>
<div class="codehilite"><pre class="insert-before">    writer.println(&quot;abstract class &quot; + baseName + &quot; {&quot;);

</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>defineAst</em>()</div>
<pre class="insert">    <span class="c">// The AST classes.</span>
    <span class="k">for</span> (<span class="t">String</span> <span class="i">type</span> : <span class="i">types</span>) {
      <span class="t">String</span> <span class="i">className</span> = <span class="i">type</span>.<span class="i">split</span>(<span class="s">&quot;:&quot;</span>)[<span class="n">0</span>].<span class="i">trim</span>();
      <span class="t">String</span> <span class="i">fields</span> = <span class="i">type</span>.<span class="i">split</span>(<span class="s">&quot;:&quot;</span>)[<span class="n">1</span>].<span class="i">trim</span>();<span name="robust"> </span>
      <span class="i">defineType</span>(<span class="i">writer</span>, <span class="i">baseName</span>, <span class="i">className</span>, <span class="i">fields</span>);
    }
</pre><pre class="insert-after">    writer.println(&quot;}&quot;);
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>defineAst</em>()</div>

<aside name="robust">
<p>This isn&rsquo;t the world&rsquo;s most elegant string manipulation code, but that&rsquo;s fine.
It only runs on the exact set of class definitions we give it. Robustness ain&rsquo;t
a priority.</p>
</aside>
<p>That code, in turn, calls:</p>
<div class="codehilite"><div class="source-file"><em>tool/GenerateAst.java</em><br>
add after <em>defineAst</em>()</div>
<pre>  <span class="k">private</span> <span class="k">static</span> <span class="t">void</span> <span class="i">defineType</span>(
      <span class="t">PrintWriter</span> <span class="i">writer</span>, <span class="t">String</span> <span class="i">baseName</span>,
      <span class="t">String</span> <span class="i">className</span>, <span class="t">String</span> <span class="i">fieldList</span>) {
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;  static class &quot;</span> + <span class="i">className</span> + <span class="s">&quot; extends &quot;</span> +
        <span class="i">baseName</span> + <span class="s">&quot; {&quot;</span>);

    <span class="c">// Constructor.</span>
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;    &quot;</span> + <span class="i">className</span> + <span class="s">&quot;(&quot;</span> + <span class="i">fieldList</span> + <span class="s">&quot;) {&quot;</span>);

    <span class="c">// Store parameters in fields.</span>
    <span class="t">String</span>[] <span class="i">fields</span> = <span class="i">fieldList</span>.<span class="i">split</span>(<span class="s">&quot;, &quot;</span>);
    <span class="k">for</span> (<span class="t">String</span> <span class="i">field</span> : <span class="i">fields</span>) {
      <span class="t">String</span> <span class="i">name</span> = <span class="i">field</span>.<span class="i">split</span>(<span class="s">&quot; &quot;</span>)[<span class="n">1</span>];
      <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;      this.&quot;</span> + <span class="i">name</span> + <span class="s">&quot; = &quot;</span> + <span class="i">name</span> + <span class="s">&quot;;&quot;</span>);
    }

    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;    }&quot;</span>);

    <span class="c">// Fields.</span>
    <span class="i">writer</span>.<span class="i">println</span>();
    <span class="k">for</span> (<span class="t">String</span> <span class="i">field</span> : <span class="i">fields</span>) {
      <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;    final &quot;</span> + <span class="i">field</span> + <span class="s">&quot;;&quot;</span>);
    }

    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;  }&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, add after <em>defineAst</em>()</div>

<p>There we go. All of that glorious Java boilerplate is done. It declares each
field in the class body. It defines a constructor for the class with parameters
for each field and initializes them in the body.</p>
<p>Compile and run this Java program now and it <span name="longer">blasts</span>
out a new &ldquo;.java&rdquo; file containing a few dozen lines of code. That file&rsquo;s
about to get even longer.</p>
<aside name="longer">
<p><a href="appendix-ii.html">Appendix II</a> contains the code generated by this script once we&rsquo;ve finished
implementing jlox and defined all of its syntax tree nodes.</p>
</aside>
<h2><a href="#working-with-trees" id="working-with-trees"><small>5&#8202;.&#8202;3</small>Working with Trees</a></h2>
<p>Put on your imagination hat for a moment. Even though we aren&rsquo;t there yet,
consider what the interpreter will do with the syntax trees. Each kind of
expression in Lox behaves differently at runtime. That means the interpreter
needs to select a different chunk of code to handle each expression type. With
tokens, we can simply switch on the TokenType. But we don&rsquo;t have a &ldquo;type&rdquo; enum
for the syntax trees, just a separate Java class for each one.</p>
<p>We could write a long chain of type tests:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">expr</span> <span class="k">instanceof</span> <span class="t">Expr</span>.<span class="t">Binary</span>) {
  <span class="c">// ...</span>
} <span class="k">else</span> <span class="k">if</span> (<span class="i">expr</span> <span class="k">instanceof</span> <span class="t">Expr</span>.<span class="t">Grouping</span>) {
  <span class="c">// ...</span>
} <span class="k">else</span> <span class="c">// ...</span>
</pre></div>
<p>But all of those sequential type tests are slow. Expression types whose names
are alphabetically later would take longer to execute because they&rsquo;d fall
through more <code>if</code> cases before finding the right type. That&rsquo;s not my idea of an
elegant solution.</p>
<p>We have a family of classes and we need to associate a chunk of behavior with
each one. The natural solution in an object-oriented language like Java is to
put those behaviors into methods on the classes themselves. We could add an
abstract <span name="interpreter-pattern"><code>interpret()</code></span> method on Expr
which each subclass would then implement to interpret itself.</p>
<aside name="interpreter-pattern">
<p>This exact thing is literally called the <a href="https://en.wikipedia.org/wiki/Interpreter_pattern">&ldquo;Interpreter pattern&rdquo;</a> in
<em>Design Patterns: Elements of Reusable Object-Oriented Software</em>, by Erich
Gamma, et al.</p>
</aside>
<p>This works alright for tiny projects, but it scales poorly. Like I noted before,
these tree classes span a few domains. At the very least, both the parser and
interpreter will mess with them. As <a href="resolving-and-binding.html">you&rsquo;ll see later</a>, we need to
do name resolution on them. If our language were statically typed, we&rsquo;d have a
type checking pass.</p>
<p>If we added instance methods to the expression classes for every one of those
operations, that would smush a bunch of different domains together. That
violates <a href="https://en.wikipedia.org/wiki/Separation_of_concerns">separation of concerns</a> and leads to hard-to-maintain code.</p>
<h3><a href="#the-expression-problem" id="the-expression-problem"><small>5&#8202;.&#8202;3&#8202;.&#8202;1</small>The expression problem</a></h3>
<p>This problem is more fundamental than it may seem at first. We have a handful of
types, and a handful of high-level operations like &ldquo;interpret&rdquo;. For each pair of
type and operation, we need a specific implementation. Picture a table:</p><img src="image/representing-code/table.png" alt="A table where rows are labeled with expression classes, and columns are function names." />
<p>Rows are types, and columns are operations. Each cell represents the unique
piece of code to implement that operation on that type.</p>
<p>An object-oriented language like Java assumes that all of the code in one row
naturally hangs together. It figures all the things you do with a type are
likely related to each other, and the language makes it easy to define them
together as methods inside the same class.</p><img src="image/representing-code/rows.png" alt="The table split into rows for each class." />
<p>This makes it easy to extend the table by adding new rows. Simply define a new
class. No existing code has to be touched. But imagine if you want to add a new
<em>operation</em><span class="em">&mdash;</span>a new column. In Java, that means cracking open each of those
existing classes and adding a method to it.</p>
<p>Functional paradigm languages in the <span name="ml">ML</span> family flip that
around. There, you don&rsquo;t have classes with methods. Types and functions are
totally distinct. To implement an operation for a number of different types, you
define a single function. In the body of that function, you use <em>pattern
matching</em><span class="em">&mdash;</span>sort of a type-based switch on steroids<span class="em">&mdash;</span>to implement the
operation for each type all in one place.</p>
<aside name="ml">
<p>ML, short for &ldquo;metalanguage&rdquo;, was created by Robin Milner and friends and forms
one of the main branches in the great programming language family tree. Its
children include SML, Caml, OCaml, Haskell, and F#. Even Scala, Rust, and Swift
bear a strong resemblance.</p>
<p>Much like Lisp, it is one of those languages that is so full of good ideas that
language designers today are still rediscovering them over forty years later.</p>
</aside>
<p>This makes it trivial to add new operations<span class="em">&mdash;</span>simply define another function
that pattern matches on all of the types.</p><img src="image/representing-code/columns.png" alt="The table split into columns for each function." />
<p>But, conversely, adding a new type is hard. You have to go back and add a new
case to all of the pattern matches in all of the existing functions.</p>
<p>Each style has a certain &ldquo;grain&rdquo; to it. That&rsquo;s what the paradigm name literally
says<span class="em">&mdash;</span>an object-oriented language wants you to <em>orient</em> your code along the
rows of types. A functional language instead encourages you to lump each
column&rsquo;s worth of code together into a <em>function</em>.</p>
<p>A bunch of smart language nerds noticed that neither style made it easy to add
<em>both</em> rows and columns to the <span name="multi">table</span>. They called this
difficulty the &ldquo;expression problem&rdquo; because<span class="em">&mdash;</span>like we are now<span class="em">&mdash;</span>they first ran
into it when they were trying to figure out the best way to model expression
syntax tree nodes in a compiler.</p>
<aside name="multi">
<p>Languages with <em>multimethods</em>, like Common Lisp&rsquo;s CLOS, Dylan, and Julia, do
support adding both new types and operations easily. What they typically
sacrifice is either static type checking, or separate compilation.</p>
</aside>
<p>People have thrown all sorts of language features, design patterns, and
programming tricks to try to knock that problem down, but no perfect language has
finished it off yet. In the meantime, the best we can do is try to pick a
language whose orientation matches the natural architectural seams in the
program we&rsquo;re writing.</p>
<p>Object-orientation works fine for many parts of our interpreter, but these tree
classes rub against the grain of Java. Fortunately, there&rsquo;s a design pattern we
can bring to bear on it.</p>
<h3><a href="#the-visitor-pattern" id="the-visitor-pattern"><small>5&#8202;.&#8202;3&#8202;.&#8202;2</small>The Visitor pattern</a></h3>
<p>The <strong>Visitor pattern</strong> is the most widely misunderstood pattern in all of
<em>Design Patterns</em>, which is really saying something when you look at the
software architecture excesses of the past couple of decades.</p>
<p>The trouble starts with terminology. The pattern isn&rsquo;t about &ldquo;visiting&rdquo;, and the
&ldquo;accept&rdquo; method in it doesn&rsquo;t conjure up any helpful imagery either. Many think
the pattern has to do with traversing trees, which isn&rsquo;t the case at all. We
<em>are</em> going to use it on a set of classes that are tree-like, but that&rsquo;s a
coincidence. As you&rsquo;ll see, the pattern works as well on a single object.</p>
<p>The Visitor pattern is really about approximating the functional style within an
OOP language. It lets us add new columns to that table easily. We can define all
of the behavior for a new operation on a set of types in one place, without
having to touch the types themselves. It does this the same way we solve almost
every problem in computer science: by adding a layer of indirection.</p>
<p>Before we apply it to our auto-generated Expr classes, let&rsquo;s walk through a
simpler example. Say we have two kinds of pastries: <span
name="beignet">beignets</span> and crullers.</p>
<aside name="beignet">
<p>A beignet (pronounced &ldquo;ben-yay&rdquo;, with equal emphasis on both syllables) is a
deep-fried pastry in the same family as doughnuts. When the French colonized
North America in the 1700s, they brought beignets with them. Today, in the US,
they are most strongly associated with the cuisine of New Orleans.</p>
<p>My preferred way to consume them is fresh out of the fryer at Café du Monde,
piled high in powdered sugar, and washed down with a cup of café au lait while I
watch tourists staggering around trying to shake off their hangover from the
previous night&rsquo;s revelry.</p>
</aside>
<div class="codehilite"><pre>  <span class="k">abstract</span> <span class="k">class</span> <span class="t">Pastry</span> {
  }

  <span class="k">class</span> <span class="t">Beignet</span> <span class="k">extends</span> <span class="t">Pastry</span> {
  }

  <span class="k">class</span> <span class="t">Cruller</span> <span class="k">extends</span> <span class="t">Pastry</span> {
  }
</pre></div>

<p>We want to be able to define new pastry operations<span class="em">&mdash;</span>cooking them, eating them,
decorating them, etc.<span class="em">&mdash;</span>without having to add a new method to each class every
time. Here&rsquo;s how we do it. First, we define a separate interface.</p>
<div class="codehilite"><pre>  <span class="k">interface</span> <span class="t">PastryVisitor</span> {
    <span class="t">void</span> <span class="i">visitBeignet</span>(<span class="t">Beignet</span> <span class="i">beignet</span>);<span name="overload"> </span>
    <span class="t">void</span> <span class="i">visitCruller</span>(<span class="t">Cruller</span> <span class="i">cruller</span>);
  }
</pre></div>

<aside name="overload">
<p>In <em>Design Patterns</em>, both of these methods are confusingly named <code>visit()</code>, and
they rely on overloading to distinguish them. This leads some readers to think
that the correct visit method is chosen <em>at runtime</em> based on its parameter
type. That isn&rsquo;t the case. Unlike over<em>riding</em>, over<em>loading</em> is statically
dispatched at compile time.</p>
<p>Using distinct names for each method makes the dispatch more obvious, and also
shows you how to apply this pattern in languages that don&rsquo;t support overloading.</p>
</aside>
<p>Each operation that can be performed on pastries is a new class that implements
that interface. It has a concrete method for each type of pastry. That keeps the
code for the operation on both types all nestled snugly together in one class.</p>
<p>Given some pastry, how do we route it to the correct method on the visitor based
on its type? Polymorphism to the rescue! We add this method to Pastry:</p>
<div class="codehilite"><pre class="insert-before">  abstract class Pastry {
</pre><pre class="insert">    <span class="k">abstract</span> <span class="t">void</span> <span class="i">accept</span>(<span class="t">PastryVisitor</span> <span class="i">visitor</span>);
</pre><pre class="insert-after">  }
</pre></div>

<p>Each subclass implements it.</p>
<div class="codehilite"><pre class="insert-before">  class Beignet extends Pastry {
</pre><pre class="insert">    <span class="a">@Override</span>
    <span class="t">void</span> <span class="i">accept</span>(<span class="t">PastryVisitor</span> <span class="i">visitor</span>) {
      <span class="i">visitor</span>.<span class="i">visitBeignet</span>(<span class="k">this</span>);
    }
</pre><pre class="insert-after">  }
</pre></div>

<p>And:</p>
<div class="codehilite"><pre class="insert-before">  class Cruller extends Pastry {
</pre><pre class="insert">    <span class="a">@Override</span>
    <span class="t">void</span> <span class="i">accept</span>(<span class="t">PastryVisitor</span> <span class="i">visitor</span>) {
      <span class="i">visitor</span>.<span class="i">visitCruller</span>(<span class="k">this</span>);
    }
</pre><pre class="insert-after">  }
</pre></div>

<p>To perform an operation on a pastry, we call its <code>accept()</code> method and pass in
the visitor for the operation we want to execute. The pastry<span class="em">&mdash;</span>the specific
subclass&rsquo;s overriding implementation of <code>accept()</code><span class="em">&mdash;</span>turns around and calls the
appropriate visit method on the visitor and passes <em>itself</em> to it.</p>
<p>That&rsquo;s the heart of the trick right there. It lets us use polymorphic dispatch
on the <em>pastry</em> classes to select the appropriate method on the <em>visitor</em> class.
In the table, each pastry class is a row, but if you look at all of the methods
for a single visitor, they form a <em>column</em>.</p><img src="image/representing-code/visitor.png" alt="Now all of the cells for one operation are part of the same class, the visitor." />
<p>We added one <code>accept()</code> method to each class, and we can use it for as many
visitors as we want without ever having to touch the pastry classes again. It&rsquo;s
a clever pattern.</p>
<h3><a href="#visitors-for-expressions" id="visitors-for-expressions"><small>5&#8202;.&#8202;3&#8202;.&#8202;3</small>Visitors for expressions</a></h3>
<p>OK, let&rsquo;s weave it into our expression classes. We&rsquo;ll also <span
name="context">refine</span> the pattern a little. In the pastry example, the
visit and <code>accept()</code> methods don&rsquo;t return anything. In practice, visitors often
want to define operations that produce values. But what return type should
<code>accept()</code> have? We can&rsquo;t assume every visitor class wants to produce the same
type, so we&rsquo;ll use generics to let each implementation fill in a return type.</p>
<aside name="context">
<p>Another common refinement is an additional &ldquo;context&rdquo; parameter that is passed to
the visit methods and then sent back through as a parameter to <code>accept()</code>. That
lets operations take an additional parameter. The visitors we&rsquo;ll define in the
book don&rsquo;t need that, so I omitted it.</p>
</aside>
<p>First, we define the visitor interface. Again, we nest it inside the base class
so that we can keep everything in one file.</p>
<div class="codehilite"><pre class="insert-before">    writer.println(&quot;abstract class &quot; + baseName + &quot; {&quot;);

</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>defineAst</em>()</div>
<pre class="insert">    <span class="i">defineVisitor</span>(<span class="i">writer</span>, <span class="i">baseName</span>, <span class="i">types</span>);

</pre><pre class="insert-after">    // The AST classes.
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>defineAst</em>()</div>

<p>That function generates the visitor interface.</p>
<div class="codehilite"><div class="source-file"><em>tool/GenerateAst.java</em><br>
add after <em>defineAst</em>()</div>
<pre>  <span class="k">private</span> <span class="k">static</span> <span class="t">void</span> <span class="i">defineVisitor</span>(
      <span class="t">PrintWriter</span> <span class="i">writer</span>, <span class="t">String</span> <span class="i">baseName</span>, <span class="t">List</span>&lt;<span class="t">String</span>&gt; <span class="i">types</span>) {
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;  interface Visitor&lt;R&gt; {&quot;</span>);

    <span class="k">for</span> (<span class="t">String</span> <span class="i">type</span> : <span class="i">types</span>) {
      <span class="t">String</span> <span class="i">typeName</span> = <span class="i">type</span>.<span class="i">split</span>(<span class="s">&quot;:&quot;</span>)[<span class="n">0</span>].<span class="i">trim</span>();
      <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;    R visit&quot;</span> + <span class="i">typeName</span> + <span class="i">baseName</span> + <span class="s">&quot;(&quot;</span> +
          <span class="i">typeName</span> + <span class="s">&quot; &quot;</span> + <span class="i">baseName</span>.<span class="i">toLowerCase</span>() + <span class="s">&quot;);&quot;</span>);
    }

    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;  }&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, add after <em>defineAst</em>()</div>

<p>Here, we iterate through all of the subclasses and declare a visit method for
each one. When we define new expression types later, this will automatically
include them.</p>
<p>Inside the base class, we define the abstract <code>accept()</code> method.</p>
<div class="codehilite"><pre class="insert-before">      defineType(writer, baseName, className, fields);
    }
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>defineAst</em>()</div>
<pre class="insert">

    <span class="c">// The base accept() method.</span>
    <span class="i">writer</span>.<span class="i">println</span>();
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;  abstract &lt;R&gt; R accept(Visitor&lt;R&gt; visitor);&quot;</span>);

</pre><pre class="insert-after">    writer.println(&quot;}&quot;);
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>defineAst</em>()</div>

<p>Finally, each subclass implements that and calls the right visit method for its
own type.</p>
<div class="codehilite"><pre class="insert-before">    writer.println(&quot;    }&quot;);
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>defineType</em>()</div>
<pre class="insert">

    <span class="c">// Visitor pattern.</span>
    <span class="i">writer</span>.<span class="i">println</span>();
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;    @Override&quot;</span>);
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;    &lt;R&gt; R accept(Visitor&lt;R&gt; visitor) {&quot;</span>);
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;      return visitor.visit&quot;</span> +
        <span class="i">className</span> + <span class="i">baseName</span> + <span class="s">&quot;(this);&quot;</span>);
    <span class="i">writer</span>.<span class="i">println</span>(<span class="s">&quot;    }&quot;</span>);
</pre><pre class="insert-after">

    // Fields.
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>defineType</em>()</div>

<p>There we go. Now we can define operations on expressions without having to muck
with the classes or our generator script. Compile and run this generator script
to output an updated &ldquo;Expr.java&rdquo; file. It contains a generated Visitor
interface and a set of expression node classes that support the Visitor pattern
using it.</p>
<p>Before we end this rambling chapter, let&rsquo;s implement that Visitor interface and
see the pattern in action.</p>
<h2><a href="#a-not-very-pretty-printer" id="a-not-very-pretty-printer"><small>5&#8202;.&#8202;4</small>A (Not Very) Pretty Printer</a></h2>
<p>When we debug our parser and interpreter, it&rsquo;s often useful to look at a parsed
syntax tree and make sure it has the structure we expect. We could inspect it in
the debugger, but that can be a chore.</p>
<p>Instead, we&rsquo;d like some code that, given a syntax tree, produces an unambiguous
string representation of it. Converting a tree to a string is sort of the
opposite of a parser, and is often called &ldquo;pretty printing&rdquo; when the goal is to
produce a string of text that is valid syntax in the source language.</p>
<p>That&rsquo;s not our goal here. We want the string to very explicitly show the nesting
structure of the tree. A printer that returned <code>1 + 2 * 3</code> isn&rsquo;t super helpful
if what we&rsquo;re trying to debug is whether operator precedence is handled
correctly. We want to know if the <code>+</code> or <code>*</code> is at the top of the tree.</p>
<p>To that end, the string representation we produce isn&rsquo;t going to be Lox syntax.
Instead, it will look a lot like, well, Lisp. Each expression is explicitly
parenthesized, and all of its subexpressions and tokens are contained in that.</p>
<p>Given a syntax tree like:</p><img src="image/representing-code/expression.png" alt="An example syntax tree." />
<p>It produces:</p>
<div class="codehilite"><pre>(* (- 123) (group 45.67))
</pre></div>
<p>Not exactly &ldquo;pretty&rdquo;, but it does show the nesting and grouping explicitly. To
implement this, we define a new class.</p>
<div class="codehilite"><div class="source-file"><em>lox/AstPrinter.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">class</span> <span class="t">AstPrinter</span> <span class="k">implements</span> <span class="t">Expr</span>.<span class="t">Visitor</span>&lt;<span class="t">String</span>&gt; {
  <span class="t">String</span> <span class="i">print</span>(<span class="t">Expr</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">expr</span>.<span class="i">accept</span>(<span class="k">this</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/AstPrinter.java</em>, create new file</div>

<p>As you can see, it implements the visitor interface. That means we need visit
methods for each of the expression types we have so far.</p>
<div class="codehilite"><pre class="insert-before">    return expr.accept(this);
  }
</pre><div class="source-file"><em>lox/AstPrinter.java</em><br>
add after <em>print</em>()</div>
<pre class="insert">

  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">String</span> <span class="i">visitBinaryExpr</span>(<span class="t">Expr</span>.<span class="t">Binary</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">parenthesize</span>(<span class="i">expr</span>.<span class="i">operator</span>.<span class="i">lexeme</span>,
                        <span class="i">expr</span>.<span class="i">left</span>, <span class="i">expr</span>.<span class="i">right</span>);
  }

  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">String</span> <span class="i">visitGroupingExpr</span>(<span class="t">Expr</span>.<span class="t">Grouping</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">parenthesize</span>(<span class="s">&quot;group&quot;</span>, <span class="i">expr</span>.<span class="i">expression</span>);
  }

  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">String</span> <span class="i">visitLiteralExpr</span>(<span class="t">Expr</span>.<span class="t">Literal</span> <span class="i">expr</span>) {
    <span class="k">if</span> (<span class="i">expr</span>.<span class="i">value</span> == <span class="k">null</span>) <span class="k">return</span> <span class="s">&quot;nil&quot;</span>;
    <span class="k">return</span> <span class="i">expr</span>.<span class="i">value</span>.<span class="i">toString</span>();
  }

  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">String</span> <span class="i">visitUnaryExpr</span>(<span class="t">Expr</span>.<span class="t">Unary</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">parenthesize</span>(<span class="i">expr</span>.<span class="i">operator</span>.<span class="i">lexeme</span>, <span class="i">expr</span>.<span class="i">right</span>);
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>lox/AstPrinter.java</em>, add after <em>print</em>()</div>

<p>Literal expressions are easy<span class="em">&mdash;</span>they convert the value to a string with a little
check to handle Java&rsquo;s <code>null</code> standing in for Lox&rsquo;s <code>nil</code>. The other expressions
have subexpressions, so they use this <code>parenthesize()</code> helper method:</p>
<div class="codehilite"><div class="source-file"><em>lox/AstPrinter.java</em><br>
add after <em>visitUnaryExpr</em>()</div>
<pre>  <span class="k">private</span> <span class="t">String</span> <span class="i">parenthesize</span>(<span class="t">String</span> <span class="i">name</span>, <span class="t">Expr</span>... <span class="i">exprs</span>) {
    <span class="t">StringBuilder</span> <span class="i">builder</span> = <span class="k">new</span> <span class="t">StringBuilder</span>();

    <span class="i">builder</span>.<span class="i">append</span>(<span class="s">&quot;(&quot;</span>).<span class="i">append</span>(<span class="i">name</span>);
    <span class="k">for</span> (<span class="t">Expr</span> <span class="i">expr</span> : <span class="i">exprs</span>) {
      <span class="i">builder</span>.<span class="i">append</span>(<span class="s">&quot; &quot;</span>);
      <span class="i">builder</span>.<span class="i">append</span>(<span class="i">expr</span>.<span class="i">accept</span>(<span class="k">this</span>));
    }
    <span class="i">builder</span>.<span class="i">append</span>(<span class="s">&quot;)&quot;</span>);

    <span class="k">return</span> <span class="i">builder</span>.<span class="i">toString</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/AstPrinter.java</em>, add after <em>visitUnaryExpr</em>()</div>

<p>It takes a name and a list of subexpressions and wraps them all up in
parentheses, yielding a string like:</p>
<div class="codehilite"><pre>(+ 1 2)
</pre></div>
<p>Note that it calls <code>accept()</code> on each subexpression and passes in itself. This
is the <span name="tree">recursive</span> step that lets us print an entire
tree.</p>
<aside name="tree">
<p>This recursion is also why people think the Visitor pattern itself has to do
with trees.</p>
</aside>
<p>We don&rsquo;t have a parser yet, so it&rsquo;s hard to see this in action. For now, we&rsquo;ll
hack together a little <code>main()</code> method that manually instantiates a tree and
prints it.</p>
<div class="codehilite"><div class="source-file"><em>lox/AstPrinter.java</em><br>
add after <em>parenthesize</em>()</div>
<pre>  <span class="k">public</span> <span class="k">static</span> <span class="t">void</span> <span class="i">main</span>(<span class="t">String</span>[] <span class="i">args</span>) {
    <span class="t">Expr</span> <span class="i">expression</span> = <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Binary</span>(
        <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Unary</span>(
            <span class="k">new</span> <span class="t">Token</span>(<span class="t">TokenType</span>.<span class="i">MINUS</span>, <span class="s">&quot;-&quot;</span>, <span class="k">null</span>, <span class="n">1</span>),
            <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Literal</span>(<span class="n">123</span>)),
        <span class="k">new</span> <span class="t">Token</span>(<span class="t">TokenType</span>.<span class="i">STAR</span>, <span class="s">&quot;*&quot;</span>, <span class="k">null</span>, <span class="n">1</span>),
        <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Grouping</span>(
            <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Literal</span>(<span class="n">45.67</span>)));

    <span class="t">System</span>.<span class="i">out</span>.<span class="i">println</span>(<span class="k">new</span> <span class="t">AstPrinter</span>().<span class="i">print</span>(<span class="i">expression</span>));
  }
</pre></div>
<div class="source-file-narrow"><em>lox/AstPrinter.java</em>, add after <em>parenthesize</em>()</div>

<p>If we did everything right, it prints:</p>
<div class="codehilite"><pre>(* (- 123) (group 45.67))
</pre></div>
<p>You can go ahead and delete this <code>main()</code> method. We won&rsquo;t need it. Also, as we add new
syntax tree types, I won&rsquo;t bother showing the necessary visit methods for them
in AstPrinter. If you want to (and you want the Java compiler to not yell at
you), go ahead and add them yourself. AstPrinter will come in handy in the next
chapter when we start parsing Lox code into syntax trees. Or, if you don&rsquo;t care to
maintain AstPrinter, feel free to delete it. We won&rsquo;t need it again.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Earlier, I said that the <code>|</code>, <code>*</code>, <code>+</code>, and <code>?</code> forms we added to our grammar
metasyntax were just syntactic sugar. Take this grammar:</p>
<div class="codehilite"><pre><span class="i">expr</span> → <span class="i">expr</span> ( <span class="s">&quot;(&quot;</span> ( <span class="i">expr</span> ( <span class="s">&quot;,&quot;</span> <span class="i">expr</span> )* )? <span class="s">&quot;)&quot;</span> | <span class="s">&quot;.&quot;</span> <span class="t">IDENTIFIER</span> )+
     | <span class="t">IDENTIFIER</span>
     | <span class="t">NUMBER</span>
</pre></div>
<p>Produce a grammar that matches the same language but does not use any of
that notational sugar.</p>
<p><em>Bonus:</em> What kind of expression does this bit of grammar encode?</p>
</li>
<li>
<p>The Visitor pattern lets you emulate the functional style in an
object-oriented language. Devise a complementary pattern for a functional
language. It should let you bundle all of the operations on one type
together and let you define new types easily.</p>
<p>(SML or Haskell would be ideal for this exercise, but Scheme or another Lisp
works as well.)</p>
</li>
<li>
<p>In <a href="https://en.wikipedia.org/wiki/Reverse_Polish_notation">reverse Polish notation</a> (RPN), the operands to an arithmetic
operator are both placed before the operator, so <code>1 + 2</code> becomes <code>1 2 +</code>.
Evaluation proceeds from left to right. Numbers are pushed onto an implicit
stack. An arithmetic operator pops the top two numbers, performs the
operation, and pushes the result. Thus, this:</p>
<div class="codehilite"><pre>(<span class="n">1</span> + <span class="n">2</span>) * (<span class="n">4</span> - <span class="n">3</span>)
</pre></div>
<p>in RPN becomes:</p>
<div class="codehilite"><pre><span class="n">1</span> <span class="n">2</span> + <span class="n">4</span> <span class="n">3</span> - *
</pre></div>
<p>Define a visitor class for our syntax tree classes that takes an expression,
converts it to RPN, and returns the resulting string.</p>
</li>
</ol>
</div>

<footer>
<a href="parsing-expressions.html" class="next">
  Next Chapter: &ldquo;Parsing Expressions&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/resolving-and-binding.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Resolving and Binding &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Resolving and Binding<small>11</small></a></h3>

<ul>
    <li><a href="#static-scope"><small>11.1</small> Static Scope</a></li>
    <li><a href="#semantic-analysis"><small>11.2</small> Semantic Analysis</a></li>
    <li><a href="#a-resolver-class"><small>11.3</small> A Resolver Class</a></li>
    <li><a href="#interpreting-resolved-variables"><small>11.4</small> Interpreting Resolved Variables</a></li>
    <li><a href="#resolution-errors"><small>11.5</small> Resolution Errors</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="functions.html" title="Functions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="classes.html" title="Classes" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="functions.html" title="Functions" class="prev">←</a>
<a href="classes.html" title="Classes" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Resolving and Binding<small>11</small></a></h3>

<ul>
    <li><a href="#static-scope"><small>11.1</small> Static Scope</a></li>
    <li><a href="#semantic-analysis"><small>11.2</small> Semantic Analysis</a></li>
    <li><a href="#a-resolver-class"><small>11.3</small> A Resolver Class</a></li>
    <li><a href="#interpreting-resolved-variables"><small>11.4</small> Interpreting Resolved Variables</a></li>
    <li><a href="#resolution-errors"><small>11.5</small> Resolution Errors</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="functions.html" title="Functions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="classes.html" title="Classes" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">11</div>
  <h1>Resolving and Binding</h1>

<blockquote>
<p>Once in a while you find yourself in an odd situation. You get into it by
degrees and in the most natural way but, when you are right in the midst of
it, you are suddenly astonished and ask yourself how in the world it all came
about.</p>
<p><cite>Thor Heyerdahl, <em>Kon-Tiki</em></cite></p>
</blockquote>
<p>Oh, no! Our language implementation is taking on water! Way back when we <a href="statements-and-state.html">added
variables and blocks</a>, we had scoping nice and tight. But when we
<a href="functions.html">later added closures</a>, a hole opened in our formerly waterproof
interpreter. Most real programs are unlikely to slip through this hole, but as
language implementers, we take a sacred vow to care about correctness even in
the deepest, dampest corners of the semantics.</p>
<p>We will spend this entire chapter exploring that leak, and then carefully
patching it up. In the process, we will gain a more rigorous understanding of
lexical scoping as used by Lox and other languages in the C tradition. We&rsquo;ll
also get a chance to learn about <em>semantic analysis</em><span class="em">&mdash;</span>a powerful technique for
extracting meaning from the user&rsquo;s source code without having to run it.</p>
<h2><a href="#static-scope" id="static-scope"><small>11&#8202;.&#8202;1</small>Static Scope</a></h2>
<p>A quick refresher: Lox, like most modern languages, uses <em>lexical</em> scoping. This
means that you can figure out which declaration a variable name refers to just
by reading the text of the program. For example:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer&quot;</span>;
{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;inner&quot;</span>;
  <span class="k">print</span> <span class="i">a</span>;
}
</pre></div>
<p>Here, we know that the <code>a</code> being printed is the variable declared on the
previous line, and not the global one. Running the program doesn&rsquo;t<span class="em">&mdash;</span><em>can&rsquo;t</em><span class="em">&mdash;</span>affect this. The scope rules are part of the <em>static</em> semantics of the language,
which is why they&rsquo;re also called <em>static scope</em>.</p>
<p>I haven&rsquo;t spelled out those scope rules, but now is the time for <span
name="precise">precision</span>:</p>
<aside name="precise">
<p>This is still nowhere near as precise as a real language specification. Those
docs must be so explicit that even a Martian or an outright malicious programmer
would be forced to implement the correct semantics provided they followed the
letter of the spec.</p>
<p>That exactitude is important when a language may be implemented by competing
companies who want their product to be incompatible with the others to lock
customers onto their platform. For this book, we can thankfully ignore those
kinds of shady shenanigans.</p>
</aside>
<p><strong>A variable usage refers to the preceding declaration with the same name in the
innermost scope that encloses the expression where the variable is used.</strong></p>
<p>There&rsquo;s a lot to unpack in that:</p>
<ul>
<li>
<p>I say &ldquo;variable usage&rdquo; instead of &ldquo;variable expression&rdquo; to cover both
variable expressions and assignments. Likewise with &ldquo;expression where the
variable is used&rdquo;.</p>
</li>
<li>
<p>&ldquo;Preceding&rdquo; means appearing before <em>in the program text</em>.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer&quot;</span>;
{
  <span class="k">print</span> <span class="i">a</span>;
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;inner&quot;</span>;
}
</pre></div>
<p>Here, the <code>a</code> being printed is the outer one since it appears <span
name="hoisting">before</span> the <code>print</code> statement that uses it. In most
cases, in straight-line code, the declaration preceding in <em>text</em> will also
precede the usage in <em>time</em>. But that&rsquo;s not always true. As we&rsquo;ll see,
functions may defer a chunk of code such that its <em>dynamic temporal</em>
execution no longer mirrors the <em>static textual</em> ordering.</p>
<aside name="hoisting">
<p>In JavaScript, variables declared using <code>var</code> are implicitly &ldquo;hoisted&rdquo; to
the beginning of the enclosing function (or of the whole script, at the top
level). Any use of that name in that scope will refer to that variable, even
if the use appears before the declaration. When you write this in JavaScript:</p>
<div class="codehilite"><pre>{
  <span class="i">console</span>.<span class="i">log</span>(<span class="i">a</span>);
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;value&quot;</span>;
}
</pre></div>
<p>It behaves like:</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span>; <span class="c">// Hoist.</span>
  <span class="i">console</span>.<span class="i">log</span>(<span class="i">a</span>);
  <span class="i">a</span> = <span class="s">&quot;value&quot;</span>;
}
</pre></div>
<p>That means that in some cases you can read a variable before its initializer
has run<span class="em">&mdash;</span>an annoying source of bugs. The alternate <code>let</code> syntax for
declaring variables was added later to address this problem.</p>
</aside></li>
<li>
<p>&ldquo;Innermost&rdquo; is there because of our good friend shadowing. There may be more
than one variable with the given name in enclosing scopes, as in:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer&quot;</span>;
{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;inner&quot;</span>;
  <span class="k">print</span> <span class="i">a</span>;
}
</pre></div>
<p>Our rule disambiguates this case by saying the innermost scope wins.</p>
</li>
</ul>
<p>Since this rule makes no mention of any runtime behavior, it implies that a
variable expression always refers to the same declaration through the entire
execution of the program. Our interpreter so far <em>mostly</em> implements the rule
correctly. But when we added closures, an error snuck in.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;global&quot;</span>;
{
  <span class="k">fun</span> <span class="i">showA</span>() {
    <span class="k">print</span> <span class="i">a</span>;
  }

  <span class="i">showA</span>();
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;block&quot;</span>;
  <span class="i">showA</span>();
}
</pre></div>
<p><span name="tricky">Before</span> you type this in and run it, decide what you
think it <em>should</em> print.</p>
<aside name="tricky">
<p>I know, it&rsquo;s a totally pathological, contrived program. It&rsquo;s just <em>weird</em>. No
reasonable person would ever write code like this. Alas, more of your life than
you&rsquo;d expect will be spent dealing with bizarro snippets of code like this if
you stay in the programming language game for long.</p>
</aside>
<p>OK<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>got it? If you&rsquo;re familiar with closures in other languages, you&rsquo;ll expect
it to print &ldquo;global&rdquo; twice. The first call to <code>showA()</code> should definitely print
&ldquo;global&rdquo; since we haven&rsquo;t even reached the declaration of the inner <code>a</code> yet. And
by our rule that a variable expression always resolves to the same variable,
that implies the second call to <code>showA()</code> should print the same thing.</p>
<p>Alas, it prints:</p>
<div class="codehilite"><pre>global
block
</pre></div>
<p>Let me stress that this program never reassigns any variable and contains only a
single <code>print</code> statement. Yet, somehow, that <code>print</code> statement for a
never-assigned variable prints two different values at different points in time.
We definitely broke something somewhere.</p>
<h3><a href="#scopes-and-mutable-environments" id="scopes-and-mutable-environments"><small>11&#8202;.&#8202;1&#8202;.&#8202;1</small>Scopes and mutable environments</a></h3>
<p>In our interpreter, environments are the dynamic manifestation of static scopes.
The two mostly stay in sync with each other<span class="em">&mdash;</span>we create a new environment when
we enter a new scope, and discard it when we leave the scope. There is one other
operation we perform on environments: binding a variable in one. This is where
our bug lies.</p>
<p>Let&rsquo;s walk through that problematic example and see what the environments look
like at each step. First, we declare <code>a</code> in the global scope.</p><img src="image/resolving-and-binding/environment-1.png" alt="The global environment with 'a' defined in it." />
<p>That gives us a single environment with a single variable in it. Then we enter
the block and execute the declaration of <code>showA()</code>.</p><img src="image/resolving-and-binding/environment-2.png" alt="A block environment linking to the global one." />
<p>We get a new environment for the block. In that, we declare one name, <code>showA</code>,
which is bound to the LoxFunction object we create to represent the function.
That object has a <code>closure</code> field that captures the environment where the
function was declared, so it has a reference back to the environment for the
block.</p>
<p>Now we call <code>showA()</code>.</p><img src="image/resolving-and-binding/environment-3.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the global environment." />
<p>The interpreter dynamically creates a new environment for the function body of
<code>showA()</code>. It&rsquo;s empty since that function doesn&rsquo;t declare any variables. The
parent of that environment is the function&rsquo;s closure<span class="em">&mdash;</span>the outer block
environment.</p>
<p>Inside the body of <code>showA()</code>, we print the value of <code>a</code>. The interpreter looks
up this value by walking the chain of environments. It gets all the way
to the global environment before finding it there and printing <code>"global"</code>.
Great.</p>
<p>Next, we declare the second <code>a</code>, this time inside the block.</p><img src="image/resolving-and-binding/environment-4.png" alt="The block environment has both 'a' and 'showA' now." />
<p>It&rsquo;s in the same block<span class="em">&mdash;</span>the same scope<span class="em">&mdash;</span>as <code>showA()</code>, so it goes into the
same environment, which is also the same environment <code>showA()</code>&rsquo;s closure refers
to. This is where it gets interesting. We call <code>showA()</code> again.</p><img src="image/resolving-and-binding/environment-5.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the block environment." />
<p>We create a new empty environment for the body of <code>showA()</code> again, wire it up to
that closure, and run the body. When the interpreter walks the chain of
environments to find <code>a</code>, it now discovers the <em>new</em> <code>a</code> in the block
environment. Boo.</p>
<p>I chose to implement environments in a way that I hoped would agree with your
informal intuition around scopes. We tend to consider all of the code within a
block as being within the same scope, so our interpreter uses a single
environment to represent that. Each environment is a mutable hash table. When a
new local variable is declared, it gets added to the existing environment for
that scope.</p>
<p>That intuition, like many in life, isn&rsquo;t quite right. A block is not necessarily
all the same scope. Consider:</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span>;
  <span class="c">// 1.</span>
  <span class="k">var</span> <span class="i">b</span>;
  <span class="c">// 2.</span>
}
</pre></div>
<p>At the first marked line, only <code>a</code> is in scope. At the second line, both <code>a</code> and
<code>b</code> are. If you define a &ldquo;scope&rdquo; to be a set of declarations, then those are
clearly not the same scope<span class="em">&mdash;</span>they don&rsquo;t contain the same declarations. It&rsquo;s
like each <code>var</code> statement <span name="split">splits</span> the block into two
separate scopes, the scope before the variable is declared and the one after,
which includes the new variable.</p>
<aside name="split">
<p>Some languages make this split explicit. In Scheme and ML, when you declare a
local variable using <code>let</code>, you also delineate the subsequent code where the new
variable is in scope. There is no implicit &ldquo;rest of the block&rdquo;.</p>
</aside>
<p>But in our implementation, environments do act like the entire block is one
scope, just a scope that changes over time. Closures do not like that. When a
function is declared, it captures a reference to the current environment. The
function <em>should</em> capture a frozen snapshot of the environment <em>as it existed at
the moment the function was declared</em>. But instead, in the Java code, it has a
reference to the actual mutable environment object. When a variable is later
declared in the scope that environment corresponds to, the closure sees the new
variable, even though the declaration does <em>not</em> precede the function.</p>
<h3><a href="#persistent-environments" id="persistent-environments"><small>11&#8202;.&#8202;1&#8202;.&#8202;2</small>Persistent environments</a></h3>
<p>There is a style of programming that uses what are called <strong>persistent data
structures</strong>. Unlike the squishy data structures you&rsquo;re familiar with in
imperative programming, a persistent data structure can never be directly
modified. Instead, any &ldquo;modification&rdquo; to an existing structure produces a <span
name="copy">brand</span> new object that contains all of the original data and
the new modification. The original is left unchanged.</p>
<aside name="copy">
<p>This sounds like it might waste tons of memory and time copying the structure
for each operation. In practice, persistent data structures share most of their
data between the different &ldquo;copies&rdquo;.</p>
</aside>
<p>If we were to apply that technique to Environment, then every time you declared
a variable it would return a <em>new</em> environment that contained all of the
previously declared variables along with the one new name. Declaring a variable
would do the implicit &ldquo;split&rdquo; where you have an environment before the variable
is declared and one after:</p><img src="image/resolving-and-binding/split.png" alt="Separate environments before and after the variable is declared." />
<p>A closure retains a reference to the Environment instance in play when the
function was declared. Since any later declarations in that block would produce
new Environment objects, the closure wouldn&rsquo;t see the new variables and our bug
would be fixed.</p>
<p>This is a legit way to solve the problem, and it&rsquo;s the classic way to implement
environments in Scheme interpreters. We could do that for Lox, but it would mean
going back and changing a pile of existing code.</p>
<p>I won&rsquo;t drag you through that. We&rsquo;ll keep the way we represent environments the
same. Instead of making the data more statically structured, we&rsquo;ll bake the
static resolution into the access <em>operation</em> itself.</p>
<h2><a href="#semantic-analysis" id="semantic-analysis"><small>11&#8202;.&#8202;2</small>Semantic Analysis</a></h2>
<p>Our interpreter <strong>resolves</strong> a variable<span class="em">&mdash;</span>tracks down which declaration it
refers to<span class="em">&mdash;</span>each and every time the variable expression is evaluated. If that
variable is swaddled inside a loop that runs a thousand times, that variable
gets re-resolved a thousand times.</p>
<p>We know static scope means that a variable usage always resolves to the same
declaration, which can be determined just by looking at the text. Given that,
why are we doing it dynamically every time? Doing so doesn&rsquo;t just open the hole
that leads to our annoying bug, it&rsquo;s also needlessly slow.</p>
<p>A better solution is to resolve each variable use <em>once</em>. Write a chunk of code
that inspects the user&rsquo;s program, finds every variable mentioned, and figures
out which declaration each refers to. This process is an example of a <strong>semantic
analysis</strong>. Where a parser tells us only whether a program is grammatically correct (a
<em>syntactic</em> analysis), semantic analysis goes farther and starts to figure out
what pieces of the program actually mean. In this case, our analysis will
resolve variable bindings. We&rsquo;ll know not just that an expression <em>is</em> a
variable, but <em>which</em> variable it is.</p>
<p>There are a lot of ways we could store the binding between a variable and its
declaration. When we get to the C interpreter for Lox, we&rsquo;ll have a <em>much</em> more
efficient way of storing and accessing local variables. But for jlox, I want to
minimize the collateral damage we inflict on our existing codebase. I&rsquo;d hate to
throw out a bunch of mostly fine code.</p>
<p>Instead, we&rsquo;ll store the resolution in a way that makes the most out of our
existing Environment class. Recall how the accesses of <code>a</code> are interpreted in
the problematic example.</p><img src="image/resolving-and-binding/environment-3.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the global environment." />
<p>In the first (correct) evaluation, we look at three environments in the chain
before finding the global declaration of <code>a</code>. Then, when the inner <code>a</code> is later
declared in a block scope, it shadows the global one.</p><img src="image/resolving-and-binding/environment-5.png" alt="An empty environment for showA()'s body linking to the previous two. 'a' is resolved in the block environment." />
<p>The next lookup walks the chain, finds <code>a</code> in the <em>second</em> environment and
stops there. Each environment corresponds to a single lexical scope where
variables are declared. If we could ensure a variable lookup always walked the
<em>same</em> number of links in the environment chain, that would ensure that it
found the same variable in the same scope every time.</p>
<p>To &ldquo;resolve&rdquo; a variable usage, we only need to calculate how many &ldquo;hops&rdquo; away
the declared variable will be in the environment chain. The interesting question
is <em>when</em> to do this calculation<span class="em">&mdash;</span>or, put differently, where in our
interpreter&rsquo;s implementation do we stuff the code for it?</p>
<p>Since we&rsquo;re calculating a static property based on the structure of the source
code, the obvious answer is in the parser. That is the traditional home, and is
where we&rsquo;ll put it later in clox. It would work here too, but I want an excuse to
show you another technique. We&rsquo;ll write our resolver as a separate pass.</p>
<h3><a href="#a-variable-resolution-pass" id="a-variable-resolution-pass"><small>11&#8202;.&#8202;2&#8202;.&#8202;1</small>A variable resolution pass</a></h3>
<p>After the parser produces the syntax tree, but before the interpreter starts
executing it, we&rsquo;ll do a single walk over the tree to resolve all of the
variables it contains. Additional passes between parsing and execution are
common. If Lox had static types, we could slide a type checker in there.
Optimizations are often implemented in separate passes like this too. Basically,
any work that doesn&rsquo;t rely on state that&rsquo;s only available at runtime can be done
in this way.</p>
<p>Our variable resolution pass works like a sort of mini-interpreter. It walks the
tree, visiting each node, but a static analysis is different from a dynamic
execution:</p>
<ul>
<li>
<p><strong>There are no side effects.</strong> When the static analysis visits a print
statement, it doesn&rsquo;t actually print anything. Calls to native functions or
other operations that reach out to the outside world are stubbed out and
have no effect.</p>
</li>
<li>
<p><strong>There is no control flow.</strong> Loops are visited only <span
name="fix">once</span>. Both branches are visited in <code>if</code> statements. Logic
operators are not short-circuited.</p>
</li>
</ul>
<aside name="fix">
<p>Variable resolution touches each node once, so its performance is <em>O(n)</em> where
<em>n</em> is the number of syntax tree nodes. More sophisticated analyses may have
greater complexity, but most are carefully designed to be linear or not far from
it. It&rsquo;s an embarrassing faux pas if your compiler gets exponentially slower as
the user&rsquo;s program grows.</p>
</aside>
<h2><a href="#a-resolver-class" id="a-resolver-class"><small>11&#8202;.&#8202;3</small>A Resolver Class</a></h2>
<p>Like everything in Java, our variable resolution pass is embodied in a class.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.HashMap</span>;
<span class="k">import</span> <span class="i">java.util.List</span>;
<span class="k">import</span> <span class="i">java.util.Map</span>;
<span class="k">import</span> <span class="i">java.util.Stack</span>;

<span class="k">class</span> <span class="t">Resolver</span> <span class="k">implements</span> <span class="t">Expr</span>.<span class="t">Visitor</span>&lt;<span class="t">Void</span>&gt;, <span class="t">Stmt</span>.<span class="t">Visitor</span>&lt;<span class="t">Void</span>&gt; {
  <span class="k">private</span> <span class="k">final</span> <span class="t">Interpreter</span> <span class="i">interpreter</span>;

  <span class="t">Resolver</span>(<span class="t">Interpreter</span> <span class="i">interpreter</span>) {
    <span class="k">this</span>.<span class="i">interpreter</span> = <span class="i">interpreter</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, create new file</div>

<p>Since the resolver needs to visit every node in the syntax tree, it implements
the visitor abstraction we already have in place. Only a few kinds of nodes are
interesting when it comes to resolving variables:</p>
<ul>
<li>
<p>A block statement introduces a new scope for the statements it contains.</p>
</li>
<li>
<p>A function declaration introduces a new scope for its body and binds its
parameters in that scope.</p>
</li>
<li>
<p>A variable declaration adds a new variable to the current scope.</p>
</li>
<li>
<p>Variable and assignment expressions need to have their variables resolved.</p>
</li>
</ul>
<p>The rest of the nodes don&rsquo;t do anything special, but we still need to implement
visit methods for them that traverse into their subtrees. Even though a <code>+</code>
expression doesn&rsquo;t <em>itself</em> have any variables to resolve, either of its
operands might.</p>
<h3><a href="#resolving-blocks" id="resolving-blocks"><small>11&#8202;.&#8202;3&#8202;.&#8202;1</small>Resolving blocks</a></h3>
<p>We start with blocks since they create the local scopes where all the magic
happens.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>Resolver</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitBlockStmt</span>(<span class="t">Stmt</span>.<span class="t">Block</span> <span class="i">stmt</span>) {
    <span class="i">beginScope</span>();
    <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">statements</span>);
    <span class="i">endScope</span>();
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>Resolver</em>()</div>

<p>This begins a new scope, traverses into the statements inside the block, and
then discards the scope. The fun stuff lives in those helper methods. We start
with the simple one.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>Resolver</em>()</div>
<pre>  <span class="t">void</span> <span class="i">resolve</span>(<span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span>) {
    <span class="k">for</span> (<span class="t">Stmt</span> <span class="i">statement</span> : <span class="i">statements</span>) {
      <span class="i">resolve</span>(<span class="i">statement</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>Resolver</em>()</div>

<p>This walks a list of statements and resolves each one. It in turn calls:</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitBlockStmt</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">resolve</span>(<span class="t">Stmt</span> <span class="i">stmt</span>) {
    <span class="i">stmt</span>.<span class="i">accept</span>(<span class="k">this</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitBlockStmt</em>()</div>

<p>While we&rsquo;re at it, let&rsquo;s add another overload that we&rsquo;ll need later for
resolving an expression.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>resolve</em>(Stmt stmt)</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">resolve</span>(<span class="t">Expr</span> <span class="i">expr</span>) {
    <span class="i">expr</span>.<span class="i">accept</span>(<span class="k">this</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>resolve</em>(Stmt stmt)</div>

<p>These methods are similar to the <code>evaluate()</code> and <code>execute()</code> methods in
Interpreter<span class="em">&mdash;</span>they turn around and apply the Visitor pattern to the given
syntax tree node.</p>
<p>The really interesting behavior is around scopes. A new block scope is created
like so:</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>resolve</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">beginScope</span>() {
    <span class="i">scopes</span>.<span class="i">push</span>(<span class="k">new</span> <span class="t">HashMap</span>&lt;<span class="t">String</span>, <span class="t">Boolean</span>&gt;());
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>resolve</em>()</div>

<p>Lexical scopes nest in both the interpreter and the resolver. They behave like a
stack. The interpreter implements that stack using a linked list<span class="em">&mdash;</span>the chain of
Environment objects. In the resolver, we use an actual Java Stack.</p>
<div class="codehilite"><pre class="insert-before">  private final Interpreter interpreter;
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in class <em>Resolver</em></div>
<pre class="insert">  <span class="k">private</span> <span class="k">final</span> <span class="t">Stack</span>&lt;<span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">Boolean</span>&gt;&gt; <span class="i">scopes</span> = <span class="k">new</span> <span class="t">Stack</span>&lt;&gt;();
</pre><pre class="insert-after">

  Resolver(Interpreter interpreter) {
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in class <em>Resolver</em></div>

<p>This field keeps track of the stack of scopes currently, uh, in scope. Each
element in the stack is a Map representing a single block scope. Keys, as in
Environment, are variable names. The values are Booleans, for a reason I&rsquo;ll
explain soon.</p>
<p>The scope stack is only used for local block scopes. Variables declared at the
top level in the global scope are not tracked by the resolver since they are
more dynamic in Lox. When resolving a variable, if we can&rsquo;t find it in the stack
of local scopes, we assume it must be global.</p>
<p>Since scopes are stored in an explicit stack, exiting one is straightforward.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>beginScope</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">endScope</span>() {
    <span class="i">scopes</span>.<span class="i">pop</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>beginScope</em>()</div>

<p>Now we can push and pop a stack of empty scopes. Let&rsquo;s put some things in them.</p>
<h3><a href="#resolving-variable-declarations" id="resolving-variable-declarations"><small>11&#8202;.&#8202;3&#8202;.&#8202;2</small>Resolving variable declarations</a></h3>
<p>Resolving a variable declaration adds a new entry to the current innermost
scope&rsquo;s map. That seems simple, but there&rsquo;s a little dance we need to do.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitBlockStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitVarStmt</span>(<span class="t">Stmt</span>.<span class="t">Var</span> <span class="i">stmt</span>) {
    <span class="i">declare</span>(<span class="i">stmt</span>.<span class="i">name</span>);
    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">initializer</span> != <span class="k">null</span>) {
      <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">initializer</span>);
    }
    <span class="i">define</span>(<span class="i">stmt</span>.<span class="i">name</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitBlockStmt</em>()</div>

<p>We split binding into two steps, declaring then defining, in order to handle
funny edge cases like this:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer&quot;</span>;
{
  <span class="k">var</span> <span class="i">a</span> = <span class="i">a</span>;
}
</pre></div>
<p>What happens when the initializer for a local variable refers to a variable with
the same name as the variable being declared? We have a few options:</p>
<ol>
<li>
<p><strong>Run the initializer, then put the new variable in scope.</strong> Here, the new
local <code>a</code> would be initialized with &ldquo;outer&rdquo;, the value of the <em>global</em> one.
In other words, the previous declaration would desugar to:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">temp</span> = <span class="i">a</span>; <span class="c">// Run the initializer.</span>
<span class="k">var</span> <span class="i">a</span>;        <span class="c">// Declare the variable.</span>
<span class="i">a</span> = <span class="i">temp</span>;     <span class="c">// Initialize it.</span>
</pre></div>
</li>
<li>
<p><strong>Put the new variable in scope, then run the initializer.</strong> This means you
could observe a variable before it&rsquo;s initialized, so we would need to figure
out what value it would have then. Probably <code>nil</code>. That means the new local
<code>a</code> would be re-initialized to its own implicitly initialized value, <code>nil</code>.
Now the desugaring would look like:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span>; <span class="c">// Define the variable.</span>
<span class="i">a</span> = <span class="i">a</span>; <span class="c">// Run the initializer.</span>
</pre></div>
</li>
<li>
<p><strong>Make it an error to reference a variable in its initializer.</strong> Have the
interpreter fail either at compile time or runtime if an initializer
mentions the variable being initialized.</p>
</li>
</ol>
<p>Does either of those first two options look like something a user actually
<em>wants</em>? Shadowing is rare and often an error, so initializing a shadowing
variable based on the value of the shadowed one seems unlikely to be deliberate.</p>
<p>The second option is even less useful. The new variable will <em>always</em> have the
value <code>nil</code>. There is never any point in mentioning it by name. You could use an
explicit <code>nil</code> instead.</p>
<p>Since the first two options are likely to mask user errors, we&rsquo;ll take the
third. Further, we&rsquo;ll make it a compile error instead of a runtime one. That
way, the user is alerted to the problem before any code is run.</p>
<p>In order to do that, as we visit expressions, we need to know if we&rsquo;re inside
the initializer for some variable. We do that by splitting binding into two
steps. The first is <strong>declaring</strong> it.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>endScope</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">declare</span>(<span class="t">Token</span> <span class="i">name</span>) {
    <span class="k">if</span> (<span class="i">scopes</span>.<span class="i">isEmpty</span>()) <span class="k">return</span>;

    <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">Boolean</span>&gt; <span class="i">scope</span> = <span class="i">scopes</span>.<span class="i">peek</span>();
    <span class="i">scope</span>.<span class="i">put</span>(<span class="i">name</span>.<span class="i">lexeme</span>, <span class="k">false</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>endScope</em>()</div>

<p>Declaration adds the variable to the innermost scope so that it shadows any
outer one and so that we know the variable exists. We mark it as &ldquo;not ready yet&rdquo;
by binding its name to <code>false</code> in the scope map. The value associated with a key
in the scope map represents whether or not we have finished resolving that
variable&rsquo;s initializer.</p>
<p>After declaring the variable, we resolve its initializer expression in that same
scope where the new variable now exists but is unavailable. Once the initializer
expression is done, the variable is ready for prime time. We do that by
<strong>defining</strong> it.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>declare</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">define</span>(<span class="t">Token</span> <span class="i">name</span>) {
    <span class="k">if</span> (<span class="i">scopes</span>.<span class="i">isEmpty</span>()) <span class="k">return</span>;
    <span class="i">scopes</span>.<span class="i">peek</span>().<span class="i">put</span>(<span class="i">name</span>.<span class="i">lexeme</span>, <span class="k">true</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>declare</em>()</div>

<p>We set the variable&rsquo;s value in the scope map to <code>true</code> to mark it as fully
initialized and available for use. It&rsquo;s alive!</p>
<h3><a href="#resolving-variable-expressions" id="resolving-variable-expressions"><small>11&#8202;.&#8202;3&#8202;.&#8202;3</small>Resolving variable expressions</a></h3>
<p>Variable declarations<span class="em">&mdash;</span>and function declarations, which we&rsquo;ll get to<span class="em">&mdash;</span>write
to the scope maps. Those maps are read when we resolve variable expressions.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitVarStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitVariableExpr</span>(<span class="t">Expr</span>.<span class="t">Variable</span> <span class="i">expr</span>) {
    <span class="k">if</span> (!<span class="i">scopes</span>.<span class="i">isEmpty</span>() &amp;&amp;
        <span class="i">scopes</span>.<span class="i">peek</span>().<span class="i">get</span>(<span class="i">expr</span>.<span class="i">name</span>.<span class="i">lexeme</span>) == <span class="t">Boolean</span>.<span class="i">FALSE</span>) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">expr</span>.<span class="i">name</span>,
          <span class="s">&quot;Can&#39;t read local variable in its own initializer.&quot;</span>);
    }

    <span class="i">resolveLocal</span>(<span class="i">expr</span>, <span class="i">expr</span>.<span class="i">name</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitVarStmt</em>()</div>

<p>First, we check to see if the variable is being accessed inside its own
initializer. This is where the values in the scope map come into play. If the
variable exists in the current scope but its value is <code>false</code>, that means we
have declared it but not yet defined it. We report that error.</p>
<p>After that check, we actually resolve the variable itself using this helper:</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>define</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">resolveLocal</span>(<span class="t">Expr</span> <span class="i">expr</span>, <span class="t">Token</span> <span class="i">name</span>) {
    <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="i">scopes</span>.<span class="i">size</span>() - <span class="n">1</span>; <span class="i">i</span> &gt;= <span class="n">0</span>; <span class="i">i</span>--) {
      <span class="k">if</span> (<span class="i">scopes</span>.<span class="i">get</span>(<span class="i">i</span>).<span class="i">containsKey</span>(<span class="i">name</span>.<span class="i">lexeme</span>)) {
        <span class="i">interpreter</span>.<span class="i">resolve</span>(<span class="i">expr</span>, <span class="i">scopes</span>.<span class="i">size</span>() - <span class="n">1</span> - <span class="i">i</span>);
        <span class="k">return</span>;
      }
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>define</em>()</div>

<p>This looks, for good reason, a lot like the code in Environment for evaluating a
variable. We start at the innermost scope and work outwards, looking in each map
for a matching name. If we find the variable, we resolve it, passing in the
number of scopes between the current innermost scope and the scope where the
variable was found. So, if the variable was found in the current scope, we
pass in 0. If it&rsquo;s in the immediately enclosing scope, 1. You get the idea.</p>
<p>If we walk through all of the block scopes and never find the variable, we leave
it unresolved and assume it&rsquo;s global. We&rsquo;ll get to the implementation of that
<code>resolve()</code> method a little later. For now, let&rsquo;s keep on cranking through the
other syntax nodes.</p>
<h3><a href="#resolving-assignment-expressions" id="resolving-assignment-expressions"><small>11&#8202;.&#8202;3&#8202;.&#8202;4</small>Resolving assignment expressions</a></h3>
<p>The other expression that references a variable is assignment. Resolving one
looks like this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitVarStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitAssignExpr</span>(<span class="t">Expr</span>.<span class="t">Assign</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">value</span>);
    <span class="i">resolveLocal</span>(<span class="i">expr</span>, <span class="i">expr</span>.<span class="i">name</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitVarStmt</em>()</div>

<p>First, we resolve the expression for the assigned value in case it also contains
references to other variables. Then we use our existing <code>resolveLocal()</code> method
to resolve the variable that&rsquo;s being assigned to.</p>
<h3><a href="#resolving-function-declarations" id="resolving-function-declarations"><small>11&#8202;.&#8202;3&#8202;.&#8202;5</small>Resolving function declarations</a></h3>
<p>Finally, functions. Functions both bind names and introduce a scope. The name of
the function itself is bound in the surrounding scope where the function is
declared. When we step into the function&rsquo;s body, we also bind its parameters
into that inner function scope.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitBlockStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitFunctionStmt</span>(<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">stmt</span>) {
    <span class="i">declare</span>(<span class="i">stmt</span>.<span class="i">name</span>);
    <span class="i">define</span>(<span class="i">stmt</span>.<span class="i">name</span>);

    <span class="i">resolveFunction</span>(<span class="i">stmt</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitBlockStmt</em>()</div>

<p>Similar to <code>visitVarStmt()</code>, we declare and define the name of the function
in the current scope. Unlike variables, though, we define the name eagerly,
before resolving the function&rsquo;s body. This lets a function recursively refer to
itself inside its own body.</p>
<p>Then we resolve the function&rsquo;s body using this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>resolve</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">resolveFunction</span>(<span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">function</span>) {
    <span class="i">beginScope</span>();
    <span class="k">for</span> (<span class="t">Token</span> <span class="i">param</span> : <span class="i">function</span>.<span class="i">params</span>) {
      <span class="i">declare</span>(<span class="i">param</span>);
      <span class="i">define</span>(<span class="i">param</span>);
    }
    <span class="i">resolve</span>(<span class="i">function</span>.<span class="i">body</span>);
    <span class="i">endScope</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>resolve</em>()</div>

<p>It&rsquo;s a separate method since we will also use it for resolving Lox methods when
we add classes later. It creates a new scope for the body and then binds
variables for each of the function&rsquo;s parameters.</p>
<p>Once that&rsquo;s ready, it resolves the function body in that scope. This is
different from how the interpreter handles function declarations. At <em>runtime</em>,
declaring a function doesn&rsquo;t do anything with the function&rsquo;s body. The body
doesn&rsquo;t get touched until later when the function is called. In a <em>static</em>
analysis, we immediately traverse into the body right then and there.</p>
<h3><a href="#resolving-the-other-syntax-tree-nodes" id="resolving-the-other-syntax-tree-nodes"><small>11&#8202;.&#8202;3&#8202;.&#8202;6</small>Resolving the other syntax tree nodes</a></h3>
<p>That covers the interesting corners of the grammar. We handle every place where
a variable is declared, read, or written, and every place where a scope is
created or destroyed. Even though they aren&rsquo;t affected by variable resolution,
we also need visit methods for all of the other syntax tree nodes in order to
recurse into their subtrees. <span name="boring">Sorry</span> this bit is
boring, but bear with me. We&rsquo;ll go kind of &ldquo;top down&rdquo; and start with statements.</p>
<aside name="boring">
<p>I did say the book would have every single line of code for these interpreters.
I didn&rsquo;t say they&rsquo;d all be exciting.</p>
</aside>
<p>An expression statement contains a single expression to traverse.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitBlockStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitExpressionStmt</span>(<span class="t">Stmt</span>.<span class="t">Expression</span> <span class="i">stmt</span>) {
    <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">expression</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitBlockStmt</em>()</div>

<p>An if statement has an expression for its condition and one or two statements
for the branches.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitFunctionStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitIfStmt</span>(<span class="t">Stmt</span>.<span class="t">If</span> <span class="i">stmt</span>) {
    <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">condition</span>);
    <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">thenBranch</span>);
    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">elseBranch</span> != <span class="k">null</span>) <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">elseBranch</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitFunctionStmt</em>()</div>

<p>Here, we see how resolution is different from interpretation. When we resolve an
<code>if</code> statement, there is no control flow. We resolve the condition and <em>both</em>
branches. Where a dynamic execution steps only into the branch that <em>is</em> run, a
static analysis is conservative<span class="em">&mdash;</span>it analyzes any branch that <em>could</em> be run.
Since either one could be reached at runtime, we resolve both.</p>
<p>Like expression statements, a <code>print</code> statement contains a single subexpression.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitIfStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitPrintStmt</span>(<span class="t">Stmt</span>.<span class="t">Print</span> <span class="i">stmt</span>) {
    <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">expression</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitIfStmt</em>()</div>

<p>Same deal for return.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitPrintStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitReturnStmt</span>(<span class="t">Stmt</span>.<span class="t">Return</span> <span class="i">stmt</span>) {
    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">value</span> != <span class="k">null</span>) {
      <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">value</span>);
    }

    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitPrintStmt</em>()</div>

<p>As in <code>if</code> statements, with a <code>while</code> statement, we resolve its condition and
resolve the body exactly once.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitVarStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitWhileStmt</span>(<span class="t">Stmt</span>.<span class="t">While</span> <span class="i">stmt</span>) {
    <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">condition</span>);
    <span class="i">resolve</span>(<span class="i">stmt</span>.<span class="i">body</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitVarStmt</em>()</div>

<p>That covers all the statements. On to expressions<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<p>Our old friend the binary expression. We traverse into and resolve both
operands.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitAssignExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitBinaryExpr</span>(<span class="t">Expr</span>.<span class="t">Binary</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">left</span>);
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">right</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitAssignExpr</em>()</div>

<p>Calls are similar<span class="em">&mdash;</span>we walk the argument list and resolve them all. The thing
being called is also an expression (usually a variable expression), so that gets
resolved too.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitBinaryExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitCallExpr</span>(<span class="t">Expr</span>.<span class="t">Call</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">callee</span>);

    <span class="k">for</span> (<span class="t">Expr</span> <span class="i">argument</span> : <span class="i">expr</span>.<span class="i">arguments</span>) {
      <span class="i">resolve</span>(<span class="i">argument</span>);
    }

    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitBinaryExpr</em>()</div>

<p>Parentheses are easy.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitCallExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitGroupingExpr</span>(<span class="t">Expr</span>.<span class="t">Grouping</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">expression</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitCallExpr</em>()</div>

<p>Literals are easiest of all.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitGroupingExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitLiteralExpr</span>(<span class="t">Expr</span>.<span class="t">Literal</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitGroupingExpr</em>()</div>

<p>A literal expression doesn&rsquo;t mention any variables and doesn&rsquo;t contain any
subexpressions so there is no work to do.</p>
<p>Since a static analysis does no control flow or short-circuiting, logical
expressions are exactly the same as other binary operators.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitLiteralExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitLogicalExpr</span>(<span class="t">Expr</span>.<span class="t">Logical</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">left</span>);
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">right</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitLiteralExpr</em>()</div>

<p>And, finally, the last node. We resolve its one operand.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>visitLogicalExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitUnaryExpr</span>(<span class="t">Expr</span>.<span class="t">Unary</span> <span class="i">expr</span>) {
    <span class="i">resolve</span>(<span class="i">expr</span>.<span class="i">right</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>visitLogicalExpr</em>()</div>

<p>With all of these visit methods, the Java compiler should be satisfied that
Resolver fully implements Stmt.Visitor and Expr.Visitor. Now is a good time to
take a break, have a snack, maybe a little nap.</p>
<h2><a href="#interpreting-resolved-variables" id="interpreting-resolved-variables"><small>11&#8202;.&#8202;4</small>Interpreting Resolved Variables</a></h2>
<p>Let&rsquo;s see what our resolver is good for. Each time it visits a variable, it
tells the interpreter how many scopes there are between the current scope and
the scope where the variable is defined. At runtime, this corresponds exactly to
the number of <em>environments</em> between the current one and the enclosing one where
the interpreter can find the variable&rsquo;s value. The resolver hands that number to
the interpreter by calling this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>execute</em>()</div>
<pre>  <span class="t">void</span> <span class="i">resolve</span>(<span class="t">Expr</span> <span class="i">expr</span>, <span class="t">int</span> <span class="i">depth</span>) {
    <span class="i">locals</span>.<span class="i">put</span>(<span class="i">expr</span>, <span class="i">depth</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>execute</em>()</div>

<p>We want to store the resolution information somewhere so we can use it when the
variable or assignment expression is later executed, but where? One obvious
place is right in the syntax tree node itself. That&rsquo;s a fine approach, and
that&rsquo;s where many compilers store the results of analyses like this.</p>
<p>We could do that, but it would require mucking around with our syntax tree
generator. Instead, we&rsquo;ll take another common approach and store it off to the
<span name="side">side</span> in a map that associates each syntax tree node
with its resolved data.</p>
<aside name="side">
<p>I <em>think</em> I&rsquo;ve heard this map called a &ldquo;side table&rdquo; since it&rsquo;s a tabular data
structure that stores data separately from the objects it relates to. But
whenever I try to Google for that term, I get pages about furniture.</p>
</aside>
<p>Interactive tools like IDEs often incrementally reparse and re-resolve parts of
the user&rsquo;s program. It may be hard to find all of the bits of state that need
recalculating when they&rsquo;re hiding in the foliage of the syntax tree. A benefit
of storing this data outside of the nodes is that it makes it easy to <em>discard</em>
it<span class="em">&mdash;</span>simply clear the map.</p>
<div class="codehilite"><pre class="insert-before">  private Environment environment = globals;
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em></div>
<pre class="insert">  <span class="k">private</span> <span class="k">final</span> <span class="t">Map</span>&lt;<span class="t">Expr</span>, <span class="t">Integer</span>&gt; <span class="i">locals</span> = <span class="k">new</span> <span class="t">HashMap</span>&lt;&gt;();
</pre><pre class="insert-after">

  Interpreter() {
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em></div>

<p>You might think we&rsquo;d need some sort of nested tree structure to avoid getting
confused when there are multiple expressions that reference the same variable,
but each expression node is its own Java object with its own unique identity. A
single monolithic map doesn&rsquo;t have any trouble keeping them separated.</p>
<p>As usual, using a collection requires us to import a couple of names.</p>
<div class="codehilite"><pre class="insert-before">import java.util.ArrayList;
</pre><div class="source-file"><em>lox/Interpreter.java</em></div>
<pre class="insert"><span class="k">import</span> <span class="i">java.util.HashMap</span>;
</pre><pre class="insert-after">import java.util.List;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em></div>

<p>And:</p>
<div class="codehilite"><pre class="insert-before">import java.util.List;
</pre><div class="source-file"><em>lox/Interpreter.java</em></div>
<pre class="insert"><span class="k">import</span> <span class="i">java.util.Map</span>;
</pre><pre class="insert-after">

class Interpreter implements Expr.Visitor&lt;Object&gt;,
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em></div>

<h3><a href="#accessing-a-resolved-variable" id="accessing-a-resolved-variable"><small>11&#8202;.&#8202;4&#8202;.&#8202;1</small>Accessing a resolved variable</a></h3>
<p>Our interpreter now has access to each variable&rsquo;s resolved location. Finally, we
get to make use of that. We replace the visit method for variable expressions
with this:</p>
<div class="codehilite"><pre class="insert-before">  public Object visitVariableExpr(Expr.Variable expr) {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitVariableExpr</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">return</span> <span class="i">lookUpVariable</span>(<span class="i">expr</span>.<span class="i">name</span>, <span class="i">expr</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitVariableExpr</em>(), replace 1 line</div>

<p>That delegates to:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitVariableExpr</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Object</span> <span class="i">lookUpVariable</span>(<span class="t">Token</span> <span class="i">name</span>, <span class="t">Expr</span> <span class="i">expr</span>) {
    <span class="t">Integer</span> <span class="i">distance</span> = <span class="i">locals</span>.<span class="i">get</span>(<span class="i">expr</span>);
    <span class="k">if</span> (<span class="i">distance</span> != <span class="k">null</span>) {
      <span class="k">return</span> <span class="i">environment</span>.<span class="i">getAt</span>(<span class="i">distance</span>, <span class="i">name</span>.<span class="i">lexeme</span>);
    } <span class="k">else</span> {
      <span class="k">return</span> <span class="i">globals</span>.<span class="i">get</span>(<span class="i">name</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitVariableExpr</em>()</div>

<p>There are a couple of things going on here. First, we look up the resolved
distance in the map. Remember that we resolved only <em>local</em> variables. Globals
are treated specially and don&rsquo;t end up in the map (hence the name <code>locals</code>). So,
if we don&rsquo;t find a distance in the map, it must be global. In that case, we
look it up, dynamically, directly in the global environment. That throws a
runtime error if the variable isn&rsquo;t defined.</p>
<p>If we <em>do</em> get a distance, we have a local variable, and we get to take
advantage of the results of our static analysis. Instead of calling <code>get()</code>, we
call this new method on Environment:</p>
<div class="codehilite"><div class="source-file"><em>lox/Environment.java</em><br>
add after <em>define</em>()</div>
<pre>  <span class="t">Object</span> <span class="i">getAt</span>(<span class="t">int</span> <span class="i">distance</span>, <span class="t">String</span> <span class="i">name</span>) {
    <span class="k">return</span> <span class="i">ancestor</span>(<span class="i">distance</span>).<span class="i">values</span>.<span class="i">get</span>(<span class="i">name</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, add after <em>define</em>()</div>

<p>The old <code>get()</code> method dynamically walks the chain of enclosing environments,
scouring each one to see if the variable might be hiding in there somewhere. But
now we know exactly which environment in the chain will have the variable. We
reach it using this helper method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Environment.java</em><br>
add after <em>define</em>()</div>
<pre>  <span class="t">Environment</span> <span class="i">ancestor</span>(<span class="t">int</span> <span class="i">distance</span>) {
    <span class="t">Environment</span> <span class="i">environment</span> = <span class="k">this</span>;
    <span class="k">for</span> (<span class="t">int</span> <span class="i">i</span> = <span class="n">0</span>; <span class="i">i</span> &lt; <span class="i">distance</span>; <span class="i">i</span>++) {
      <span class="i">environment</span> = <span class="i">environment</span>.<span class="i">enclosing</span>;<span name="coupled"> </span>
    }

    <span class="k">return</span> <span class="i">environment</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, add after <em>define</em>()</div>

<p>This walks a fixed number of hops up the parent chain and returns the
environment there. Once we have that, <code>getAt()</code> simply returns the value of the
variable in that environment&rsquo;s map. It doesn&rsquo;t even have to check to see if the
variable is there<span class="em">&mdash;</span>we know it will be because the resolver already found it
before.</p>
<aside name="coupled">
<p>The way the interpreter assumes the variable is in that map feels like flying
blind. The interpreter code trusts that the resolver did its job and resolved
the variable correctly. This implies a deep coupling between these two classes.
In the resolver, each line of code that touches a scope must have its exact
match in the interpreter for modifying an environment.</p>
<p>I felt that coupling firsthand because as I wrote the code for the book, I
ran into a couple of subtle bugs where the resolver and interpreter code were
slightly out of sync. Tracking those down was difficult. One tool to make that
easier is to have the interpreter explicitly assert<span class="em">&mdash;</span>using Java&rsquo;s assert
statements or some other validation tool<span class="em">&mdash;</span>the contract it expects the resolver
to have already upheld.</p>
</aside>
<h3><a href="#assigning-to-a-resolved-variable" id="assigning-to-a-resolved-variable"><small>11&#8202;.&#8202;4&#8202;.&#8202;2</small>Assigning to a resolved variable</a></h3>
<p>We can also use a variable by assigning to it. The changes to visiting an
assignment expression are similar.</p>
<div class="codehilite"><pre class="insert-before">  public Object visitAssignExpr(Expr.Assign expr) {
    Object value = evaluate(expr.value);
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in <em>visitAssignExpr</em>()<br>
replace 1 line</div>
<pre class="insert">

    <span class="t">Integer</span> <span class="i">distance</span> = <span class="i">locals</span>.<span class="i">get</span>(<span class="i">expr</span>);
    <span class="k">if</span> (<span class="i">distance</span> != <span class="k">null</span>) {
      <span class="i">environment</span>.<span class="i">assignAt</span>(<span class="i">distance</span>, <span class="i">expr</span>.<span class="i">name</span>, <span class="i">value</span>);
    } <span class="k">else</span> {
      <span class="i">globals</span>.<span class="i">assign</span>(<span class="i">expr</span>.<span class="i">name</span>, <span class="i">value</span>);
    }

</pre><pre class="insert-after">    return value;
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in <em>visitAssignExpr</em>(), replace 1 line</div>

<p>Again, we look up the variable&rsquo;s scope distance. If not found, we assume it&rsquo;s
global and handle it the same way as before. Otherwise, we call this new method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Environment.java</em><br>
add after <em>getAt</em>()</div>
<pre>  <span class="t">void</span> <span class="i">assignAt</span>(<span class="t">int</span> <span class="i">distance</span>, <span class="t">Token</span> <span class="i">name</span>, <span class="t">Object</span> <span class="i">value</span>) {
    <span class="i">ancestor</span>(<span class="i">distance</span>).<span class="i">values</span>.<span class="i">put</span>(<span class="i">name</span>.<span class="i">lexeme</span>, <span class="i">value</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, add after <em>getAt</em>()</div>

<p>As <code>getAt()</code> is to <code>get()</code>, <code>assignAt()</code> is to <code>assign()</code>. It walks a fixed
number of environments, and then stuffs the new value in that map.</p>
<p>Those are the only changes to Interpreter. This is why I chose a representation
for our resolved data that was minimally invasive. All of the rest of the nodes
continue working as they did before. Even the code for modifying environments is
unchanged.</p>
<h3><a href="#running-the-resolver" id="running-the-resolver"><small>11&#8202;.&#8202;4&#8202;.&#8202;3</small>Running the resolver</a></h3>
<p>We do need to actually <em>run</em> the resolver, though. We insert the new pass after
the parser does its magic.</p>
<div class="codehilite"><pre class="insert-before">    // Stop if there was a syntax error.
    if (hadError) return;

</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>run</em>()</div>
<pre class="insert">    <span class="t">Resolver</span> <span class="i">resolver</span> = <span class="k">new</span> <span class="t">Resolver</span>(<span class="i">interpreter</span>);
    <span class="i">resolver</span>.<span class="i">resolve</span>(<span class="i">statements</span>);

</pre><pre class="insert-after">    interpreter.interpret(statements);
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>run</em>()</div>

<p>We don&rsquo;t run the resolver if there are any parse errors. If the code has a
syntax error, it&rsquo;s never going to run, so there&rsquo;s little value in resolving it.
If the syntax is clean, we tell the resolver to do its thing. The resolver has a
reference to the interpreter and pokes the resolution data directly into it as
it walks over variables. When the interpreter runs next, it has everything it
needs.</p>
<p>At least, that&rsquo;s true if the resolver <em>succeeds</em>. But what about errors during
resolution?</p>
<h2><a href="#resolution-errors" id="resolution-errors"><small>11&#8202;.&#8202;5</small>Resolution Errors</a></h2>
<p>Since we are doing a semantic analysis pass, we have an opportunity to make
Lox&rsquo;s semantics more precise, and to help users catch bugs early before running
their code. Take a look at this bad boy:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">bad</span>() {
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;first&quot;</span>;
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;second&quot;</span>;
}
</pre></div>
<p>We do allow declaring multiple variables with the same name in the <em>global</em>
scope, but doing so in a local scope is probably a mistake. If the user knew the
variable already existed, they would have assigned to it instead of using <code>var</code>.
And if they <em>didn&rsquo;t</em> know it existed, they probably didn&rsquo;t intend to overwrite
the previous one.</p>
<p>We can detect this mistake statically while resolving.</p>
<div class="codehilite"><pre class="insert-before">    Map&lt;String, Boolean&gt; scope = scopes.peek();
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>declare</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">scope</span>.<span class="i">containsKey</span>(<span class="i">name</span>.<span class="i">lexeme</span>)) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">name</span>,
          <span class="s">&quot;Already a variable with this name in this scope.&quot;</span>);
    }

</pre><pre class="insert-after">    scope.put(name.lexeme, false);
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>declare</em>()</div>

<p>When we declare a variable in a local scope, we already know the names of every
variable previously declared in that same scope. If we see a collision, we
report an error.</p>
<h3><a href="#invalid-return-errors" id="invalid-return-errors"><small>11&#8202;.&#8202;5&#8202;.&#8202;1</small>Invalid return errors</a></h3>
<p>Here&rsquo;s another nasty little script:</p>
<div class="codehilite"><pre><span class="k">return</span> <span class="s">&quot;at top level&quot;</span>;
</pre></div>
<p>This executes a <code>return</code> statement, but it&rsquo;s not even inside a function at all.
It&rsquo;s top-level code. I don&rsquo;t know what the user <em>thinks</em> is going to happen, but
I don&rsquo;t think we want Lox to allow this.</p>
<p>We can extend the resolver to detect this statically. Much like we track scopes
as we walk the tree, we can track whether or not the code we are currently
visiting is inside a function declaration.</p>
<div class="codehilite"><pre class="insert-before">  private final Stack&lt;Map&lt;String, Boolean&gt;&gt; scopes = new Stack&lt;&gt;();
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in class <em>Resolver</em></div>
<pre class="insert">  <span class="k">private</span> <span class="t">FunctionType</span> <span class="i">currentFunction</span> = <span class="t">FunctionType</span>.<span class="i">NONE</span>;
</pre><pre class="insert-after">

  Resolver(Interpreter interpreter) {
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in class <em>Resolver</em></div>

<p>Instead of a bare Boolean, we use this funny enum:</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
add after <em>Resolver</em>()</div>
<pre>  <span class="k">private</span> <span class="k">enum</span> <span class="t">FunctionType</span> {
    <span class="i">NONE</span>,
    <span class="i">FUNCTION</span>
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, add after <em>Resolver</em>()</div>

<p>It seems kind of dumb now, but we&rsquo;ll add a couple more cases to it later and
then it will make more sense. When we resolve a function declaration, we pass
that in.</p>
<div class="codehilite"><pre class="insert-before">    define(stmt.name);

</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitFunctionStmt</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">resolveFunction</span>(<span class="i">stmt</span>, <span class="t">FunctionType</span>.<span class="i">FUNCTION</span>);
</pre><pre class="insert-after">    return null;
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitFunctionStmt</em>(), replace 1 line</div>

<p>Over in <code>resolveFunction()</code>, we take that parameter and store it in the field
before resolving the body.</p>
<div class="codehilite"><div class="source-file"><em>lox/Resolver.java</em><br>
method <em>resolveFunction</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">private</span> <span class="t">void</span> <span class="i">resolveFunction</span>(
      <span class="t">Stmt</span>.<span class="t">Function</span> <span class="i">function</span>, <span class="t">FunctionType</span> <span class="i">type</span>) {
    <span class="t">FunctionType</span> <span class="i">enclosingFunction</span> = <span class="i">currentFunction</span>;
    <span class="i">currentFunction</span> = <span class="i">type</span>;

</pre><pre class="insert-after">    beginScope();
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, method <em>resolveFunction</em>(), replace 1 line</div>

<p>We stash the previous value of the field in a local variable first. Remember,
Lox has local functions, so you can nest function declarations arbitrarily
deeply. We need to track not just that we&rsquo;re in a function, but <em>how many</em> we&rsquo;re
in.</p>
<p>We could use an explicit stack of FunctionType values for that, but instead
we&rsquo;ll piggyback on the JVM. We store the previous value in a local on the Java
stack. When we&rsquo;re done resolving the function body, we restore the field to that
value.</p>
<div class="codehilite"><pre class="insert-before">    endScope();
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>resolveFunction</em>()</div>
<pre class="insert">    <span class="i">currentFunction</span> = <span class="i">enclosingFunction</span>;
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>resolveFunction</em>()</div>

<p>Now that we can always tell whether or not we&rsquo;re inside a function declaration,
we check that when resolving a <code>return</code> statement.</p>
<div class="codehilite"><pre class="insert-before">  public Void visitReturnStmt(Stmt.Return stmt) {
</pre><div class="source-file"><em>lox/Resolver.java</em><br>
in <em>visitReturnStmt</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">currentFunction</span> == <span class="t">FunctionType</span>.<span class="i">NONE</span>) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">stmt</span>.<span class="i">keyword</span>, <span class="s">&quot;Can&#39;t return from top-level code.&quot;</span>);
    }

</pre><pre class="insert-after">    if (stmt.value != null) {
</pre></div>
<div class="source-file-narrow"><em>lox/Resolver.java</em>, in <em>visitReturnStmt</em>()</div>

<p>Neat, right?</p>
<p>There&rsquo;s one more piece. Back in the main Lox class that stitches everything
together, we are careful to not run the interpreter if any parse errors are
encountered. That check runs <em>before</em> the resolver so that we don&rsquo;t try to
resolve syntactically invalid code.</p>
<p>But we also need to skip the interpreter if there are resolution errors, so we
add <em>another</em> check.</p>
<div class="codehilite"><pre class="insert-before">    resolver.resolve(statements);
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>run</em>()</div>
<pre class="insert">

    <span class="c">// Stop if there was a resolution error.</span>
    <span class="k">if</span> (<span class="i">hadError</span>) <span class="k">return</span>;
</pre><pre class="insert-after">

    interpreter.interpret(statements);
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>run</em>()</div>

<p>You could imagine doing lots of other analysis in here. For example, if we added
<code>break</code> statements to Lox, we would probably want to ensure they are only used
inside loops.</p>
<p>We could go further and report warnings for code that isn&rsquo;t necessarily <em>wrong</em>
but probably isn&rsquo;t useful. For example, many IDEs will warn if you have
unreachable code after a <code>return</code> statement, or a local variable whose value is
never read. All of that would be pretty easy to add to our static visiting pass,
or as <span name="separate">separate</span> passes.</p>
<aside name="separate">
<p>The choice of how many different analyses to lump into a single pass is
difficult. Many small isolated passes, each with their own responsibility, are
simpler to implement and maintain. However, there is a real runtime cost to
traversing the syntax tree itself, so bundling multiple analyses into a single
pass is usually faster.</p>
</aside>
<p>But, for now, we&rsquo;ll stick with that limited amount of analysis. The important
part is that we fixed that one weird annoying edge case bug, though it might be
surprising that it took this much work to do it.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Why is it safe to eagerly define the variable bound to a function&rsquo;s name
when other variables must wait until after they are initialized before they
can be used?</p>
</li>
<li>
<p>How do other languages you know handle local variables that refer to the
same name in their initializer, like:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer&quot;</span>;
{
  <span class="k">var</span> <span class="i">a</span> = <span class="i">a</span>;
}
</pre></div>
<p>Is it a runtime error? Compile error? Allowed? Do they treat global
variables differently? Do you agree with their choices? Justify your answer.</p>
</li>
<li>
<p>Extend the resolver to report an error if a local variable is never used.</p>
</li>
<li>
<p>Our resolver calculates <em>which</em> environment the variable is found in, but
it&rsquo;s still looked up by name in that map. A more efficient environment
representation would store local variables in an array and look them up by
index.</p>
<p>Extend the resolver to associate a unique index with each local variable
declared in a scope. When resolving a variable access, look up both the
scope the variable is in and its index and store that. In the interpreter,
use that to quickly access a variable by its index instead of using a map.</p>
</li>
</ol>
</div>

<footer>
<a href="classes.html" class="next">
  Next Chapter: &ldquo;Classes&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/scanning-on-demand.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Scanning on Demand &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Scanning on Demand<small>16</small></a></h3>

<ul>
    <li><a href="#spinning-up-the-interpreter"><small>16.1</small> Spinning Up the Interpreter</a></li>
    <li><a href="#a-token-at-a-time"><small>16.2</small> A Token at a Time</a></li>
    <li><a href="#a-lexical-grammar-for-lox"><small>16.3</small> A Lexical Grammar for Lox</a></li>
    <li><a href="#identifiers-and-keywords"><small>16.4</small> Identifiers and Keywords</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="a-virtual-machine.html" title="A Virtual Machine" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="compiling-expressions.html" title="Compiling Expressions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="a-virtual-machine.html" title="A Virtual Machine" class="prev">←</a>
<a href="compiling-expressions.html" title="Compiling Expressions" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Scanning on Demand<small>16</small></a></h3>

<ul>
    <li><a href="#spinning-up-the-interpreter"><small>16.1</small> Spinning Up the Interpreter</a></li>
    <li><a href="#a-token-at-a-time"><small>16.2</small> A Token at a Time</a></li>
    <li><a href="#a-lexical-grammar-for-lox"><small>16.3</small> A Lexical Grammar for Lox</a></li>
    <li><a href="#identifiers-and-keywords"><small>16.4</small> Identifiers and Keywords</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="a-virtual-machine.html" title="A Virtual Machine" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="compiling-expressions.html" title="Compiling Expressions" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">16</div>
  <h1>Scanning on Demand</h1>

<blockquote>
<p>Literature is idiosyncratic arrangements in horizontal lines in only
twenty-six phonetic symbols, ten Arabic numbers, and about eight punctuation
marks.</p>
<p><cite>Kurt Vonnegut, <em>Like Shaking Hands With God: A Conversation about Writing</em></cite></p>
</blockquote>
<p>Our second interpreter, clox, has three phases<span class="em">&mdash;</span>scanner, compiler, and virtual
machine. A data structure joins each pair of phases. Tokens flow from scanner to
compiler, and chunks of bytecode from compiler to VM. We began our
implementation near the end with <a href="chunks-of-bytecode.html">chunks</a> and the <a href="a-virtual-machine.html">VM</a>. Now, we&rsquo;re going to
hop back to the beginning and build a scanner that makes tokens. In the
<a href="compiling-expressions.html">next chapter</a>, we&rsquo;ll tie the two ends together with our bytecode compiler.</p><img src="image/scanning-on-demand/pipeline.png" alt="Source code &rarr; scanner &rarr; tokens &rarr; compiler &rarr; bytecode chunk &rarr; VM." />
<p>I&rsquo;ll admit, this is not the most exciting chapter in the book. With two
implementations of the same language, there&rsquo;s bound to be some redundancy. I did
sneak in a few interesting differences compared to jlox&rsquo;s scanner. Read on to
see what they are.</p>
<h2><a href="#spinning-up-the-interpreter" id="spinning-up-the-interpreter"><small>16&#8202;.&#8202;1</small>Spinning Up the Interpreter</a></h2>
<p>Now that we&rsquo;re building the front end, we can get clox running like a real
interpreter. No more hand-authored chunks of bytecode. It&rsquo;s time for a REPL and
script loading. Tear out most of the code in <code>main()</code> and replace it with:</p>
<div class="codehilite"><pre class="insert-before">int main(int argc, const char* argv[]) {
  initVM();

</pre><div class="source-file"><em>main.c</em><br>
in <em>main</em>()<br>
replace 26 lines</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">argc</span> == <span class="n">1</span>) {
    <span class="i">repl</span>();
  } <span class="k">else</span> <span class="k">if</span> (<span class="i">argc</span> == <span class="n">2</span>) {
    <span class="i">runFile</span>(<span class="i">argv</span>[<span class="n">1</span>]);
  } <span class="k">else</span> {
    <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;Usage: clox [path]</span><span class="e">\n</span><span class="s">&quot;</span>);
    <span class="i">exit</span>(<span class="n">64</span>);
  }

  <span class="i">freeVM</span>();
</pre><pre class="insert-after">  return 0;
}
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>main</em>(), replace 26 lines</div>

<p>If you pass <span name="args">no arguments</span> to the executable, you are
dropped into the REPL. A single command line argument is understood to be the
path to a script to run.</p>
<aside name="args">
<p>The code tests for one and two arguments, not zero and one, because the first
argument in <code>argv</code> is always the name of the executable being run.</p>
</aside>
<p>We&rsquo;ll need a few system headers, so let&rsquo;s get them all out of the way.</p>
<div class="codehilite"><div class="source-file"><em>main.c</em><br>
add to top of file</div>
<pre class="insert"><span class="a">#include &lt;stdio.h&gt;</span>
<span class="a">#include &lt;stdlib.h&gt;</span>
<span class="a">#include &lt;string.h&gt;</span>

</pre><pre class="insert-after">#include &quot;common.h&quot;
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, add to top of file</div>

<p>Next, we get the REPL up and REPL-ing.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;vm.h&quot;
</pre><div class="source-file"><em>main.c</em></div>
<pre class="insert">

<span class="k">static</span> <span class="t">void</span> <span class="i">repl</span>() {
  <span class="t">char</span> <span class="i">line</span>[<span class="n">1024</span>];
  <span class="k">for</span> (;;) {
    <span class="i">printf</span>(<span class="s">&quot;&gt; &quot;</span>);

    <span class="k">if</span> (!<span class="i">fgets</span>(<span class="i">line</span>, <span class="k">sizeof</span>(<span class="i">line</span>), <span class="i">stdin</span>)) {
      <span class="i">printf</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>);
      <span class="k">break</span>;
    }

    <span class="i">interpret</span>(<span class="i">line</span>);
  }
}
</pre></div>
<div class="source-file-narrow"><em>main.c</em></div>

<p>A quality REPL handles input that spans multiple lines gracefully and doesn&rsquo;t
have a hardcoded line length limit. This REPL here is a little more, ahem,
austere, but it&rsquo;s fine for our purposes.</p>
<p>The real work happens in <code>interpret()</code>. We&rsquo;ll get to that soon, but first let&rsquo;s
take care of loading scripts.</p>
<div class="codehilite"><div class="source-file"><em>main.c</em><br>
add after <em>repl</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">runFile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">path</span>) {
  <span class="t">char</span>* <span class="i">source</span> = <span class="i">readFile</span>(<span class="i">path</span>);
  <span class="t">InterpretResult</span> <span class="i">result</span> = <span class="i">interpret</span>(<span class="i">source</span>);
  <span class="i">free</span>(<span class="i">source</span>);<span name="owner"> </span>

  <span class="k">if</span> (<span class="i">result</span> == <span class="a">INTERPRET_COMPILE_ERROR</span>) <span class="i">exit</span>(<span class="n">65</span>);
  <span class="k">if</span> (<span class="i">result</span> == <span class="a">INTERPRET_RUNTIME_ERROR</span>) <span class="i">exit</span>(<span class="n">70</span>);
}
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, add after <em>repl</em>()</div>

<p>We read the file and execute the resulting string of Lox source code. Then,
based on the result of that, we set the exit code appropriately because we&rsquo;re
scrupulous tool builders and care about little details like that.</p>
<p>We also need to free the source code string because <code>readFile()</code> dynamically
allocates it and passes ownership to its caller. That function looks like this:</p>
<aside name="owner">
<p>C asks us not just to manage memory explicitly, but <em>mentally</em>. We programmers
have to remember the ownership rules and hand-implement them throughout the
program. Java just does it for us. C++ gives us tools to encode the policy
directly so that the compiler validates it for us.</p>
<p>I like C&rsquo;s simplicity, but we pay a real price for it<span class="em">&mdash;</span>the language requires
us to be more conscientious.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>main.c</em><br>
add after <em>repl</em>()</div>
<pre><span class="k">static</span> <span class="t">char</span>* <span class="i">readFile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">path</span>) {
  <span class="a">FILE</span>* <span class="i">file</span> = <span class="i">fopen</span>(<span class="i">path</span>, <span class="s">&quot;rb&quot;</span>);

  <span class="i">fseek</span>(<span class="i">file</span>, <span class="n">0L</span>, <span class="a">SEEK_END</span>);
  <span class="t">size_t</span> <span class="i">fileSize</span> = <span class="i">ftell</span>(<span class="i">file</span>);
  <span class="i">rewind</span>(<span class="i">file</span>);

  <span class="t">char</span>* <span class="i">buffer</span> = (<span class="t">char</span>*)<span class="i">malloc</span>(<span class="i">fileSize</span> + <span class="n">1</span>);
  <span class="t">size_t</span> <span class="i">bytesRead</span> = <span class="i">fread</span>(<span class="i">buffer</span>, <span class="k">sizeof</span>(<span class="t">char</span>), <span class="i">fileSize</span>, <span class="i">file</span>);
  <span class="i">buffer</span>[<span class="i">bytesRead</span>] = <span class="s">&#39;\0&#39;</span>;

  <span class="i">fclose</span>(<span class="i">file</span>);
  <span class="k">return</span> <span class="i">buffer</span>;
}
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, add after <em>repl</em>()</div>

<p>Like a lot of C code, it takes more effort than it seems like it should,
especially for a language expressly designed for operating systems. The
difficult part is that we want to allocate a big enough string to read the whole
file, but we don&rsquo;t know how big the file is until we&rsquo;ve read it.</p>
<p>The code here is the classic trick to solve that. We open the file, but before
reading it, we seek to the very end using <code>fseek()</code>. Then we call <code>ftell()</code>
which tells us how many bytes we are from the start of the file. Since we seeked
(sought?) to the end, that&rsquo;s the size. We rewind back to the beginning, allocate
a string of that <span name="one">size</span>, and read the whole file in a
single batch.</p>
<aside name="one">
<p>Well, that size <em>plus one</em>. Always gotta remember to make room for the null
byte.</p>
</aside>
<p>So we&rsquo;re done, right? Not quite. These function calls, like most calls in the C
standard library, can fail. If this were Java, the failures would be thrown as
exceptions and automatically unwind the stack so we wouldn&rsquo;t <em>really</em> need to
handle them. In C, if we don&rsquo;t check for them, they silently get ignored.</p>
<p>This isn&rsquo;t really a book on good C programming practice, but I hate to encourage
bad style, so let&rsquo;s go ahead and handle the errors. It&rsquo;s good for us, like
eating our vegetables or flossing.</p>
<p>Fortunately, we don&rsquo;t need to do anything particularly clever if a failure
occurs. If we can&rsquo;t correctly read the user&rsquo;s script, all we can really do is
tell the user and exit the interpreter gracefully. First of all, we might fail
to open the file.</p>
<div class="codehilite"><pre class="insert-before">  FILE* file = fopen(path, &quot;rb&quot;);
</pre><div class="source-file"><em>main.c</em><br>
in <em>readFile</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">file</span> == <span class="a">NULL</span>) {
    <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;Could not open file </span><span class="e">\&quot;</span><span class="s">%s</span><span class="e">\&quot;</span><span class="s">.</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">path</span>);
    <span class="i">exit</span>(<span class="n">74</span>);
  }
</pre><pre class="insert-after">

  fseek(file, 0L, SEEK_END);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>readFile</em>()</div>

<p>This can happen if the file doesn&rsquo;t exist or the user doesn&rsquo;t have access to it.
It&rsquo;s pretty common<span class="em">&mdash;</span>people mistype paths all the time.</p>
<p>This failure is much rarer:</p>
<div class="codehilite"><pre class="insert-before">  char* buffer = (char*)malloc(fileSize + 1);
</pre><div class="source-file"><em>main.c</em><br>
in <em>readFile</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">buffer</span> == <span class="a">NULL</span>) {
    <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;Not enough memory to read </span><span class="e">\&quot;</span><span class="s">%s</span><span class="e">\&quot;</span><span class="s">.</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">path</span>);
    <span class="i">exit</span>(<span class="n">74</span>);
  }

</pre><pre class="insert-after">  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>readFile</em>()</div>

<p>If we can&rsquo;t even allocate enough memory to read the Lox script, the user&rsquo;s
probably got bigger problems to worry about, but we should do our best to at
least let them know.</p>
<p>Finally, the read itself may fail.</p>
<div class="codehilite"><pre class="insert-before">  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
</pre><div class="source-file"><em>main.c</em><br>
in <em>readFile</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">bytesRead</span> &lt; <span class="i">fileSize</span>) {
    <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;Could not read file </span><span class="e">\&quot;</span><span class="s">%s</span><span class="e">\&quot;</span><span class="s">.</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">path</span>);
    <span class="i">exit</span>(<span class="n">74</span>);
  }

</pre><pre class="insert-after">  buffer[bytesRead] = '\0';
</pre></div>
<div class="source-file-narrow"><em>main.c</em>, in <em>readFile</em>()</div>

<p>This is also unlikely. Actually, the <span name="printf"> calls</span> to
<code>fseek()</code>, <code>ftell()</code>, and <code>rewind()</code> could theoretically fail too, but let&rsquo;s not
go too far off in the weeds, shall we?</p>
<aside name="printf">
<p>Even good old <code>printf()</code> can fail. Yup. How many times have you handled <em>that</em>
error?</p>
</aside>
<h3><a href="#opening-the-compilation-pipeline" id="opening-the-compilation-pipeline"><small>16&#8202;.&#8202;1&#8202;.&#8202;1</small>Opening the compilation pipeline</a></h3>
<p>We&rsquo;ve got ourselves a string of Lox source code, so now we&rsquo;re ready to set up a
pipeline to scan, compile, and execute it. It&rsquo;s driven by <code>interpret()</code>. Right
now, that function runs our old hardcoded test chunk. Let&rsquo;s change it to
something closer to its final incarnation.</p>
<div class="codehilite"><pre class="insert-before">void freeVM();
</pre><div class="source-file"><em>vm.h</em><br>
function <em>interpret</em>()<br>
replace 1 line</div>
<pre class="insert"><span class="t">InterpretResult</span> <span class="i">interpret</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>);
</pre><pre class="insert-after">void push(Value value);
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, function <em>interpret</em>(), replace 1 line</div>

<p>Where before we passed in a Chunk, now we pass in the string of source code.
Here&rsquo;s the new implementation:</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
function <em>interpret</em>()<br>
replace 4 lines</div>
<pre class="insert"><span class="t">InterpretResult</span> <span class="i">interpret</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>) {
  <span class="i">compile</span>(<span class="i">source</span>);
  <span class="k">return</span> <span class="a">INTERPRET_OK</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, function <em>interpret</em>(), replace 4 lines</div>

<p>We won&rsquo;t build the actual <em>compiler</em> yet in this chapter, but we can start
laying out its structure. It lives in a new module.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;
</pre><div class="source-file"><em>vm.c</em></div>
<pre class="insert"><span class="a">#include &quot;compiler.h&quot;</span>
</pre><pre class="insert-after">#include &quot;debug.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em></div>

<p>For now, the one function in it is declared like so:</p>
<div class="codehilite"><div class="source-file"><em>compiler.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_compiler_h</span>
<span class="a">#define clox_compiler_h</span>

<span class="t">void</span> <span class="i">compile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>);

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>compiler.h</em>, create new file</div>

<p>That signature will change, but it gets us going.</p>
<p>The first phase of compilation is scanning<span class="em">&mdash;</span>the thing we&rsquo;re doing in this
chapter<span class="em">&mdash;</span>so right now all the compiler does is set that up.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdio.h&gt;</span>

<span class="a">#include &quot;common.h&quot;</span>
<span class="a">#include &quot;compiler.h&quot;</span>
<span class="a">#include &quot;scanner.h&quot;</span>

<span class="t">void</span> <span class="i">compile</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>) {
  <span class="i">initScanner</span>(<span class="i">source</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, create new file</div>

<p>This will also grow in later chapters, naturally.</p>
<h3><a href="#the-scanner-scans" id="the-scanner-scans"><small>16&#8202;.&#8202;1&#8202;.&#8202;2</small>The scanner scans</a></h3>
<p>There are still a few more feet of scaffolding to stand up before we can start
writing useful code. First, a new header:</p>
<div class="codehilite"><div class="source-file"><em>scanner.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_scanner_h</span>
<span class="a">#define clox_scanner_h</span>

<span class="t">void</span> <span class="i">initScanner</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>);

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>scanner.h</em>, create new file</div>

<p>And its corresponding implementation:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdio.h&gt;</span>
<span class="a">#include &lt;string.h&gt;</span>

<span class="a">#include &quot;common.h&quot;</span>
<span class="a">#include &quot;scanner.h&quot;</span>

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="k">const</span> <span class="t">char</span>* <span class="i">start</span>;
  <span class="k">const</span> <span class="t">char</span>* <span class="i">current</span>;
  <span class="t">int</span> <span class="i">line</span>;
} <span class="t">Scanner</span>;

<span class="t">Scanner</span> <span class="i">scanner</span>;
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, create new file</div>

<p>As our scanner chews through the user&rsquo;s source code, it tracks how far it&rsquo;s
gone. Like we did with the VM, we wrap that state in a struct and then create a
single top-level module variable of that type so we don&rsquo;t have to pass it around
to all of the various functions.</p>
<p>There are surprisingly few fields. The <code>start</code> pointer marks the beginning of
the current lexeme being scanned, and <code>current</code> points to the current character
being looked at.</p>
<p><span name="fields"></span></p><img src="image/scanning-on-demand/fields.png" alt="The start and current fields pointing at 'print bacon;'. Start points at 'b' and current points at 'o'." />
<aside name="fields">
<p>Here, we are in the middle of scanning the identifier <code>bacon</code>. The current
character is <code>o</code> and the character we most recently consumed is <code>c</code>.</p>
</aside>
<p>We have a <code>line</code> field to track what line the current lexeme is on for error
reporting. That&rsquo;s it! We don&rsquo;t even keep a pointer to the beginning of the
source code string. The scanner works its way through the code once and is done
after that.</p>
<p>Since we have some state, we should initialize it.</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after variable <em>scanner</em></div>
<pre><span class="t">void</span> <span class="i">initScanner</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">source</span>) {
  <span class="i">scanner</span>.<span class="i">start</span> = <span class="i">source</span>;
  <span class="i">scanner</span>.<span class="i">current</span> = <span class="i">source</span>;
  <span class="i">scanner</span>.<span class="i">line</span> = <span class="n">1</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after variable <em>scanner</em></div>

<p>We start at the very first character on the very first line, like a runner
crouched at the starting line.</p>
<h2><a href="#a-token-at-a-time" id="a-token-at-a-time"><small>16&#8202;.&#8202;2</small>A Token at a Time</a></h2>
<p>In jlox, when the starting gun went off, the scanner raced ahead and eagerly
scanned the whole program, returning a list of tokens. This would be a challenge
in clox. We&rsquo;d need some sort of growable array or list to store the tokens in.
We&rsquo;d need to manage allocating and freeing the tokens, and the collection
itself. That&rsquo;s a lot of code, and a lot of memory churn.</p>
<p>At any point in time, the compiler needs only one or two tokens<span class="em">&mdash;</span>remember our
grammar requires only a single token of lookahead<span class="em">&mdash;</span>so we don&rsquo;t need to keep
them <em>all</em> around at the same time. Instead, the simplest solution is to not
scan a token until the compiler needs one. When the scanner provides one, it
returns the token by value. It doesn&rsquo;t need to dynamically allocate anything<span class="em">&mdash;</span>it can just pass tokens around on the C stack.</p>
<p>Unfortunately, we don&rsquo;t have a compiler yet that can ask the scanner for tokens,
so the scanner will just sit there doing nothing. To kick it into action, we&rsquo;ll
write some temporary code to drive it.</p>
<div class="codehilite"><pre class="insert-before">  initScanner(source);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>compile</em>()</div>
<pre class="insert">  <span class="t">int</span> <span class="i">line</span> = -<span class="n">1</span>;
  <span class="k">for</span> (;;) {
    <span class="t">Token</span> <span class="i">token</span> = <span class="i">scanToken</span>();
    <span class="k">if</span> (<span class="i">token</span>.<span class="i">line</span> != <span class="i">line</span>) {
      <span class="i">printf</span>(<span class="s">&quot;%4d &quot;</span>, <span class="i">token</span>.<span class="i">line</span>);
      <span class="i">line</span> = <span class="i">token</span>.<span class="i">line</span>;
    } <span class="k">else</span> {
      <span class="i">printf</span>(<span class="s">&quot;   | &quot;</span>);
    }
    <span class="i">printf</span>(<span class="s">&quot;%2d &#39;%.*s&#39;</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">token</span>.<span class="i">type</span>, <span class="i">token</span>.<span class="i">length</span>, <span class="i">token</span>.<span class="i">start</span>);<span name="format"> </span>

    <span class="k">if</span> (<span class="i">token</span>.<span class="i">type</span> == <span class="a">TOKEN_EOF</span>) <span class="k">break</span>;
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>compile</em>()</div>

<aside name="format">
<p>That <code>%.*s</code> in the format string is a neat feature. Usually, you set the output
precision<span class="em">&mdash;</span>the number of characters to show<span class="em">&mdash;</span>by placing a number inside the
format string. Using <code>*</code> instead lets you pass the precision as an argument. So
that <code>printf()</code> call prints the first <code>token.length</code> characters of the string at
<code>token.start</code>. We need to limit the length like that because the lexeme points
into the original source string and doesn&rsquo;t have a terminator at the end.</p>
</aside>
<p>This loops indefinitely. Each turn through the loop, it scans one token and
prints it. An error token is printed like any other; the loop stops only when it reaches a special &ldquo;end of file&rdquo; token.
For example, if we run the interpreter on this program:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="n">1</span> + <span class="n">2</span>;
</pre></div>
<p>It prints out:</p>
<div class="codehilite"><pre>   1 31 'print'
   | 21 '1'
   |  7 '+'
   | 21 '2'
   |  8 ';'
   2 39 ''
</pre></div>
<p>The first column is the line number, the second is the numeric value of the
token <span name="token">type</span>, and then finally the lexeme. That last
empty lexeme on line 2 is the EOF token.</p>
<aside name="token">
<p>Yeah, the raw index of the token type isn&rsquo;t exactly human readable, but it&rsquo;s all
C gives us.</p>
</aside>
<p>The goal for the rest of the chapter is to make that blob of code work by
implementing this key function:</p>
<div class="codehilite"><pre class="insert-before">void initScanner(const char* source);
</pre><div class="source-file"><em>scanner.h</em><br>
add after <em>initScanner</em>()</div>
<pre class="insert"><span class="t">Token</span> <span class="i">scanToken</span>();
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>scanner.h</em>, add after <em>initScanner</em>()</div>

<p>Each call scans and returns the next token in the source code. A token looks
like this:</p>
<div class="codehilite"><pre class="insert-before">#define clox_scanner_h
</pre><div class="source-file"><em>scanner.h</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">TokenType</span> <span class="i">type</span>;
  <span class="k">const</span> <span class="t">char</span>* <span class="i">start</span>;
  <span class="t">int</span> <span class="i">length</span>;
  <span class="t">int</span> <span class="i">line</span>;
} <span class="t">Token</span>;
</pre><pre class="insert-after">

void initScanner(const char* source);
</pre></div>
<div class="source-file-narrow"><em>scanner.h</em></div>

<p>It&rsquo;s pretty similar to jlox&rsquo;s Token class. We have an enum identifying what type
of token it is<span class="em">&mdash;</span>number, identifier, <code>+</code> operator, etc. The enum is virtually
identical to the one in jlox, so let&rsquo;s just hammer out the whole thing.</p>
<div class="codehilite"><pre class="insert-before">#ifndef clox_scanner_h
#define clox_scanner_h
</pre><div class="source-file"><em>scanner.h</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">enum</span> {
  <span class="c">// Single-character tokens.</span>
  <span class="a">TOKEN_LEFT_PAREN</span>, <span class="a">TOKEN_RIGHT_PAREN</span>,
  <span class="a">TOKEN_LEFT_BRACE</span>, <span class="a">TOKEN_RIGHT_BRACE</span>,
  <span class="a">TOKEN_COMMA</span>, <span class="a">TOKEN_DOT</span>, <span class="a">TOKEN_MINUS</span>, <span class="a">TOKEN_PLUS</span>,
  <span class="a">TOKEN_SEMICOLON</span>, <span class="a">TOKEN_SLASH</span>, <span class="a">TOKEN_STAR</span>,
  <span class="c">// One or two character tokens.</span>
  <span class="a">TOKEN_BANG</span>, <span class="a">TOKEN_BANG_EQUAL</span>,
  <span class="a">TOKEN_EQUAL</span>, <span class="a">TOKEN_EQUAL_EQUAL</span>,
  <span class="a">TOKEN_GREATER</span>, <span class="a">TOKEN_GREATER_EQUAL</span>,
  <span class="a">TOKEN_LESS</span>, <span class="a">TOKEN_LESS_EQUAL</span>,
  <span class="c">// Literals.</span>
  <span class="a">TOKEN_IDENTIFIER</span>, <span class="a">TOKEN_STRING</span>, <span class="a">TOKEN_NUMBER</span>,
  <span class="c">// Keywords.</span>
  <span class="a">TOKEN_AND</span>, <span class="a">TOKEN_CLASS</span>, <span class="a">TOKEN_ELSE</span>, <span class="a">TOKEN_FALSE</span>,
  <span class="a">TOKEN_FOR</span>, <span class="a">TOKEN_FUN</span>, <span class="a">TOKEN_IF</span>, <span class="a">TOKEN_NIL</span>, <span class="a">TOKEN_OR</span>,
  <span class="a">TOKEN_PRINT</span>, <span class="a">TOKEN_RETURN</span>, <span class="a">TOKEN_SUPER</span>, <span class="a">TOKEN_THIS</span>,
  <span class="a">TOKEN_TRUE</span>, <span class="a">TOKEN_VAR</span>, <span class="a">TOKEN_WHILE</span>,

  <span class="a">TOKEN_ERROR</span>, <span class="a">TOKEN_EOF</span>
} <span class="t">TokenType</span>;
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>scanner.h</em></div>

<p>Aside from prefixing all the names with <code>TOKEN_</code> (since C tosses enum names in
the top-level namespace) the only difference is that extra <code>TOKEN_ERROR</code> type.
What&rsquo;s that about?</p>
<p>There are only a couple of errors that get detected during scanning:
unterminated strings and unrecognized characters. In jlox, the scanner reports
those itself. In clox, the scanner produces a synthetic &ldquo;error&rdquo; token for that
error and passes it over to the compiler. This way, the compiler knows an error
occurred and can kick off error recovery before reporting it.</p>
<p>The novel part in clox&rsquo;s Token type is how it represents the lexeme. In jlox,
each Token stored the lexeme as its own separate little Java string. If we did
that for clox, we&rsquo;d have to figure out how to manage the memory for those
strings. That&rsquo;s especially hard since we pass tokens by value<span class="em">&mdash;</span>multiple tokens could point to the same lexeme string. Ownership gets weird.</p>
<p>Instead, we use the original source string as our character store. We represent
a lexeme by a pointer to its first character and the number of characters it
contains. This means we don&rsquo;t need to worry about managing memory for lexemes at
all and we can freely copy tokens around. As long as the main source code string
<span name="outlive">outlives</span> all of the tokens, everything works fine.</p>
<aside name="outlive">
<p>I don&rsquo;t mean to sound flippant. We really do need to think about and ensure that
the source string, which is created far away over in the &ldquo;main&rdquo; module, has a
long enough lifetime. That&rsquo;s why <code>runFile()</code> doesn&rsquo;t free the string until
<code>interpret()</code> finishes executing the code and returns.</p>
</aside>
<h3><a href="#scanning-tokens" id="scanning-tokens"><small>16&#8202;.&#8202;2&#8202;.&#8202;1</small>Scanning tokens</a></h3>
<p>We&rsquo;re ready to scan some tokens. We&rsquo;ll work our way up to the complete
implementation, starting with this:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>initScanner</em>()</div>
<pre><span class="t">Token</span> <span class="i">scanToken</span>() {
  <span class="i">scanner</span>.<span class="i">start</span> = <span class="i">scanner</span>.<span class="i">current</span>;

  <span class="k">if</span> (<span class="i">isAtEnd</span>()) <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_EOF</span>);

  <span class="k">return</span> <span class="i">errorToken</span>(<span class="s">&quot;Unexpected character.&quot;</span>);
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>initScanner</em>()</div>

<p>Since each call to this function scans a complete token, we know we are at the
beginning of a new token when we enter the function. Thus, we set
<code>scanner.start</code> to point to the current character so we remember where the
lexeme we&rsquo;re about to scan starts.</p>
<p>Then we check to see if we&rsquo;ve reached the end of the source code. If so, we
return an EOF token and stop. This is a sentinel value that signals to the
compiler to stop asking for more tokens.</p>
<p>If we aren&rsquo;t at the end, we do some<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>stuff<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>to scan the next token. But we
haven&rsquo;t written that code yet. We&rsquo;ll get to that soon. If that code doesn&rsquo;t
successfully scan and return a token, then we reach the end of the function.
That must mean we&rsquo;re at a character that the scanner can&rsquo;t recognize, so we
return an error token for that.</p>
<p>This function relies on a couple of helpers, most of which are familiar from
jlox. First up:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>initScanner</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">isAtEnd</span>() {
  <span class="k">return</span> *<span class="i">scanner</span>.<span class="i">current</span> == <span class="s">&#39;\0&#39;</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>initScanner</em>()</div>

<p>We require the source string to be a good null-terminated C string. If the
current character is the null byte, then we&rsquo;ve reached the end.</p>
<p>To create a token, we have this constructor-like function:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>isAtEnd</em>()</div>
<pre><span class="k">static</span> <span class="t">Token</span> <span class="i">makeToken</span>(<span class="t">TokenType</span> <span class="i">type</span>) {
  <span class="t">Token</span> <span class="i">token</span>;
  <span class="i">token</span>.<span class="i">type</span> = <span class="i">type</span>;
  <span class="i">token</span>.<span class="i">start</span> = <span class="i">scanner</span>.<span class="i">start</span>;
  <span class="i">token</span>.<span class="i">length</span> = (<span class="t">int</span>)(<span class="i">scanner</span>.<span class="i">current</span> - <span class="i">scanner</span>.<span class="i">start</span>);
  <span class="i">token</span>.<span class="i">line</span> = <span class="i">scanner</span>.<span class="i">line</span>;
  <span class="k">return</span> <span class="i">token</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>isAtEnd</em>()</div>

<p>It uses the scanner&rsquo;s <code>start</code> and <code>current</code> pointers to capture the token&rsquo;s
lexeme. It sets a couple of other obvious fields then returns the token. It has
a sister function for returning error tokens.</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>makeToken</em>()</div>
<pre><span class="k">static</span> <span class="t">Token</span> <span class="i">errorToken</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">message</span>) {
  <span class="t">Token</span> <span class="i">token</span>;
  <span class="i">token</span>.<span class="i">type</span> = <span class="a">TOKEN_ERROR</span>;
  <span class="i">token</span>.<span class="i">start</span> = <span class="i">message</span>;
  <span class="i">token</span>.<span class="i">length</span> = (<span class="t">int</span>)<span class="i">strlen</span>(<span class="i">message</span>);
  <span class="i">token</span>.<span class="i">line</span> = <span class="i">scanner</span>.<span class="i">line</span>;
  <span class="k">return</span> <span class="i">token</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>makeToken</em>()</div>

<p><span name="axolotl"></span></p>
<aside name="axolotl">
<p>This part of the chapter is pretty dry, so here&rsquo;s a picture of an axolotl.</p><img src="image/scanning-on-demand/axolotl.png" alt="A drawing of an axolotl." />
</aside>
<p>The only difference is that the &ldquo;lexeme&rdquo; points to the error message string
instead of pointing into the user&rsquo;s source code. Again, we need to ensure that
the error message sticks around long enough for the compiler to read it. In
practice, we only ever call this function with C string literals. Those are
constant and eternal, so we&rsquo;re fine.</p>
<p>What we have now is basically a working scanner for a language with an empty
lexical grammar. Since the grammar has no productions, every character is an
error. That&rsquo;s not exactly a fun language to program in, so let&rsquo;s fill in the
rules.</p>
<h2><a href="#a-lexical-grammar-for-lox" id="a-lexical-grammar-for-lox"><small>16&#8202;.&#8202;3</small>A Lexical Grammar for Lox</a></h2>
<p>The simplest tokens are only a single character. We recognize those like so:</p>
<div class="codehilite"><pre class="insert-before">  if (isAtEnd()) return makeToken(TOKEN_EOF);
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">

  <span class="t">char</span> <span class="i">c</span> = <span class="i">advance</span>();

  <span class="k">switch</span> (<span class="i">c</span>) {
    <span class="k">case</span> <span class="s">&#39;(&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_LEFT_PAREN</span>);
    <span class="k">case</span> <span class="s">&#39;)&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_RIGHT_PAREN</span>);
    <span class="k">case</span> <span class="s">&#39;{&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_LEFT_BRACE</span>);
    <span class="k">case</span> <span class="s">&#39;}&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_RIGHT_BRACE</span>);
    <span class="k">case</span> <span class="s">&#39;;&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_SEMICOLON</span>);
    <span class="k">case</span> <span class="s">&#39;,&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_COMMA</span>);
    <span class="k">case</span> <span class="s">&#39;.&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_DOT</span>);
    <span class="k">case</span> <span class="s">&#39;-&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_MINUS</span>);
    <span class="k">case</span> <span class="s">&#39;+&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_PLUS</span>);
    <span class="k">case</span> <span class="s">&#39;/&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_SLASH</span>);
    <span class="k">case</span> <span class="s">&#39;*&#39;</span>: <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_STAR</span>);
  }
</pre><pre class="insert-after">

  return errorToken(&quot;Unexpected character.&quot;);
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>scanToken</em>()</div>

<p>We read the next character from the source code, and then do a straightforward
switch to see if it matches any of Lox&rsquo;s one-character lexemes. To read the next
character, we use a new helper which consumes the current character and returns
it.</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>isAtEnd</em>()</div>
<pre><span class="k">static</span> <span class="t">char</span> <span class="i">advance</span>() {
  <span class="i">scanner</span>.<span class="i">current</span>++;
  <span class="k">return</span> <span class="i">scanner</span>.<span class="i">current</span>[-<span class="n">1</span>];
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>isAtEnd</em>()</div>

<p>Next up are the two-character punctuation tokens like <code>!=</code> and <code>&gt;=</code>. Each of
these also has a corresponding single-character token. That means that when we
see a character like <code>!</code>, we don&rsquo;t know if we&rsquo;re in a <code>!</code> token or a <code>!=</code> until
we look at the next character too. We handle those like so:</p>
<div class="codehilite"><pre class="insert-before">    case '*': return makeToken(TOKEN_STAR);
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="s">&#39;!&#39;</span>:
      <span class="k">return</span> <span class="i">makeToken</span>(
          <span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="a">TOKEN_BANG_EQUAL</span> : <span class="a">TOKEN_BANG</span>);
    <span class="k">case</span> <span class="s">&#39;=&#39;</span>:
      <span class="k">return</span> <span class="i">makeToken</span>(
          <span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="a">TOKEN_EQUAL_EQUAL</span> : <span class="a">TOKEN_EQUAL</span>);
    <span class="k">case</span> <span class="s">&#39;&lt;&#39;</span>:
      <span class="k">return</span> <span class="i">makeToken</span>(
          <span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="a">TOKEN_LESS_EQUAL</span> : <span class="a">TOKEN_LESS</span>);
    <span class="k">case</span> <span class="s">&#39;&gt;&#39;</span>:
      <span class="k">return</span> <span class="i">makeToken</span>(
          <span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="a">TOKEN_GREATER_EQUAL</span> : <span class="a">TOKEN_GREATER</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>scanToken</em>()</div>

<p>After consuming the first character, we look for an <code>=</code>. If found, we consume it
and return the corresponding two-character token. Otherwise, we leave the
current character alone (so it can be part of the <em>next</em> token) and return the
appropriate one-character token.</p>
<p>That logic for conditionally consuming the second character lives here:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>advance</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">match</span>(<span class="t">char</span> <span class="i">expected</span>) {
  <span class="k">if</span> (<span class="i">isAtEnd</span>()) <span class="k">return</span> <span class="k">false</span>;
  <span class="k">if</span> (*<span class="i">scanner</span>.<span class="i">current</span> != <span class="i">expected</span>) <span class="k">return</span> <span class="k">false</span>;
  <span class="i">scanner</span>.<span class="i">current</span>++;
  <span class="k">return</span> <span class="k">true</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>advance</em>()</div>

<p>If the current character is the desired one, we advance and return <code>true</code>.
Otherwise, we return <code>false</code> to indicate it wasn&rsquo;t matched.</p>
<p>Now our scanner supports all of the punctuation-like tokens. Before we get to
the longer ones, let&rsquo;s take a little side trip to handle characters that aren&rsquo;t
part of a token at all.</p>
<h3><a href="#whitespace" id="whitespace"><small>16&#8202;.&#8202;3&#8202;.&#8202;1</small>Whitespace</a></h3>
<p>Our scanner needs to handle spaces, tabs, and newlines, but those characters
don&rsquo;t become part of any token&rsquo;s lexeme. We could check for those inside the
main character switch in <code>scanToken()</code>, but it gets a little tricky to ensure
that the function still correctly finds the next token <em>after</em> the whitespace
when you call it. We&rsquo;d have to wrap the whole body of the function in a loop or
something.</p>
<p>Instead, before starting the token, we shunt off to a separate function.</p>
<div class="codehilite"><pre class="insert-before">Token scanToken() {
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">  <span class="i">skipWhitespace</span>();
</pre><pre class="insert-after">  scanner.start = scanner.current;
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>scanToken</em>()</div>

<p>This advances the scanner past any leading whitespace. After this call returns,
we know the very next character is a meaningful one (or we&rsquo;re at the end of the
source code).</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>errorToken</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">skipWhitespace</span>() {
  <span class="k">for</span> (;;) {
    <span class="t">char</span> <span class="i">c</span> = <span class="i">peek</span>();
    <span class="k">switch</span> (<span class="i">c</span>) {
      <span class="k">case</span> <span class="s">&#39; &#39;</span>:
      <span class="k">case</span> <span class="s">&#39;\r&#39;</span>:
      <span class="k">case</span> <span class="s">&#39;\t&#39;</span>:
        <span class="i">advance</span>();
        <span class="k">break</span>;
      <span class="k">default</span>:
        <span class="k">return</span>;
    }
  }
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>errorToken</em>()</div>

<p>It&rsquo;s sort of a separate mini-scanner. It loops, consuming every whitespace
character it encounters. We need to be careful that it does <em>not</em> consume any
<em>non</em>-whitespace characters. To support that, we use this:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>advance</em>()</div>
<pre><span class="k">static</span> <span class="t">char</span> <span class="i">peek</span>() {
  <span class="k">return</span> *<span class="i">scanner</span>.<span class="i">current</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>advance</em>()</div>

<p>This simply returns the current character, but doesn&rsquo;t consume it. The previous
code handles all the whitespace characters except for newlines.</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>skipWhitespace</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="s">&#39;\n&#39;</span>:
        <span class="i">scanner</span>.<span class="i">line</span>++;
        <span class="i">advance</span>();
        <span class="k">break</span>;
</pre><pre class="insert-after">      default:
        return;
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>skipWhitespace</em>()</div>

<p>When we consume one of those, we also bump the current line number.</p>
<h3><a href="#comments" id="comments"><small>16&#8202;.&#8202;3&#8202;.&#8202;2</small>Comments</a></h3>
<p>Comments aren&rsquo;t technically &ldquo;whitespace&rdquo;, if you want to get all precise with
your terminology, but as far as Lox is concerned, they may as well be, so we
skip those too.</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>skipWhitespace</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="s">&#39;/&#39;</span>:
        <span class="k">if</span> (<span class="i">peekNext</span>() == <span class="s">&#39;/&#39;</span>) {
          <span class="c">// A comment goes until the end of the line.</span>
          <span class="k">while</span> (<span class="i">peek</span>() != <span class="s">&#39;\n&#39;</span> &amp;&amp; !<span class="i">isAtEnd</span>()) <span class="i">advance</span>();
        } <span class="k">else</span> {
          <span class="k">return</span>;
        }
        <span class="k">break</span>;
</pre><pre class="insert-after">      default:
        return;
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>skipWhitespace</em>()</div>

<p>Comments start with <code>//</code> in Lox, so as with <code>!=</code> and friends, we need a second
character of lookahead. However, with <code>!=</code>, we still wanted to consume the <code>!</code>
even if the <code>=</code> wasn&rsquo;t found. Comments are different. If we don&rsquo;t find a second
<code>/</code>, then <code>skipWhitespace()</code> needs to not consume the <em>first</em> slash either.</p>
<p>To handle that, we add:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>peek</em>()</div>
<pre><span class="k">static</span> <span class="t">char</span> <span class="i">peekNext</span>() {
  <span class="k">if</span> (<span class="i">isAtEnd</span>()) <span class="k">return</span> <span class="s">&#39;\0&#39;</span>;
  <span class="k">return</span> <span class="i">scanner</span>.<span class="i">current</span>[<span class="n">1</span>];
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>peek</em>()</div>

<p>This is like <code>peek()</code> but for one character past the current one. It checks
<code>isAtEnd()</code> first so that it never reads past the string&rsquo;s terminating null
byte. If the current character and the next one are both <code>/</code>, we consume them
and then any other characters until the next newline or the end of the source code.</p>
<p>We use <code>peek()</code> to check for the newline but not consume it. That way, the
newline will be the current character on the next turn of the outer loop in
<code>skipWhitespace()</code> and we&rsquo;ll recognize it and increment <code>scanner.line</code>.</p>
<h3><a href="#literal-tokens" id="literal-tokens"><small>16&#8202;.&#8202;3&#8202;.&#8202;3</small>Literal tokens</a></h3>
<p>Number and string tokens are special because they have a runtime value
associated with them. We&rsquo;ll start with strings because they are easy to
recognize<span class="em">&mdash;</span>they always begin with a double quote.</p>
<div class="codehilite"><pre class="insert-before">          match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="s">&#39;&quot;&#39;</span>: <span class="k">return</span> <span class="i">string</span>();
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>scanToken</em>()</div>

<p>That calls a new function.</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>skipWhitespace</em>()</div>
<pre><span class="k">static</span> <span class="t">Token</span> <span class="i">string</span>() {
  <span class="k">while</span> (<span class="i">peek</span>() != <span class="s">&#39;&quot;&#39;</span> &amp;&amp; !<span class="i">isAtEnd</span>()) {
    <span class="k">if</span> (<span class="i">peek</span>() == <span class="s">&#39;\n&#39;</span>) <span class="i">scanner</span>.<span class="i">line</span>++;
    <span class="i">advance</span>();
  }

  <span class="k">if</span> (<span class="i">isAtEnd</span>()) <span class="k">return</span> <span class="i">errorToken</span>(<span class="s">&quot;Unterminated string.&quot;</span>);

  <span class="c">// The closing quote.</span>
  <span class="i">advance</span>();
  <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_STRING</span>);
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>skipWhitespace</em>()</div>

<p>Similar to jlox, we consume characters until we reach the closing quote. We also
track newlines inside the string literal. (Lox supports multi-line strings.)
And, as ever, we gracefully handle running out of source code before we find the
end quote.</p>
<p>The main change here in clox is something that&rsquo;s <em>not</em> present. Again, it
relates to memory management. In jlox, the Token class had a field of type
Object to store the runtime value converted from the literal token&rsquo;s lexeme.</p>
<p>Implementing that in C would require a lot of work. We&rsquo;d need some sort of union
and type tag to tell whether the token contains a string or double value. If
it&rsquo;s a string, we&rsquo;d need to manage the memory for the string&rsquo;s character array
somehow.</p>
<p>Instead of adding that complexity to the scanner, we defer <span
name="convert">converting</span> the literal lexeme to a runtime value until
later. In clox, tokens only store the lexeme<span class="em">&mdash;</span>the character sequence exactly
as it appears in the user&rsquo;s source code. Later in the compiler, we&rsquo;ll convert
that lexeme to a runtime value right when we are ready to store it in the
chunk&rsquo;s constant table.</p>
<aside name="convert">
<p>Doing the lexeme-to-value conversion in the compiler does introduce some
redundancy. The work to scan a number literal is awfully similar to the work
required to convert a sequence of digit characters to a number value. But there
isn&rsquo;t <em>that</em> much redundancy, it isn&rsquo;t in anything performance-critical, and it
keeps our scanner simpler.</p>
</aside>
<p>Next up, numbers. Instead of adding a switch case for each of the ten digits
that can start a number, we handle them here:</p>
<div class="codehilite"><pre class="insert-before">  char c = advance();
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">isDigit</span>(<span class="i">c</span>)) <span class="k">return</span> <span class="i">number</span>();
</pre><pre class="insert-after">

  switch (c) {
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>scanToken</em>()</div>

<p>That uses this obvious utility function:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>initScanner</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">isDigit</span>(<span class="t">char</span> <span class="i">c</span>) {
  <span class="k">return</span> <span class="i">c</span> &gt;= <span class="s">&#39;0&#39;</span> &amp;&amp; <span class="i">c</span> &lt;= <span class="s">&#39;9&#39;</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>initScanner</em>()</div>

<p>We finish scanning the number using this:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>skipWhitespace</em>()</div>
<pre><span class="k">static</span> <span class="t">Token</span> <span class="i">number</span>() {
  <span class="k">while</span> (<span class="i">isDigit</span>(<span class="i">peek</span>())) <span class="i">advance</span>();

  <span class="c">// Look for a fractional part.</span>
  <span class="k">if</span> (<span class="i">peek</span>() == <span class="s">&#39;.&#39;</span> &amp;&amp; <span class="i">isDigit</span>(<span class="i">peekNext</span>())) {
    <span class="c">// Consume the &quot;.&quot;.</span>
    <span class="i">advance</span>();

    <span class="k">while</span> (<span class="i">isDigit</span>(<span class="i">peek</span>())) <span class="i">advance</span>();
  }

  <span class="k">return</span> <span class="i">makeToken</span>(<span class="a">TOKEN_NUMBER</span>);
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>skipWhitespace</em>()</div>

<p>It&rsquo;s virtually identical to jlox&rsquo;s version except, again, we don&rsquo;t convert the
lexeme to a double yet.</p>
<h2><a href="#identifiers-and-keywords" id="identifiers-and-keywords"><small>16&#8202;.&#8202;4</small>Identifiers and Keywords</a></h2>
<p>The last batch of tokens are identifiers, both user-defined and reserved. This
section should be fun<span class="em">&mdash;</span>the way we recognize keywords in clox is quite
different from how we did it in jlox, and touches on some important data
structures.</p>
<p>First, though, we have to scan the lexeme. Names start with a letter or
underscore.</p>
<div class="codehilite"><pre class="insert-before">  char c = advance();
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">isAlpha</span>(<span class="i">c</span>)) <span class="k">return</span> <span class="i">identifier</span>();
</pre><pre class="insert-after">  if (isDigit(c)) return number();
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>scanToken</em>()</div>

<p>We recognize those using this:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>initScanner</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">isAlpha</span>(<span class="t">char</span> <span class="i">c</span>) {
  <span class="k">return</span> (<span class="i">c</span> &gt;= <span class="s">&#39;a&#39;</span> &amp;&amp; <span class="i">c</span> &lt;= <span class="s">&#39;z&#39;</span>) ||
         (<span class="i">c</span> &gt;= <span class="s">&#39;A&#39;</span> &amp;&amp; <span class="i">c</span> &lt;= <span class="s">&#39;Z&#39;</span>) ||
          <span class="i">c</span> == <span class="s">&#39;_&#39;</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>initScanner</em>()</div>

<p>Once we&rsquo;ve found an identifier, we scan the rest of it here:</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>skipWhitespace</em>()</div>
<pre><span class="k">static</span> <span class="t">Token</span> <span class="i">identifier</span>() {
  <span class="k">while</span> (<span class="i">isAlpha</span>(<span class="i">peek</span>()) || <span class="i">isDigit</span>(<span class="i">peek</span>())) <span class="i">advance</span>();
  <span class="k">return</span> <span class="i">makeToken</span>(<span class="i">identifierType</span>());
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>skipWhitespace</em>()</div>

<p>After the first character, we allow digits too, and we keep consuming
alphanumerics (and underscores) until we run out of them. Then we produce a token
with the proper type. Determining that &ldquo;proper&rdquo; type is the unique part of this chapter.</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>skipWhitespace</em>()</div>
<pre><span class="k">static</span> <span class="t">TokenType</span> <span class="i">identifierType</span>() {
  <span class="k">return</span> <span class="a">TOKEN_IDENTIFIER</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>skipWhitespace</em>()</div>

<p>Okay, I guess that&rsquo;s not very exciting yet. That&rsquo;s what it looks like if we
have no reserved words at all. How should we go about recognizing keywords? In
jlox, we stuffed them all in a Java Map and looked them up by name. We don&rsquo;t
have any sort of hash table structure in clox, at least not yet.</p>
<p>A hash table would be overkill anyway. To look up a string in a hash <span
name="hash">table</span>, we need to walk the string to calculate its hash code,
find the corresponding bucket in the hash table, and then do a
character-by-character equality comparison on any string it happens to find
there.</p>
<aside name="hash">
<p>Don&rsquo;t worry if this is unfamiliar to you. When we get to <a href="hash-tables.html">building our own hash
table from scratch</a>, we&rsquo;ll learn all about it in exquisite detail.</p>
</aside>
<p>Let&rsquo;s say we&rsquo;ve scanned the identifier &ldquo;gorgonzola&rdquo;. How much work <em>should</em> we
need to do to tell if that&rsquo;s a reserved word? Well, no Lox keyword starts with
&ldquo;g&rdquo;, so looking at the first character is enough to definitively answer no.
That&rsquo;s a lot simpler than a hash table lookup.</p>
<p>What about &ldquo;cardigan&rdquo;? We do have a keyword in Lox that starts with &ldquo;c&rdquo;:
&ldquo;class&rdquo;. But the second character in &ldquo;cardigan&rdquo;, &ldquo;a&rdquo;, rules that out. What about
&ldquo;forest&rdquo;? Since &ldquo;for&rdquo; is a keyword, we have to go farther in the string before
we can establish that we don&rsquo;t have a reserved word. But, in most cases, only a
character or two is enough to tell we&rsquo;ve got a user-defined name on our hands.
We should be able to recognize that and fail fast.</p>
<p>Here&rsquo;s a visual representation of that branching character-inspection logic:</p>
<p><span name="down"></span></p><img src="image/scanning-on-demand/keywords.png" alt="A trie that contains all of Lox's keywords." />
<aside name="down">
<p>Read down each chain of nodes and you&rsquo;ll see Lox&rsquo;s keywords emerge.</p>
</aside>
<p>We start at the root node. If there is a child node whose letter matches the
first character in the lexeme, we move to that node. Then repeat for the next
letter in the lexeme and so on. If at any point the next letter in the lexeme
doesn&rsquo;t match a child node, then the identifier must not be a keyword and we
stop. If we reach a double-lined box, and we&rsquo;re at the last character of the
lexeme, then we found a keyword.</p>
<h3><a href="#tries-and-state-machines" id="tries-and-state-machines"><small>16&#8202;.&#8202;4&#8202;.&#8202;1</small>Tries and state machines</a></h3>
<p>This tree diagram is an example of a thing called a <span
name="trie"><a href="https://en.wikipedia.org/wiki/Trie"><strong>trie</strong></a></span>. A trie stores a set of strings. Most other
data structures for storing strings contain the raw character arrays and then
wrap them inside some larger construct that helps you search faster. A trie is
different. Nowhere in the trie will you find a whole string.</p>
<aside name="trie">
<p>&ldquo;Trie&rdquo; is one of the most confusing names in CS. Edward Fredkin yanked it out of
the middle of the word &ldquo;retrieval&rdquo;, which means it should be pronounced like
&ldquo;tree&rdquo;. But, uh, there is already a pretty important data structure pronounced
&ldquo;tree&rdquo; <em>which tries are a special case of</em>, so unless you never speak of these
things out loud, no one can tell which one you&rsquo;re talking about. Thus, people
these days often pronounce it like &ldquo;try&rdquo; to avoid the headache.</p>
</aside>
<p>Instead, each string the trie &ldquo;contains&rdquo; is represented as a <em>path</em> through the
tree of character nodes, as in our traversal above. Nodes that match the last
character in a string have a special marker<span class="em">&mdash;</span>the double-lined boxes in the
illustration. That way, if your trie contains, say, &ldquo;banquet&rdquo; and &ldquo;ban&rdquo;, you are
able to tell that it does <em>not</em> contain &ldquo;banque&rdquo;<span class="em">&mdash;</span>the &ldquo;e&rdquo; node won&rsquo;t have that
marker, while the &ldquo;n&rdquo; and &ldquo;t&rdquo; nodes will.</p>
<p>Tries are a special case of an even more fundamental data structure: a
<a href="https://en.wikipedia.org/wiki/Deterministic_finite_automaton"><strong>deterministic finite automaton</strong></a> (<strong>DFA</strong>). You might also know these
by other names: <strong>finite state machine</strong>, or just <strong>state machine</strong>. State
machines are rad. They end up useful in everything from <a href="http://gameprogrammingpatterns.com/state.html">game
programming</a> to implementing networking protocols.</p>
<p>In a DFA, you have a set of <em>states</em> with <em>transitions</em> between them, forming a
graph. At any point in time, the machine is &ldquo;in&rdquo; exactly one state. It gets to
other states by following transitions. When you use a DFA for lexical analysis,
each transition is a character that gets matched from the string. Each state
represents the set of characters that are allowed to come next.</p>
<p>Our keyword tree is exactly a DFA that recognizes Lox keywords. But DFAs are
more powerful than simple trees because they can be arbitrary <em>graphs</em>.
Transitions can form cycles between states. That lets you recognize arbitrarily
long strings. For example, here&rsquo;s a DFA that recognizes number literals:</p>
<p><span name="railroad"></span></p><img src="image/scanning-on-demand/numbers.png" alt="A syntax diagram that recognizes integer and floating point literals." />
<aside name="railroad">
<p>This style of diagram is called a <a href="https://en.wikipedia.org/wiki/Syntax_diagram"><strong>syntax diagram</strong></a> or the
more charming <strong>railroad diagram</strong>. The latter name is because it looks
something like a switching yard for trains.</p>
<p>Back before Backus-Naur Form was a thing, this was one of the predominant ways
of documenting a language&rsquo;s grammar. These days, we mostly use text, but there&rsquo;s
something delightful about the official specification for a <em>textual language</em>
relying on an <em>image</em>.</p>
</aside>
<p>I&rsquo;ve collapsed the nodes for the ten digits together to keep it more readable,
but the basic process works the same<span class="em">&mdash;</span>you work through the path, entering
nodes whenever you consume a corresponding character in the lexeme. If we were
so inclined, we could construct one big giant DFA that does <em>all</em> of the lexical
analysis for Lox, a single state machine that recognizes and spits out all of
the tokens we need.</p>
<p>However, crafting that mega-DFA by <span name="regex">hand</span> would be
challenging. That&rsquo;s why <a href="https://en.wikipedia.org/wiki/Lex_(software)">Lex</a> was created. You give it a simple textual
description of your lexical grammar<span class="em">&mdash;</span>a bunch of regular expressions<span class="em">&mdash;</span>and it
automatically generates a DFA for you and produces a pile of C code that
implements it.</p>
<aside name="regex">
<p>This is also how most regular expression engines in programming languages and
text editors work under the hood. They take your regex string and convert it to
a DFA, which they then use to match strings.</p>
<p>If you want to learn the algorithm to convert a regular expression into a DFA,
<a href="https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools">the dragon book</a> has you covered.</p>
</aside>
<p>We won&rsquo;t go down that road. We already have a perfectly serviceable hand-rolled
scanner. We just need a tiny trie for recognizing keywords. How should we map
that to code?</p>
<p>The absolute simplest <span name="v8">solution</span> is to use a switch
statement for each node with cases for each branch. We&rsquo;ll start with the root
node and handle the easy keywords.</p>
<aside name="v8">
<p>Simple doesn&rsquo;t mean dumb. The same approach is <a href="https://github.com/v8/v8/blob/e77eebfe3b747fb315bd3baad09bec0953e53e68/src/parsing/scanner.cc#L1643">essentially what V8 does</a>,
and that&rsquo;s currently one of the world&rsquo;s most sophisticated, fastest language
implementations.</p>
</aside>
<div class="codehilite"><pre class="insert-before">static TokenType identifierType() {
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>identifierType</em>()</div>
<pre class="insert">  <span class="k">switch</span> (<span class="i">scanner</span>.<span class="i">start</span>[<span class="n">0</span>]) {
    <span class="k">case</span> <span class="s">&#39;a&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">2</span>, <span class="s">&quot;nd&quot;</span>, <span class="a">TOKEN_AND</span>);
    <span class="k">case</span> <span class="s">&#39;c&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">4</span>, <span class="s">&quot;lass&quot;</span>, <span class="a">TOKEN_CLASS</span>);
    <span class="k">case</span> <span class="s">&#39;e&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">3</span>, <span class="s">&quot;lse&quot;</span>, <span class="a">TOKEN_ELSE</span>);
    <span class="k">case</span> <span class="s">&#39;i&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">1</span>, <span class="s">&quot;f&quot;</span>, <span class="a">TOKEN_IF</span>);
    <span class="k">case</span> <span class="s">&#39;n&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">2</span>, <span class="s">&quot;il&quot;</span>, <span class="a">TOKEN_NIL</span>);
    <span class="k">case</span> <span class="s">&#39;o&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">1</span>, <span class="s">&quot;r&quot;</span>, <span class="a">TOKEN_OR</span>);
    <span class="k">case</span> <span class="s">&#39;p&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">4</span>, <span class="s">&quot;rint&quot;</span>, <span class="a">TOKEN_PRINT</span>);
    <span class="k">case</span> <span class="s">&#39;r&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">5</span>, <span class="s">&quot;eturn&quot;</span>, <span class="a">TOKEN_RETURN</span>);
    <span class="k">case</span> <span class="s">&#39;s&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">4</span>, <span class="s">&quot;uper&quot;</span>, <span class="a">TOKEN_SUPER</span>);
    <span class="k">case</span> <span class="s">&#39;v&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">2</span>, <span class="s">&quot;ar&quot;</span>, <span class="a">TOKEN_VAR</span>);
    <span class="k">case</span> <span class="s">&#39;w&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">1</span>, <span class="n">4</span>, <span class="s">&quot;hile&quot;</span>, <span class="a">TOKEN_WHILE</span>);
  }

</pre><pre class="insert-after">  return TOKEN_IDENTIFIER;
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>identifierType</em>()</div>

<p>These are the initial letters that correspond to a single keyword. If we see an
&ldquo;s&rdquo;, the only keyword the identifier could possibly be is <code>super</code>. It might not
be, though, so we still need to check the rest of the letters too. In the tree
diagram, this is basically that straight path hanging off the &ldquo;s&rdquo;.</p>
<p>We won&rsquo;t roll a switch for each of those nodes. Instead, we have a utility
function that tests the rest of a potential keyword&rsquo;s lexeme.</p>
<div class="codehilite"><div class="source-file"><em>scanner.c</em><br>
add after <em>skipWhitespace</em>()</div>
<pre><span class="k">static</span> <span class="t">TokenType</span> <span class="i">checkKeyword</span>(<span class="t">int</span> <span class="i">start</span>, <span class="t">int</span> <span class="i">length</span>,
    <span class="k">const</span> <span class="t">char</span>* <span class="i">rest</span>, <span class="t">TokenType</span> <span class="i">type</span>) {
  <span class="k">if</span> (<span class="i">scanner</span>.<span class="i">current</span> - <span class="i">scanner</span>.<span class="i">start</span> == <span class="i">start</span> + <span class="i">length</span> &amp;&amp;
      <span class="i">memcmp</span>(<span class="i">scanner</span>.<span class="i">start</span> + <span class="i">start</span>, <span class="i">rest</span>, <span class="i">length</span>) == <span class="n">0</span>) {
    <span class="k">return</span> <span class="i">type</span>;
  }

  <span class="k">return</span> <span class="a">TOKEN_IDENTIFIER</span>;
}
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, add after <em>skipWhitespace</em>()</div>

<p>We use this for all of the unbranching paths in the tree. Once we&rsquo;ve found a
prefix that could only be one possible reserved word, we need to verify two
things. The lexeme must be exactly as long as the keyword. If the first letter
is &ldquo;s&rdquo;, the lexeme could still be &ldquo;sup&rdquo; or &ldquo;superb&rdquo;. And the remaining
characters must match exactly<span class="em">&mdash;</span>&ldquo;supar&rdquo; isn&rsquo;t good enough.</p>
<p>If we do have the right number of characters, and they&rsquo;re the ones we want, then
it&rsquo;s a keyword, and we return the associated token type. Otherwise, it must be a
normal identifier.</p>
<p>We have a couple of keywords where the tree branches again after the first
letter. If the lexeme starts with &ldquo;f&rdquo;, it could be <code>false</code>, <code>for</code>, or <code>fun</code>. So
we add another switch for the branches coming off the &ldquo;f&rdquo; node.</p>
<div class="codehilite"><pre class="insert-before">    case 'e': return checkKeyword(1, 3, &quot;lse&quot;, TOKEN_ELSE);
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>identifierType</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="s">&#39;f&#39;</span>:
      <span class="k">if</span> (<span class="i">scanner</span>.<span class="i">current</span> - <span class="i">scanner</span>.<span class="i">start</span> &gt; <span class="n">1</span>) {
        <span class="k">switch</span> (<span class="i">scanner</span>.<span class="i">start</span>[<span class="n">1</span>]) {
          <span class="k">case</span> <span class="s">&#39;a&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">2</span>, <span class="n">3</span>, <span class="s">&quot;lse&quot;</span>, <span class="a">TOKEN_FALSE</span>);
          <span class="k">case</span> <span class="s">&#39;o&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">2</span>, <span class="n">1</span>, <span class="s">&quot;r&quot;</span>, <span class="a">TOKEN_FOR</span>);
          <span class="k">case</span> <span class="s">&#39;u&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">2</span>, <span class="n">1</span>, <span class="s">&quot;n&quot;</span>, <span class="a">TOKEN_FUN</span>);
        }
      }
      <span class="k">break</span>;
</pre><pre class="insert-after">    case 'i': return checkKeyword(1, 1, &quot;f&quot;, TOKEN_IF);
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>identifierType</em>()</div>

<p>Before we switch, we need to check that there even <em>is</em> a second letter. &ldquo;f&rdquo; by
itself is a valid identifier too, after all. The other letter that branches is
&ldquo;t&rdquo;.</p>
<div class="codehilite"><pre class="insert-before">    case 's': return checkKeyword(1, 4, &quot;uper&quot;, TOKEN_SUPER);
</pre><div class="source-file"><em>scanner.c</em><br>
in <em>identifierType</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="s">&#39;t&#39;</span>:
      <span class="k">if</span> (<span class="i">scanner</span>.<span class="i">current</span> - <span class="i">scanner</span>.<span class="i">start</span> &gt; <span class="n">1</span>) {
        <span class="k">switch</span> (<span class="i">scanner</span>.<span class="i">start</span>[<span class="n">1</span>]) {
          <span class="k">case</span> <span class="s">&#39;h&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">2</span>, <span class="n">2</span>, <span class="s">&quot;is&quot;</span>, <span class="a">TOKEN_THIS</span>);
          <span class="k">case</span> <span class="s">&#39;r&#39;</span>: <span class="k">return</span> <span class="i">checkKeyword</span>(<span class="n">2</span>, <span class="n">2</span>, <span class="s">&quot;ue&quot;</span>, <span class="a">TOKEN_TRUE</span>);
        }
      }
      <span class="k">break</span>;
</pre><pre class="insert-after">    case 'v': return checkKeyword(1, 2, &quot;ar&quot;, TOKEN_VAR);
</pre></div>
<div class="source-file-narrow"><em>scanner.c</em>, in <em>identifierType</em>()</div>

<p>That&rsquo;s it. A couple of nested <code>switch</code> statements. Not only is this code <span
name="short">short</span>, but it&rsquo;s very, very fast. It does the minimum amount
of work required to detect a keyword, and bails out as soon as it can tell the
identifier will not be a reserved one.</p>
<p>And with that, our scanner is complete.</p>
<aside name="short">
<p>We sometimes fall into the trap of thinking that performance comes from
complicated data structures, layers of caching, and other fancy optimizations.
But, many times, all that&rsquo;s required is to do less work, and I often find that
writing the simplest code I can is sufficient to accomplish that.</p>
</aside>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Many newer languages support <a href="https://en.wikipedia.org/wiki/String_interpolation"><strong>string interpolation</strong></a>. Inside a
string literal, you have some sort of special delimiters<span class="em">&mdash;</span>most commonly
<code>${</code> at the beginning and <code>}</code> at the end. Between those delimiters, any
expression can appear. When the string literal is executed, the inner
expression is evaluated, converted to a string, and then merged with the
surrounding string literal.</p>
<p>For example, if Lox supported string interpolation, then this<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">drink</span> = <span class="s">&quot;Tea&quot;</span>;
<span class="k">var</span> <span class="i">steep</span> = <span class="n">4</span>;
<span class="k">var</span> <span class="i">cool</span> = <span class="n">2</span>;
<span class="k">print</span> <span class="s">&quot;${drink} will be ready in ${steep + cool} minutes.&quot;</span>;
</pre></div>
<p><span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>would print:</p>
<div class="codehilite"><pre>Tea will be ready in 6 minutes.
</pre></div>
<p>What token types would you define to implement a scanner for string
interpolation? What sequence of tokens would you emit for the above string
literal?</p>
<p>What tokens would you emit for:</p>
<div class="codehilite"><pre>&quot;Nested ${&quot;interpolation?! Are you ${&quot;mad?!&quot;}&quot;}&quot;
</pre></div>
<p>Consider looking at other language implementations that support
interpolation to see how they handle it.</p>
</li>
<li>
<p>Several languages use angle brackets for generics and also have a <code>&gt;&gt;</code> right
shift operator. This led to a classic problem in early versions of C++:</p>
<div class="codehilite"><pre><span class="t">vector</span>&lt;<span class="t">vector</span>&lt;<span class="t">string</span>&gt;&gt; <span class="i">nestedVectors</span>;
</pre></div>
<p>This would produce a compile error because the <code>&gt;&gt;</code> was lexed to a single
right shift token, not two <code>&gt;</code> tokens. Users were forced to avoid this by
putting a space between the closing angle brackets.</p>
<p>Later versions of C++ are smarter and can handle the above code. Java and C#
never had the problem. How do those languages specify and implement this?</p>
</li>
<li>
<p>Many languages, especially later in their evolution, define &ldquo;contextual
keywords&rdquo;. These are identifiers that act like reserved words in some
contexts but can be normal user-defined identifiers in others.</p>
<p>For example, <code>await</code> is a keyword inside an <code>async</code> method in C#, but
in other methods, you can use <code>await</code> as your own identifier.</p>
<p>Name a few contextual keywords from other languages, and the context where
they are meaningful. What are the pros and cons of having contextual
keywords? How would you implement them in your language&rsquo;s front end if you
needed to?</p>
</li>
</ol>
</div>

<footer>
<a href="compiling-expressions.html" class="next">
  Next Chapter: &ldquo;Compiling Expressions&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/scanning.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Scanning &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Scanning<small>4</small></a></h3>

<ul>
    <li><a href="#the-interpreter-framework"><small>4.1</small> The Interpreter Framework</a></li>
    <li><a href="#lexemes-and-tokens"><small>4.2</small> Lexemes and Tokens</a></li>
    <li><a href="#regular-languages-and-expressions"><small>4.3</small> Regular Languages and Expressions</a></li>
    <li><a href="#the-scanner-class"><small>4.4</small> The Scanner Class</a></li>
    <li><a href="#recognizing-lexemes"><small>4.5</small> Recognizing Lexemes</a></li>
    <li><a href="#longer-lexemes"><small>4.6</small> Longer Lexemes</a></li>
    <li><a href="#reserved-words-and-identifiers"><small>4.7</small> Reserved Words and Identifiers</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Implicit Semicolons</a></li>
</ul>


<div class="prev-next">
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="representing-code.html" title="Representing Code" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter" class="prev">←</a>
<a href="representing-code.html" title="Representing Code" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Scanning<small>4</small></a></h3>

<ul>
    <li><a href="#the-interpreter-framework"><small>4.1</small> The Interpreter Framework</a></li>
    <li><a href="#lexemes-and-tokens"><small>4.2</small> Lexemes and Tokens</a></li>
    <li><a href="#regular-languages-and-expressions"><small>4.3</small> Regular Languages and Expressions</a></li>
    <li><a href="#the-scanner-class"><small>4.4</small> The Scanner Class</a></li>
    <li><a href="#recognizing-lexemes"><small>4.5</small> Recognizing Lexemes</a></li>
    <li><a href="#longer-lexemes"><small>4.6</small> Longer Lexemes</a></li>
    <li><a href="#reserved-words-and-identifiers"><small>4.7</small> Reserved Words and Identifiers</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Implicit Semicolons</a></li>
</ul>


<div class="prev-next">
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="representing-code.html" title="Representing Code" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">4</div>
  <h1>Scanning</h1>

<blockquote>
<p>Take big bites. Anything worth doing is worth overdoing.</p>
<p><cite>Robert A. Heinlein, <em>Time Enough for Love</em></cite></p>
</blockquote>
<p>The first step in any compiler or interpreter is <span
name="lexing">scanning</span>. The scanner takes in raw source code as a series
of characters and groups it into a series of chunks we call <strong>tokens</strong>. These
are the meaningful &ldquo;words&rdquo; and &ldquo;punctuation&rdquo; that make up the language&rsquo;s
grammar.</p>
<aside name="lexing">
<p>This task has been variously called &ldquo;scanning&rdquo; and &ldquo;lexing&rdquo; (short for &ldquo;lexical
analysis&rdquo;) over the years. Way back when computers were as big as Winnebagos but
had less memory than your watch, some people used &ldquo;scanner&rdquo; only to refer to the
piece of code that dealt with reading raw source code characters from disk and
buffering them in memory. Then &ldquo;lexing&rdquo; was the subsequent phase that did useful
stuff with the characters.</p>
<p>These days, reading a source file into memory is trivial, so it&rsquo;s rarely a
distinct phase in the compiler. Because of that, the two terms are basically
interchangeable.</p>
</aside>
<p>Scanning is a good starting point for us too because the code isn&rsquo;t very hard<span class="em">&mdash;</span>pretty much a <code>switch</code> statement with delusions of grandeur. It will help us
warm up before we tackle some of the more interesting material later. By the end
of this chapter, we&rsquo;ll have a full-featured, fast scanner that can take any
string of Lox source code and produce the tokens that we&rsquo;ll feed into the parser
in the next chapter.</p>
<h2><a href="#the-interpreter-framework" id="the-interpreter-framework"><small>4&#8202;.&#8202;1</small>The Interpreter Framework</a></h2>
<p>Since this is our first real chapter, before we get to actually scanning some
code we need to sketch out the basic shape of our interpreter, jlox. Everything
starts with a class in Java.</p>
<div class="codehilite"><div class="source-file"><em>lox/Lox.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.io.BufferedReader</span>;
<span class="k">import</span> <span class="i">java.io.IOException</span>;
<span class="k">import</span> <span class="i">java.io.InputStreamReader</span>;
<span class="k">import</span> <span class="i">java.nio.charset.Charset</span>;
<span class="k">import</span> <span class="i">java.nio.file.Files</span>;
<span class="k">import</span> <span class="i">java.nio.file.Paths</span>;
<span class="k">import</span> <span class="i">java.util.List</span>;

<span class="k">public</span> <span class="k">class</span> <span class="t">Lox</span> {
  <span class="k">public</span> <span class="k">static</span> <span class="t">void</span> <span class="i">main</span>(<span class="t">String</span>[] <span class="i">args</span>) <span class="k">throws</span> <span class="t">IOException</span> {
    <span class="k">if</span> (<span class="i">args</span>.<span class="i">length</span> &gt; <span class="n">1</span>) {
      <span class="t">System</span>.<span class="i">out</span>.<span class="i">println</span>(<span class="s">&quot;Usage: jlox [script]&quot;</span>);
      <span class="t">System</span>.<span class="i">exit</span>(<span class="n">64</span>);<span name="64"> </span>
    } <span class="k">else</span> <span class="k">if</span> (<span class="i">args</span>.<span class="i">length</span> == <span class="n">1</span>) {
      <span class="i">runFile</span>(<span class="i">args</span>[<span class="n">0</span>]);
    } <span class="k">else</span> {
      <span class="i">runPrompt</span>();
    }
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, create new file</div>

<aside name="64">
<p>For exit codes, I&rsquo;m using the conventions defined in the UNIX
<a href="https://www.freebsd.org/cgi/man.cgi?query=sysexits&amp;apropos=0&amp;sektion=0&amp;manpath=FreeBSD+4.3-RELEASE&amp;format=html">&ldquo;sysexits.h&rdquo;</a> header. It&rsquo;s the closest thing to a standard I could
find.</p>
</aside>
<p>Stick that in a text file, and go get your IDE or Makefile or whatever set up.
I&rsquo;ll be right here when you&rsquo;re ready. Good? OK!</p>
<p>Lox is a scripting language, which means it executes directly from source. Our
interpreter supports two ways of running code. If you start jlox from the
command line and give it a path to a file, it reads the file and executes it.</p>
<div class="codehilite"><div class="source-file"><em>lox/Lox.java</em><br>
add after <em>main</em>()</div>
<pre>  <span class="k">private</span> <span class="k">static</span> <span class="t">void</span> <span class="i">runFile</span>(<span class="t">String</span> <span class="i">path</span>) <span class="k">throws</span> <span class="t">IOException</span> {
    <span class="t">byte</span>[] <span class="i">bytes</span> = <span class="t">Files</span>.<span class="i">readAllBytes</span>(<span class="t">Paths</span>.<span class="i">get</span>(<span class="i">path</span>));
    <span class="i">run</span>(<span class="k">new</span> <span class="t">String</span>(<span class="i">bytes</span>, <span class="t">Charset</span>.<span class="i">defaultCharset</span>()));
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, add after <em>main</em>()</div>

<p>If you want a more intimate conversation with your interpreter, you can also run
it interactively. Fire up jlox without any arguments, and it drops you into a
prompt where you can enter and execute code one line at a time.</p>
<aside name="repl">
<p>An interactive prompt is also called a &ldquo;REPL&rdquo; (pronounced like &ldquo;rebel&rdquo; but with
a &ldquo;p&rdquo;). The name comes from Lisp where implementing one is as simple as
wrapping a loop around a few built-in functions:</p>
<div class="codehilite"><pre>(<span class="i">print</span> (<span class="i">eval</span> (<span class="i">read</span>)))
</pre></div>
<p>Working outwards from the most nested call, you <strong>R</strong>ead a line of input,
<strong>E</strong>valuate it, <strong>P</strong>rint the result, then <strong>L</strong>oop and do it all over again.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/Lox.java</em><br>
add after <em>runFile</em>()</div>
<pre>  <span class="k">private</span> <span class="k">static</span> <span class="t">void</span> <span class="i">runPrompt</span>() <span class="k">throws</span> <span class="t">IOException</span> {
    <span class="t">InputStreamReader</span> <span class="i">input</span> = <span class="k">new</span> <span class="t">InputStreamReader</span>(<span class="t">System</span>.<span class="i">in</span>);
    <span class="t">BufferedReader</span> <span class="i">reader</span> = <span class="k">new</span> <span class="t">BufferedReader</span>(<span class="i">input</span>);

    <span class="k">for</span> (;;) {<span name="repl"> </span>
      <span class="t">System</span>.<span class="i">out</span>.<span class="i">print</span>(<span class="s">&quot;&gt; &quot;</span>);
      <span class="t">String</span> <span class="i">line</span> = <span class="i">reader</span>.<span class="i">readLine</span>();
      <span class="k">if</span> (<span class="i">line</span> == <span class="k">null</span>) <span class="k">break</span>;
      <span class="i">run</span>(<span class="i">line</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, add after <em>runFile</em>()</div>

<p>The <code>readLine()</code> function, as the name so helpfully implies, reads a line of
input from the user on the command line and returns the result. To kill an
interactive command-line app, you usually type Control-D. Doing so signals an
&ldquo;end-of-file&rdquo; condition to the program. When that happens <code>readLine()</code> returns
<code>null</code>, so we check for that to exit the loop.</p>
<p>Both the prompt and the file runner are thin wrappers around this core function:</p>
<div class="codehilite"><div class="source-file"><em>lox/Lox.java</em><br>
add after <em>runPrompt</em>()</div>
<pre>  <span class="k">private</span> <span class="k">static</span> <span class="t">void</span> <span class="i">run</span>(<span class="t">String</span> <span class="i">source</span>) {
    <span class="t">Scanner</span> <span class="i">scanner</span> = <span class="k">new</span> <span class="t">Scanner</span>(<span class="i">source</span>);
    <span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">tokens</span> = <span class="i">scanner</span>.<span class="i">scanTokens</span>();

    <span class="c">// For now, just print the tokens.</span>
    <span class="k">for</span> (<span class="t">Token</span> <span class="i">token</span> : <span class="i">tokens</span>) {
      <span class="t">System</span>.<span class="i">out</span>.<span class="i">println</span>(<span class="i">token</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, add after <em>runPrompt</em>()</div>

<p>It&rsquo;s not super useful yet since we haven&rsquo;t written the interpreter, but baby
steps, you know? Right now, it prints out the tokens our forthcoming scanner
will emit so that we can see if we&rsquo;re making progress.</p>
<h3><a href="#error-handling" id="error-handling"><small>4&#8202;.&#8202;1&#8202;.&#8202;1</small>Error handling</a></h3>
<p>While we&rsquo;re setting things up, another key piece of infrastructure is <em>error
handling</em>. Textbooks sometimes gloss over this because it&rsquo;s more a practical
matter than a formal computer science-y problem. But if you care about making a
language that&rsquo;s actually <em>usable</em>, then handling errors gracefully is vital.</p>
<p>The tools our language provides for dealing with errors make up a large portion
of its user interface. When the user&rsquo;s code is working, they aren&rsquo;t thinking
about our language at all<span class="em">&mdash;</span>their headspace is all about <em>their program</em>. It&rsquo;s
usually only when things go wrong that they notice our implementation.</p>
<p><span name="errors">When</span> that happens, it&rsquo;s up to us to give the user all
the information they need to understand what went wrong and guide them gently
back to where they are trying to go. Doing that well means thinking about error
handling all through the implementation of our interpreter, starting now.</p>
<aside name="errors">
<p>Having said all that, for <em>this</em> interpreter, what we&rsquo;ll build is pretty bare
bones. I&rsquo;d love to talk about interactive debuggers, static analyzers, and other
fun stuff, but there&rsquo;s only so much ink in the pen.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/Lox.java</em><br>
add after <em>run</em>()</div>
<pre>  <span class="k">static</span> <span class="t">void</span> <span class="i">error</span>(<span class="t">int</span> <span class="i">line</span>, <span class="t">String</span> <span class="i">message</span>) {
    <span class="i">report</span>(<span class="i">line</span>, <span class="s">&quot;&quot;</span>, <span class="i">message</span>);
  }

  <span class="k">private</span> <span class="k">static</span> <span class="t">void</span> <span class="i">report</span>(<span class="t">int</span> <span class="i">line</span>, <span class="t">String</span> <span class="i">where</span>,
                             <span class="t">String</span> <span class="i">message</span>) {
    <span class="t">System</span>.<span class="i">err</span>.<span class="i">println</span>(
        <span class="s">&quot;[line &quot;</span> + <span class="i">line</span> + <span class="s">&quot;] Error&quot;</span> + <span class="i">where</span> + <span class="s">&quot;: &quot;</span> + <span class="i">message</span>);
    <span class="i">hadError</span> = <span class="k">true</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, add after <em>run</em>()</div>

<p>This <code>error()</code> function and its <code>report()</code> helper tell the user some syntax
error occurred on a given line. That is really the bare minimum to be able to
claim you even <em>have</em> error reporting. Imagine if you accidentally left a
dangling comma in some function call and the interpreter printed out:</p>
<div class="codehilite"><pre>Error: Unexpected &quot;,&quot; somewhere in your code. Good luck finding it!
</pre></div>
<p>That&rsquo;s not very helpful. We need to at least point them to the right line. Even
better would be the beginning and end column so they know <em>where</em> in the line.
Even better than <em>that</em> is to <em>show</em> the user the offending line, like:</p>
<div class="codehilite"><pre>Error: Unexpected &quot;,&quot; in argument list.

    15 | function(first, second,);
                               ^-- Here.
</pre></div>
<p>I&rsquo;d love to implement something like that in this book but the honest truth is
that it&rsquo;s a lot of grungy string manipulation code. Very useful for users, but
not super fun to read in a book and not very technically interesting. So we&rsquo;ll
stick with just a line number. In your own interpreters, please do as I say and
not as I do.</p>
<p>The primary reason we&rsquo;re sticking this error reporting function in the main Lox
class is because of that <code>hadError</code> field. It&rsquo;s defined here:</p>
<div class="codehilite"><pre class="insert-before">public class Lox {
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in class <em>Lox</em></div>
<pre class="insert">  <span class="k">static</span> <span class="t">boolean</span> <span class="i">hadError</span> = <span class="k">false</span>;
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in class <em>Lox</em></div>

<p>We&rsquo;ll use this to ensure we don&rsquo;t try to execute code that has a known error.
Also, it lets us exit with a non-zero exit code like a good command-line citizen
should.</p>
<div class="codehilite"><pre class="insert-before">    run(new String(bytes, Charset.defaultCharset()));
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>runFile</em>()</div>
<pre class="insert">

    <span class="c">// Indicate an error in the exit code.</span>
    <span class="k">if</span> (<span class="i">hadError</span>) <span class="t">System</span>.<span class="i">exit</span>(<span class="n">65</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>runFile</em>()</div>

<p>We need to reset this flag in the interactive loop. If the user makes a mistake,
it shouldn&rsquo;t kill their entire session.</p>
<div class="codehilite"><pre class="insert-before">      run(line);
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>runPrompt</em>()</div>
<pre class="insert">      <span class="i">hadError</span> = <span class="k">false</span>;
</pre><pre class="insert-after">    }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>runPrompt</em>()</div>

<p>The other reason I pulled the error reporting out here instead of stuffing it
into the scanner and other phases where the error might occur is to remind you
that it&rsquo;s good engineering practice to separate the code that <em>generates</em> the
errors from the code that <em>reports</em> them.</p>
<p>Various phases of the front end will detect errors, but it&rsquo;s not really their
job to know how to present that to a user. In a full-featured language
implementation, you will likely have multiple ways errors get displayed: on
stderr, in an IDE&rsquo;s error window, logged to a file, etc. You don&rsquo;t want that
code smeared all over your scanner and parser.</p>
<p>Ideally, we would have an actual abstraction, some kind of <span
name="reporter">&ldquo;ErrorReporter&rdquo;</span> interface that gets passed to the scanner
and parser so that we can swap out different reporting strategies. For our
simple interpreter here, I didn&rsquo;t do that, but I did at least move the code for
error reporting into a different class.</p>
<aside name="reporter">
<p>I had exactly that when I first implemented jlox. I ended up tearing it out
because it felt over-engineered for the minimal interpreter in this book.</p>
</aside>
<p>With some rudimentary error handling in place, our application shell is ready.
Once we have a Scanner class with a <code>scanTokens()</code> method, we can start running
it. Before we get to that, let&rsquo;s get more precise about what tokens are.</p>
<h2><a href="#lexemes-and-tokens" id="lexemes-and-tokens"><small>4&#8202;.&#8202;2</small>Lexemes and Tokens</a></h2>
<p>Here&rsquo;s a line of Lox code:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">language</span> = <span class="s">&quot;lox&quot;</span>;
</pre></div>
<p>Here, <code>var</code> is the keyword for declaring a variable. That three-character
sequence &ldquo;v-a-r&rdquo; means something. But if we yank three letters out of the
middle of <code>language</code>, like &ldquo;g-u-a&rdquo;, those don&rsquo;t mean anything on their own.</p>
<p>That&rsquo;s what lexical analysis is about. Our job is to scan through the list of
characters and group them together into the smallest sequences that still
represent something. Each of these blobs of characters is called a <strong>lexeme</strong>.
In that example line of code, the lexemes are:</p><img src="image/scanning/lexemes.png" alt="'var', 'language', '=', 'lox', ';'" />
<p>The lexemes are only the raw substrings of the source code. However, in the
process of grouping character sequences into lexemes, we also stumble upon some
other useful information. When we take the lexeme and bundle it together with
that other data, the result is a token. It includes useful stuff like:</p>
<h3><a href="#token-type" id="token-type"><small>4&#8202;.&#8202;2&#8202;.&#8202;1</small>Token type</a></h3>
<p>Keywords are part of the shape of the language&rsquo;s grammar, so the parser often
has code like, &ldquo;If the next token is <code>while</code> then do<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>&rdquo; That means the parser
wants to know not just that it has a lexeme for some identifier, but that it has
a <em>reserved</em> word, and <em>which</em> keyword it is.</p>
<p>The <span name="ugly">parser</span> could categorize tokens from the raw lexeme
by comparing the strings, but that&rsquo;s slow and kind of ugly. Instead, at the
point that we recognize a lexeme, we also remember which <em>kind</em> of lexeme it
represents. We have a different type for each keyword, operator, bit of
punctuation, and literal type.</p>
<aside name="ugly">
<p>After all, string comparison ends up looking at individual characters, and isn&rsquo;t
that the scanner&rsquo;s job?</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/TokenType.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">enum</span> <span class="t">TokenType</span> {
  <span class="c">// Single-character tokens.</span>
  <span class="i">LEFT_PAREN</span>, <span class="i">RIGHT_PAREN</span>, <span class="i">LEFT_BRACE</span>, <span class="i">RIGHT_BRACE</span>,
  <span class="i">COMMA</span>, <span class="i">DOT</span>, <span class="i">MINUS</span>, <span class="i">PLUS</span>, <span class="i">SEMICOLON</span>, <span class="i">SLASH</span>, <span class="i">STAR</span>,

  <span class="c">// One or two character tokens.</span>
  <span class="i">BANG</span>, <span class="i">BANG_EQUAL</span>,
  <span class="i">EQUAL</span>, <span class="i">EQUAL_EQUAL</span>,
  <span class="i">GREATER</span>, <span class="i">GREATER_EQUAL</span>,
  <span class="i">LESS</span>, <span class="i">LESS_EQUAL</span>,

  <span class="c">// Literals.</span>
  <span class="i">IDENTIFIER</span>, <span class="i">STRING</span>, <span class="i">NUMBER</span>,

  <span class="c">// Keywords.</span>
  <span class="i">AND</span>, <span class="i">CLASS</span>, <span class="i">ELSE</span>, <span class="i">FALSE</span>, <span class="i">FUN</span>, <span class="i">FOR</span>, <span class="i">IF</span>, <span class="i">NIL</span>, <span class="i">OR</span>,
  <span class="i">PRINT</span>, <span class="i">RETURN</span>, <span class="i">SUPER</span>, <span class="i">THIS</span>, <span class="i">TRUE</span>, <span class="i">VAR</span>, <span class="i">WHILE</span>,

  <span class="i">EOF</span>
}
</pre></div>
<div class="source-file-narrow"><em>lox/TokenType.java</em>, create new file</div>

<h3><a href="#literal-value" id="literal-value"><small>4&#8202;.&#8202;2&#8202;.&#8202;2</small>Literal value</a></h3>
<p>There are lexemes for literal values<span class="em">&mdash;</span>numbers and strings and the like. Since
the scanner has to walk each character in the literal to correctly identify it,
it can also convert that textual representation of a value to the living runtime
object that will be used by the interpreter later.</p>
<h3><a href="#location-information" id="location-information"><small>4&#8202;.&#8202;2&#8202;.&#8202;3</small>Location information</a></h3>
<p>Back when I was preaching the gospel about error handling, we saw that we need
to tell users <em>where</em> errors occurred. Tracking that starts here. In our simple
interpreter, we note only which line the token appears on, but more
sophisticated implementations include the column and length too.</p>
<aside name="location">
<p>Some token implementations store the location as two numbers: the offset from
the beginning of the source file to the beginning of the lexeme, and the length
of the lexeme. The scanner needs to know these anyway, so there&rsquo;s no overhead to
calculate them.</p>
<p>An offset can be converted to line and column positions later by looking back at
the source file and counting the preceding newlines. That sounds slow, and it
is. However, you need to do it <em>only when you need to actually display a line
and column to the user</em>. Most tokens never appear in an error message. For
those, the less time you spend calculating position information ahead of time,
the better.</p>
</aside>
<p>We take all of this data and wrap it in a class.</p>
<div class="codehilite"><div class="source-file"><em>lox/Token.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">class</span> <span class="t">Token</span> {
  <span class="k">final</span> <span class="t">TokenType</span> <span class="i">type</span>;
  <span class="k">final</span> <span class="t">String</span> <span class="i">lexeme</span>;
  <span class="k">final</span> <span class="t">Object</span> <span class="i">literal</span>;
  <span class="k">final</span> <span class="t">int</span> <span class="i">line</span>;<span name="location"> </span>

  <span class="t">Token</span>(<span class="t">TokenType</span> <span class="i">type</span>, <span class="t">String</span> <span class="i">lexeme</span>, <span class="t">Object</span> <span class="i">literal</span>, <span class="t">int</span> <span class="i">line</span>) {
    <span class="k">this</span>.<span class="i">type</span> = <span class="i">type</span>;
    <span class="k">this</span>.<span class="i">lexeme</span> = <span class="i">lexeme</span>;
    <span class="k">this</span>.<span class="i">literal</span> = <span class="i">literal</span>;
    <span class="k">this</span>.<span class="i">line</span> = <span class="i">line</span>;
  }

  <span class="k">public</span> <span class="t">String</span> <span class="i">toString</span>() {
    <span class="k">return</span> <span class="i">type</span> + <span class="s">&quot; &quot;</span> + <span class="i">lexeme</span> + <span class="s">&quot; &quot;</span> + <span class="i">literal</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/Token.java</em>, create new file</div>

<p>Now we have an object with enough structure to be useful for all of the later
phases of the interpreter.</p>
<h2><a href="#regular-languages-and-expressions" id="regular-languages-and-expressions"><small>4&#8202;.&#8202;3</small>Regular Languages and Expressions</a></h2>
<p>Now that we know what we&rsquo;re trying to produce, let&rsquo;s, well, produce it. The core
of the scanner is a loop. Starting at the first character of the source code,
the scanner figures out what lexeme the character belongs to, and consumes it
and any following characters that are part of that lexeme. When it reaches the
end of that lexeme, it emits a token.</p>
<p>Then it loops back and does it again, starting from the very next character in
the source code. It keeps doing that, eating characters and occasionally, uh,
excreting tokens, until it reaches the end of the input.</p>
<p><span name="alligator"></span></p><img src="image/scanning/lexigator.png" alt="An alligator eating characters and, well, you don't want to know." />
<aside name="alligator">
<p>Lexical analygator.</p>
</aside>
<p>The part of the loop where we look at a handful of characters to figure out
which kind of lexeme it &ldquo;matches&rdquo; may sound familiar. If you know regular
expressions, you might consider defining a regex for each kind of lexeme and
using those to match characters. For example, Lox has the same rules as C for
identifiers (variable names and the like). This regex matches one:</p>
<div class="codehilite"><pre>[a-zA-Z_][a-zA-Z_0-9]*
</pre></div>
<p>If you did think of regular expressions, your intuition is a deep one. The rules
that determine how a particular language groups characters into lexemes are
called its <span name="theory"><strong>lexical grammar</strong></span>. In Lox, as in most
programming languages, the rules of that grammar are simple enough for the
language to be classified a <strong><a href="https://en.wikipedia.org/wiki/Regular_language">regular language</a></strong>. That&rsquo;s the same &ldquo;regular&rdquo;
as in regular expressions.</p>
<aside name="theory">
<p>It pains me to gloss over the theory so much, especially when it&rsquo;s as
interesting as I think the <a href="https://en.wikipedia.org/wiki/Chomsky_hierarchy">Chomsky hierarchy</a> and <a href="https://en.wikipedia.org/wiki/Finite-state_machine">finite-state machines</a>
are. But the honest truth is other books cover this better than I could.
<a href="https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools"><em>Compilers: Principles, Techniques, and Tools</em></a> (universally known as
&ldquo;the dragon book&rdquo;) is the canonical reference.</p>
</aside>
<p>You very precisely <em>can</em> recognize all of the different lexemes for Lox using
regexes if you want to, and there&rsquo;s a pile of interesting theory underlying why
that is and what it means. Tools like <a href="http://dinosaur.compilertools.net/lex/">Lex</a> or
<a href="https://github.com/westes/flex">Flex</a> are designed expressly to let you do this<span class="em">&mdash;</span>throw a handful of regexes
at them, and they give you a complete scanner <span name="lex">back</span>.</p>
<aside name="lex">
<p>Lex was created by Mike Lesk and Eric Schmidt. Yes, the same Eric Schmidt who
was executive chairman of Google. I&rsquo;m not saying programming languages are a
surefire path to wealth and fame, but we <em>can</em> count at least one
mega billionaire among us.</p>
</aside>
<p>Since our goal is to understand how a scanner does what it does, we won&rsquo;t be
delegating that task. We&rsquo;re about handcrafted goods.</p>
<h2><a href="#the-scanner-class" id="the-scanner-class"><small>4&#8202;.&#8202;4</small>The Scanner Class</a></h2>
<p>Without further ado, let&rsquo;s make ourselves a scanner.</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.ArrayList</span>;
<span class="k">import</span> <span class="i">java.util.HashMap</span>;
<span class="k">import</span> <span class="i">java.util.List</span>;
<span class="k">import</span> <span class="i">java.util.Map</span>;

<span class="k">import static</span> <span class="i">com.craftinginterpreters.lox.TokenType.*</span>;<span name="static-import"> </span>

<span class="k">class</span> <span class="t">Scanner</span> {
  <span class="k">private</span> <span class="k">final</span> <span class="t">String</span> <span class="i">source</span>;
  <span class="k">private</span> <span class="k">final</span> <span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">tokens</span> = <span class="k">new</span> <span class="t">ArrayList</span>&lt;&gt;();

  <span class="t">Scanner</span>(<span class="t">String</span> <span class="i">source</span>) {
    <span class="k">this</span>.<span class="i">source</span> = <span class="i">source</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, create new file</div>

<aside name="static-import">
<p>I know static imports are considered bad style by some, but they save me from
having to sprinkle <code>TokenType.</code> all over the scanner and parser. Forgive me, but
every character counts in a book.</p>
</aside>
<p>We store the raw source code as a simple string, and we have a list ready to
fill with tokens we&rsquo;re going to generate. The aforementioned loop that does that
looks like this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>Scanner</em>()</div>
<pre>  <span class="t">List</span>&lt;<span class="t">Token</span>&gt; <span class="i">scanTokens</span>() {
    <span class="k">while</span> (!<span class="i">isAtEnd</span>()) {
      <span class="c">// We are at the beginning of the next lexeme.</span>
      <span class="i">start</span> = <span class="i">current</span>;
      <span class="i">scanToken</span>();
    }

    <span class="i">tokens</span>.<span class="i">add</span>(<span class="k">new</span> <span class="t">Token</span>(<span class="i">EOF</span>, <span class="s">&quot;&quot;</span>, <span class="k">null</span>, <span class="i">line</span>));
    <span class="k">return</span> <span class="i">tokens</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>Scanner</em>()</div>

<p>The scanner works its way through the source code, adding tokens until it runs
out of characters. Then it appends one final &ldquo;end of file&rdquo; token. That isn&rsquo;t
strictly needed, but it makes our parser a little cleaner.</p>
<p>This loop depends on a couple of fields to keep track of where the scanner is in
the source code.</p>
<div class="codehilite"><pre class="insert-before">  private final List&lt;Token&gt; tokens = new ArrayList&lt;&gt;();
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in class <em>Scanner</em></div>
<pre class="insert">  <span class="k">private</span> <span class="t">int</span> <span class="i">start</span> = <span class="n">0</span>;
  <span class="k">private</span> <span class="t">int</span> <span class="i">current</span> = <span class="n">0</span>;
  <span class="k">private</span> <span class="t">int</span> <span class="i">line</span> = <span class="n">1</span>;
</pre><pre class="insert-after">

  Scanner(String source) {
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in class <em>Scanner</em></div>

<p>The <code>start</code> and <code>current</code> fields are offsets that index into the string. The
<code>start</code> field points to the first character in the lexeme being scanned, and
<code>current</code> points at the character currently being considered. The <code>line</code> field
tracks what source line <code>current</code> is on so we can produce tokens that know their
location.</p>
<p>Then we have one little helper function that tells us if we&rsquo;ve consumed all the
characters.</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>scanTokens</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">isAtEnd</span>() {
    <span class="k">return</span> <span class="i">current</span> &gt;= <span class="i">source</span>.<span class="i">length</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>scanTokens</em>()</div>

<h2><a href="#recognizing-lexemes" id="recognizing-lexemes"><small>4&#8202;.&#8202;5</small>Recognizing Lexemes</a></h2>
<p>In each turn of the loop, we scan a single token. This is the real heart of the
scanner. We&rsquo;ll start simple. Imagine if every lexeme were only a single character
long. All you would need to do is consume the next character and pick a token type for
it. Several lexemes <em>are</em> only a single character in Lox, so let&rsquo;s start with
those.</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>scanTokens</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">scanToken</span>() {
    <span class="t">char</span> <span class="i">c</span> = <span class="i">advance</span>();
    <span class="k">switch</span> (<span class="i">c</span>) {
      <span class="k">case</span> <span class="s">&#39;(&#39;</span>: <span class="i">addToken</span>(<span class="i">LEFT_PAREN</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;)&#39;</span>: <span class="i">addToken</span>(<span class="i">RIGHT_PAREN</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;{&#39;</span>: <span class="i">addToken</span>(<span class="i">LEFT_BRACE</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;}&#39;</span>: <span class="i">addToken</span>(<span class="i">RIGHT_BRACE</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;,&#39;</span>: <span class="i">addToken</span>(<span class="i">COMMA</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;.&#39;</span>: <span class="i">addToken</span>(<span class="i">DOT</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;-&#39;</span>: <span class="i">addToken</span>(<span class="i">MINUS</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;+&#39;</span>: <span class="i">addToken</span>(<span class="i">PLUS</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;;&#39;</span>: <span class="i">addToken</span>(<span class="i">SEMICOLON</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;*&#39;</span>: <span class="i">addToken</span>(<span class="i">STAR</span>); <span class="k">break</span>;<span name="slash"> </span>
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>scanTokens</em>()</div>

<aside name="slash">
<p>Wondering why <code>/</code> isn&rsquo;t in here? Don&rsquo;t worry, we&rsquo;ll get to it.</p>
</aside>
<p>Again, we need a couple of helper methods.</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>isAtEnd</em>()</div>
<pre>  <span class="k">private</span> <span class="t">char</span> <span class="i">advance</span>() {
    <span class="k">return</span> <span class="i">source</span>.<span class="i">charAt</span>(<span class="i">current</span>++);
  }

  <span class="k">private</span> <span class="t">void</span> <span class="i">addToken</span>(<span class="t">TokenType</span> <span class="i">type</span>) {
    <span class="i">addToken</span>(<span class="i">type</span>, <span class="k">null</span>);
  }

  <span class="k">private</span> <span class="t">void</span> <span class="i">addToken</span>(<span class="t">TokenType</span> <span class="i">type</span>, <span class="t">Object</span> <span class="i">literal</span>) {
    <span class="t">String</span> <span class="i">text</span> = <span class="i">source</span>.<span class="i">substring</span>(<span class="i">start</span>, <span class="i">current</span>);
    <span class="i">tokens</span>.<span class="i">add</span>(<span class="k">new</span> <span class="t">Token</span>(<span class="i">type</span>, <span class="i">text</span>, <span class="i">literal</span>, <span class="i">line</span>));
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>isAtEnd</em>()</div>

<p>The <code>advance()</code> method consumes the next character in the source file and
returns it. Where <code>advance()</code> is for input, <code>addToken()</code> is for output. It grabs
the text of the current lexeme and creates a new token for it. We&rsquo;ll use the
other overload to handle tokens with literal values soon.</p>
<h3><a href="#lexical-errors" id="lexical-errors"><small>4&#8202;.&#8202;5&#8202;.&#8202;1</small>Lexical errors</a></h3>
<p>Before we get too far in, let&rsquo;s take a moment to think about errors at the
lexical level. What happens if a user throws a source file containing some
characters Lox doesn&rsquo;t use, like <code>@#^</code>, at our interpreter? Right now, those
characters get silently discarded. They aren&rsquo;t used by the Lox language, but
that doesn&rsquo;t mean the interpreter can pretend they aren&rsquo;t there. Instead, we
report an error.</p>
<div class="codehilite"><pre class="insert-before">      case '*': addToken(STAR); break;<span name="slash"> </span>
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">

      <span class="k">default</span>:
        <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">line</span>, <span class="s">&quot;Unexpected character.&quot;</span>);
        <span class="k">break</span>;
</pre><pre class="insert-after">    }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>()</div>

<p>Note that the erroneous character is still <em>consumed</em> by the earlier call to
<code>advance()</code>. That&rsquo;s important so that we don&rsquo;t get stuck in an infinite loop.</p>
<p>Note also that we <span name="shotgun"><em>keep scanning</em></span>. There may be
other errors later in the program. It gives our users a better experience if we
detect as many of those as possible in one go. Otherwise, they see one tiny
error and fix it, only to have the next error appear, and so on. Syntax error
Whac-A-Mole is no fun.</p>
<p>(Don&rsquo;t worry. Since <code>hadError</code> gets set, we&rsquo;ll never try to <em>execute</em> any of the
code, even though we keep going and scan the rest of it.)</p>
<aside name="shotgun">
<p>The code reports each invalid character separately, so this shotguns the user
with a blast of errors if they accidentally paste a big blob of weird text.
Coalescing a run of invalid characters into a single error would give a nicer
user experience.</p>
</aside>
<h3><a href="#operators" id="operators"><small>4&#8202;.&#8202;5&#8202;.&#8202;2</small>Operators</a></h3>
<p>We have single-character lexemes working, but that doesn&rsquo;t cover all of Lox&rsquo;s
operators. What about <code>!</code>? It&rsquo;s a single character, right? Sometimes, yes, but
if the very next character is an equals sign, then we should instead create a
<code>!=</code> lexeme. Note that the <code>!</code> and <code>=</code> are <em>not</em> two independent operators. You
can&rsquo;t write <code>!   =</code> in Lox and have it behave like an inequality operator.
That&rsquo;s why we need to scan <code>!=</code> as a single lexeme. Likewise, <code>&lt;</code>, <code>&gt;</code>, and <code>=</code>
can all be followed by <code>=</code> to create the other equality and comparison
operators.</p>
<p>For all of these, we need to look at the second character.</p>
<div class="codehilite"><pre class="insert-before">      case '*': addToken(STAR); break;<span name="slash"> </span>
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="s">&#39;!&#39;</span>:
        <span class="i">addToken</span>(<span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="i">BANG_EQUAL</span> : <span class="i">BANG</span>);
        <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;=&#39;</span>:
        <span class="i">addToken</span>(<span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="i">EQUAL_EQUAL</span> : <span class="i">EQUAL</span>);
        <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;&lt;&#39;</span>:
        <span class="i">addToken</span>(<span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="i">LESS_EQUAL</span> : <span class="i">LESS</span>);
        <span class="k">break</span>;
      <span class="k">case</span> <span class="s">&#39;&gt;&#39;</span>:
        <span class="i">addToken</span>(<span class="i">match</span>(<span class="s">&#39;=&#39;</span>) ? <span class="i">GREATER_EQUAL</span> : <span class="i">GREATER</span>);
        <span class="k">break</span>;
</pre><pre class="insert-after">

      default:
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>()</div>

<p>Those cases use this new method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>scanToken</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">match</span>(<span class="t">char</span> <span class="i">expected</span>) {
    <span class="k">if</span> (<span class="i">isAtEnd</span>()) <span class="k">return</span> <span class="k">false</span>;
    <span class="k">if</span> (<span class="i">source</span>.<span class="i">charAt</span>(<span class="i">current</span>) != <span class="i">expected</span>) <span class="k">return</span> <span class="k">false</span>;

    <span class="i">current</span>++;
    <span class="k">return</span> <span class="k">true</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>scanToken</em>()</div>

<p>It&rsquo;s like a conditional <code>advance()</code>. We only consume the current character if
it&rsquo;s what we&rsquo;re looking for.</p>
<p>Using <code>match()</code>, we recognize these lexemes in two stages. When we reach, for
example, <code>!</code>, we jump to its switch case. That means we know the lexeme <em>starts</em>
with <code>!</code>. Then we look at the next character to determine if we&rsquo;re on a <code>!=</code> or
merely a <code>!</code>.</p>
<h2><a href="#longer-lexemes" id="longer-lexemes"><small>4&#8202;.&#8202;6</small>Longer Lexemes</a></h2>
<p>We&rsquo;re still missing one operator: <code>/</code> for division. That character needs a
little special handling because comments begin with a slash too.</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="s">&#39;/&#39;</span>:
        <span class="k">if</span> (<span class="i">match</span>(<span class="s">&#39;/&#39;</span>)) {
          <span class="c">// A comment goes until the end of the line.</span>
          <span class="k">while</span> (<span class="i">peek</span>() != <span class="s">&#39;\n&#39;</span> &amp;&amp; !<span class="i">isAtEnd</span>()) <span class="i">advance</span>();
        } <span class="k">else</span> {
          <span class="i">addToken</span>(<span class="i">SLASH</span>);
        }
        <span class="k">break</span>;
</pre><pre class="insert-after">

      default:
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>()</div>

<p>This is similar to the other two-character operators, except that when we find a
second <code>/</code>, we don&rsquo;t end the token yet. Instead, we keep consuming characters
until we reach the end of the line.</p>
<p>This is our general strategy for handling longer lexemes. After we detect the
beginning of one, we shunt over to some lexeme-specific code that keeps eating
characters until it sees the end.</p>
<p>We&rsquo;ve got another helper:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>match</em>()</div>
<pre>  <span class="k">private</span> <span class="t">char</span> <span class="i">peek</span>() {
    <span class="k">if</span> (<span class="i">isAtEnd</span>()) <span class="k">return</span> <span class="s">&#39;\0&#39;</span>;
    <span class="k">return</span> <span class="i">source</span>.<span class="i">charAt</span>(<span class="i">current</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>match</em>()</div>

<p>It&rsquo;s sort of like <code>advance()</code>, but doesn&rsquo;t consume the character. This is called
<span name="match"><strong>lookahead</strong></span>. Since it only looks at the current
unconsumed character, we have <em>one character of lookahead</em>. The smaller this
number is, generally, the faster the scanner runs. The rules of the lexical
grammar dictate how much lookahead we need. Fortunately, most languages in wide
use peek only one or two characters ahead.</p>
<aside name="match">
<p>Technically, <code>match()</code> is doing lookahead too. <code>advance()</code> and <code>peek()</code> are the
fundamental operators and <code>match()</code> combines them.</p>
</aside>
<p>Comments are lexemes, but they aren&rsquo;t meaningful, and the parser doesn&rsquo;t want
to deal with them. So when we reach the end of the comment, we <em>don&rsquo;t</em> call
<code>addToken()</code>. When we loop back around to start the next lexeme, <code>start</code> gets
reset and the comment&rsquo;s lexeme disappears in a puff of smoke.</p>
<p>While we&rsquo;re at it, now&rsquo;s a good time to skip over those other meaningless
characters: newlines and whitespace.</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">

      <span class="k">case</span> <span class="s">&#39; &#39;</span>:
      <span class="k">case</span> <span class="s">&#39;\r&#39;</span>:
      <span class="k">case</span> <span class="s">&#39;\t&#39;</span>:
        <span class="c">// Ignore whitespace.</span>
        <span class="k">break</span>;

      <span class="k">case</span> <span class="s">&#39;\n&#39;</span>:
        <span class="i">line</span>++;
        <span class="k">break</span>;
</pre><pre class="insert-after">

      default:
        Lox.error(line, &quot;Unexpected character.&quot;);
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>()</div>

<p>When encountering whitespace, we simply go back to the beginning of the scan
loop. That starts a new lexeme <em>after</em> the whitespace character. For newlines,
we do the same thing, but we also increment the line counter. (This is why we
used <code>peek()</code> to find the newline ending a comment instead of <code>match()</code>. We want
that newline to get us here so we can update <code>line</code>.)</p>
<p>Our scanner is getting smarter. It can handle fairly free-form code like:</p>
<div class="codehilite"><pre><span class="c">// this is a comment</span>
(( )){} <span class="c">// grouping stuff</span>
!*+-/=&lt;&gt; &lt;= == <span class="c">// operators</span>
</pre></div>
<h3><a href="#string-literals" id="string-literals"><small>4&#8202;.&#8202;6&#8202;.&#8202;1</small>String literals</a></h3>
<p>Now that we&rsquo;re comfortable with longer lexemes, we&rsquo;re ready to tackle literals.
We&rsquo;ll do strings first, since they always begin with a specific character, <code>"</code>.</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">

      <span class="k">case</span> <span class="s">&#39;&quot;&#39;</span>: <span class="i">string</span>(); <span class="k">break</span>;
</pre><pre class="insert-after">

      default:
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>()</div>

<p>That calls:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>scanToken</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">string</span>() {
    <span class="k">while</span> (<span class="i">peek</span>() != <span class="s">&#39;&quot;&#39;</span> &amp;&amp; !<span class="i">isAtEnd</span>()) {
      <span class="k">if</span> (<span class="i">peek</span>() == <span class="s">&#39;\n&#39;</span>) <span class="i">line</span>++;
      <span class="i">advance</span>();
    }

    <span class="k">if</span> (<span class="i">isAtEnd</span>()) {
      <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">line</span>, <span class="s">&quot;Unterminated string.&quot;</span>);
      <span class="k">return</span>;
    }

    <span class="c">// The closing &quot;.</span>
    <span class="i">advance</span>();

    <span class="c">// Trim the surrounding quotes.</span>
    <span class="t">String</span> <span class="i">value</span> = <span class="i">source</span>.<span class="i">substring</span>(<span class="i">start</span> + <span class="n">1</span>, <span class="i">current</span> - <span class="n">1</span>);
    <span class="i">addToken</span>(<span class="i">STRING</span>, <span class="i">value</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>scanToken</em>()</div>

<p>Like with comments, we consume characters until we hit the <code>"</code> that ends the
string. We also gracefully handle running out of input before the string is
closed and report an error for that.</p>
<p>For no particular reason, Lox supports multi-line strings. There are pros and
cons to that, but prohibiting them was a little more complex than allowing them,
so I left them in. That does mean we also need to update <code>line</code> when we hit a
newline inside a string.</p>
<p>Finally, the last interesting bit is that when we create the token, we also
produce the actual string <em>value</em> that will be used later by the interpreter.
Here, that conversion only requires a <code>substring()</code> to strip off the surrounding
quotes. If Lox supported escape sequences like <code>\n</code>, we&rsquo;d unescape those here.</p>
<h3><a href="#number-literals" id="number-literals"><small>4&#8202;.&#8202;6&#8202;.&#8202;2</small>Number literals</a></h3>
<p>All numbers in Lox are floating point at runtime, but both integer and decimal
literals are supported. A number literal is a series of <span
name="minus">digits</span> optionally followed by a <code>.</code> and one or more trailing
digits.</p>
<aside name="minus">
<p>Since we look only for a digit to start a number, that means <code>-123</code> is not a
number <em>literal</em>. Instead, <code>-123</code> is an <em>expression</em> that applies <code>-</code> to the
number literal <code>123</code>. In practice, the result is the same, though it has one
interesting edge case if we were to add method calls on numbers. Consider:</p>
<div class="codehilite"><pre><span class="k">print</span> -<span class="n">123</span>.<span class="i">abs</span>();
</pre></div>
<p>This prints <code>-123</code> because negation has lower precedence than method calls. We
could fix that by making <code>-</code> part of the number literal. But then consider:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">n</span> = <span class="n">123</span>;
<span class="k">print</span> -<span class="i">n</span>.<span class="i">abs</span>();
</pre></div>
<p>This still produces <code>-123</code>, so now the language seems inconsistent. No matter
what you do, some case ends up weird.</p>
</aside>
<div class="codehilite"><pre><span class="n">1234</span>
<span class="n">12.34</span>
</pre></div>
<p>We don&rsquo;t allow a leading or trailing decimal point, so these are both invalid:</p>
<div class="codehilite"><pre>.<span class="n">1234</span>
<span class="n">1234</span>.
</pre></div>
<p>We could easily support the former, but I left it out to keep things simple. The
latter gets weird if we ever want to allow methods on numbers like <code>123.sqrt()</code>.</p>
<p>To recognize the beginning of a number lexeme, we look for any digit. It&rsquo;s kind
of tedious to add cases for every decimal digit, so we&rsquo;ll stuff it in the
default case instead.</p>
<div class="codehilite"><pre class="insert-before">      default:
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()<br>
replace 1 line</div>
<pre class="insert">        <span class="k">if</span> (<span class="i">isDigit</span>(<span class="i">c</span>)) {
          <span class="i">number</span>();
        } <span class="k">else</span> {
          <span class="t">Lox</span>.<span class="i">error</span>(<span class="i">line</span>, <span class="s">&quot;Unexpected character.&quot;</span>);
        }
</pre><pre class="insert-after">        break;
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>(), replace 1 line</div>

<p>This relies on this little utility:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>peek</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">isDigit</span>(<span class="t">char</span> <span class="i">c</span>) {
    <span class="k">return</span> <span class="i">c</span> &gt;= <span class="s">&#39;0&#39;</span> &amp;&amp; <span class="i">c</span> &lt;= <span class="s">&#39;9&#39;</span>;
  }<span name="is-digit"> </span>
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>peek</em>()</div>

<aside name="is-digit">
<p>The Java standard library provides <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Character.html#isDigit(char)"><code>Character.isDigit()</code></a>, which seems
like a good fit. Alas, that method allows things like Devanagari digits,
full-width numbers, and other funny stuff we don&rsquo;t want.</p>
</aside>
<p>Once we know we are in a number, we branch to a separate method to consume the
rest of the literal, like we do with strings.</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>scanToken</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">number</span>() {
    <span class="k">while</span> (<span class="i">isDigit</span>(<span class="i">peek</span>())) <span class="i">advance</span>();

    <span class="c">// Look for a fractional part.</span>
    <span class="k">if</span> (<span class="i">peek</span>() == <span class="s">&#39;.&#39;</span> &amp;&amp; <span class="i">isDigit</span>(<span class="i">peekNext</span>())) {
      <span class="c">// Consume the &quot;.&quot;</span>
      <span class="i">advance</span>();

      <span class="k">while</span> (<span class="i">isDigit</span>(<span class="i">peek</span>())) <span class="i">advance</span>();
    }

    <span class="i">addToken</span>(<span class="i">NUMBER</span>,
        <span class="t">Double</span>.<span class="i">parseDouble</span>(<span class="i">source</span>.<span class="i">substring</span>(<span class="i">start</span>, <span class="i">current</span>)));
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>scanToken</em>()</div>

<p>We consume as many digits as we find for the integer part of the literal. Then
we look for a fractional part, which is a decimal point (<code>.</code>) followed by at
least one digit. If we do have a fractional part, again, we consume as many
digits as we can find.</p>
<p>Looking past the decimal point requires a second character of lookahead since we
don&rsquo;t want to consume the <code>.</code> until we&rsquo;re sure there is a digit <em>after</em> it. So
we add:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>peek</em>()</div>
<pre>  <span class="k">private</span> <span class="t">char</span> <span class="i">peekNext</span>() {
    <span class="k">if</span> (<span class="i">current</span> + <span class="n">1</span> &gt;= <span class="i">source</span>.<span class="i">length</span>()) <span class="k">return</span> <span class="s">&#39;\0&#39;</span>;
    <span class="k">return</span> <span class="i">source</span>.<span class="i">charAt</span>(<span class="i">current</span> + <span class="n">1</span>);
  }<span name="peek-next"> </span>
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>peek</em>()</div>

<aside name="peek-next">
<p>I could have made <code>peek()</code> take a parameter for the number of characters ahead
to look instead of defining two functions, but that would allow <em>arbitrarily</em>
far lookahead. Providing these two functions makes it clearer to a reader of the
code that our scanner looks ahead at most two characters.</p>
</aside>
<p>Finally, we convert the lexeme to its numeric value. Our interpreter uses Java&rsquo;s
<code>Double</code> type to represent numbers, so we produce a value of that type. We&rsquo;re
using Java&rsquo;s own parsing method to convert the lexeme to a real Java double. We
could implement that ourselves, but, honestly, unless you&rsquo;re trying to cram for
an upcoming programming interview, it&rsquo;s not worth your time.</p>
<p>The remaining literals are Booleans and <code>nil</code>, but we handle those as keywords,
which gets us to<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h2><a href="#reserved-words-and-identifiers" id="reserved-words-and-identifiers"><small>4&#8202;.&#8202;7</small>Reserved Words and Identifiers</a></h2>
<p>Our scanner is almost done. The only remaining pieces of the lexical grammar to
implement are identifiers and their close cousins, the reserved words. You might
think we could match keywords like <code>or</code> in the same way we handle
multiple-character operators like <code>&lt;=</code>.</p>
<div class="codehilite"><pre><span class="k">case</span> <span class="s">&#39;o&#39;</span>:
  <span class="k">if</span> (<span class="i">match</span>(<span class="s">&#39;r&#39;</span>)) {
    <span class="i">addToken</span>(<span class="i">OR</span>);
  }
  <span class="k">break</span>;
</pre></div>
<p>Consider what would happen if a user named a variable <code>orchid</code>. The scanner
would see the first two letters, <code>or</code>, and immediately emit an <code>or</code> keyword
token. This gets us to an important principle called <span
name="maximal"><strong>maximal munch</strong></span>. When two lexical grammar rules can both
match a chunk of code that the scanner is looking at, <em>whichever one matches the
most characters wins</em>.</p>
<p>That rule states that if we can match <code>orchid</code> as an identifier and <code>or</code> as a
keyword, then the former wins. This is also why we tacitly assumed, previously,
that <code>&lt;=</code> should be scanned as a single <code>&lt;=</code> token and not <code>&lt;</code> followed by <code>=</code>.</p>
<aside name="maximal">
<p>Consider this nasty bit of C code:</p>
<div class="codehilite"><pre>---<span class="i">a</span>;
</pre></div>
<p>Is it valid? That depends on how the scanner splits the lexemes. What if the scanner
sees it like this:</p>
<div class="codehilite"><pre>- --<span class="i">a</span>;
</pre></div>
<p>Then it could be parsed. But that would require the scanner to know about the
grammatical structure of the surrounding code, which entangles things more than
we want. Instead, the maximal munch rule says that it is <em>always</em> scanned like:</p>
<div class="codehilite"><pre>-- -<span class="i">a</span>;
</pre></div>
<p>It scans it that way even though doing so leads to a syntax error later in the
parser.</p>
</aside>
<p>Maximal munch means we can&rsquo;t easily detect a reserved word until we&rsquo;ve reached
the end of what might instead be an identifier. After all, a reserved word <em>is</em>
an identifier; it&rsquo;s just one that has been claimed by the language for its own
use. That&rsquo;s where the term <strong>reserved word</strong> comes from.</p>
<p>So we begin by assuming any lexeme starting with a letter or underscore is an
identifier.</p>
<div class="codehilite"><pre class="insert-before">      default:
        if (isDigit(c)) {
          number();
</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>scanToken</em>()</div>
<pre class="insert">        } <span class="k">else</span> <span class="k">if</span> (<span class="i">isAlpha</span>(<span class="i">c</span>)) {
          <span class="i">identifier</span>();
</pre><pre class="insert-after">        } else {
          Lox.error(line, &quot;Unexpected character.&quot;);
        }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>scanToken</em>()</div>

<p>The rest of the code lives over here:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>scanToken</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">identifier</span>() {
    <span class="k">while</span> (<span class="i">isAlphaNumeric</span>(<span class="i">peek</span>())) <span class="i">advance</span>();

    <span class="i">addToken</span>(<span class="i">IDENTIFIER</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>scanToken</em>()</div>

<p>We define that in terms of these helpers:</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
add after <em>peekNext</em>()</div>
<pre>  <span class="k">private</span> <span class="t">boolean</span> <span class="i">isAlpha</span>(<span class="t">char</span> <span class="i">c</span>) {
    <span class="k">return</span> (<span class="i">c</span> &gt;= <span class="s">&#39;a&#39;</span> &amp;&amp; <span class="i">c</span> &lt;= <span class="s">&#39;z&#39;</span>) ||
           (<span class="i">c</span> &gt;= <span class="s">&#39;A&#39;</span> &amp;&amp; <span class="i">c</span> &lt;= <span class="s">&#39;Z&#39;</span>) ||
            <span class="i">c</span> == <span class="s">&#39;_&#39;</span>;
  }

  <span class="k">private</span> <span class="t">boolean</span> <span class="i">isAlphaNumeric</span>(<span class="t">char</span> <span class="i">c</span>) {
    <span class="k">return</span> <span class="i">isAlpha</span>(<span class="i">c</span>) || <span class="i">isDigit</span>(<span class="i">c</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, add after <em>peekNext</em>()</div>

<p>That gets identifiers working. To handle keywords, we see if the identifier&rsquo;s
lexeme is one of the reserved words. If so, we use a token type specific to that
keyword. We define the set of reserved words in a map.</p>
<div class="codehilite"><div class="source-file"><em>lox/Scanner.java</em><br>
in class <em>Scanner</em></div>
<pre>  <span class="k">private</span> <span class="k">static</span> <span class="k">final</span> <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">TokenType</span>&gt; <span class="i">keywords</span>;

  <span class="k">static</span> {
    <span class="i">keywords</span> = <span class="k">new</span> <span class="t">HashMap</span>&lt;&gt;();
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;and&quot;</span>,    <span class="i">AND</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;class&quot;</span>,  <span class="i">CLASS</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;else&quot;</span>,   <span class="i">ELSE</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;false&quot;</span>,  <span class="i">FALSE</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;for&quot;</span>,    <span class="i">FOR</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;fun&quot;</span>,    <span class="i">FUN</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;if&quot;</span>,     <span class="i">IF</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;nil&quot;</span>,    <span class="i">NIL</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;or&quot;</span>,     <span class="i">OR</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;print&quot;</span>,  <span class="i">PRINT</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;return&quot;</span>, <span class="i">RETURN</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;super&quot;</span>,  <span class="i">SUPER</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;this&quot;</span>,   <span class="i">THIS</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;true&quot;</span>,   <span class="i">TRUE</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;var&quot;</span>,    <span class="i">VAR</span>);
    <span class="i">keywords</span>.<span class="i">put</span>(<span class="s">&quot;while&quot;</span>,  <span class="i">WHILE</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in class <em>Scanner</em></div>

<p>Then, after we scan an identifier, we check to see if it matches anything in the
map.</p>
<div class="codehilite"><pre class="insert-before">    while (isAlphaNumeric(peek())) advance();

</pre><div class="source-file"><em>lox/Scanner.java</em><br>
in <em>identifier</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">String</span> <span class="i">text</span> = <span class="i">source</span>.<span class="i">substring</span>(<span class="i">start</span>, <span class="i">current</span>);
    <span class="t">TokenType</span> <span class="i">type</span> = <span class="i">keywords</span>.<span class="i">get</span>(<span class="i">text</span>);
    <span class="k">if</span> (<span class="i">type</span> == <span class="k">null</span>) <span class="i">type</span> = <span class="i">IDENTIFIER</span>;
    <span class="i">addToken</span>(<span class="i">type</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Scanner.java</em>, in <em>identifier</em>(), replace 1 line</div>

<p>If so, we use that keyword&rsquo;s token type. Otherwise, it&rsquo;s a regular user-defined
identifier.</p>
<p>And with that, we now have a complete scanner for the entire Lox lexical
grammar. Fire up the REPL and type in some valid and invalid code. Does it
produce the tokens you expect? Try to come up with some interesting edge cases
and see if it handles them as it should.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>The lexical grammars of Python and Haskell are not <em>regular</em>. What does that
mean, and why aren&rsquo;t they?</p>
</li>
<li>
<p>Aside from separating tokens<span class="em">&mdash;</span>distinguishing <code>print foo</code> from <code>printfoo</code><span class="em">&mdash;</span>spaces aren&rsquo;t used for much in most languages. However, in a couple of
dark corners, a space <em>does</em> affect how code is parsed in CoffeeScript,
Ruby, and the C preprocessor. Where and what effect does it have in each of
those languages?</p>
</li>
<li>
<p>Our scanner here, like most, discards comments and whitespace since those
aren&rsquo;t needed by the parser. Why might you want to write a scanner that does
<em>not</em> discard those? What would it be useful for?</p>
</li>
<li>
<p>Add support to Lox&rsquo;s scanner for C-style <code>/* ... */</code> block comments. Make
sure to handle newlines in them. Consider allowing them to nest. Is adding
support for nesting more work than you expected? Why?</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Implicit Semicolons</a></h2>
<p>Programmers today are spoiled for choice in languages and have gotten picky
about syntax. They want their language to look clean and modern. One bit of
syntactic lichen that almost every new language scrapes off (and some ancient
ones like BASIC never had) is <code>;</code> as an explicit statement terminator.</p>
<p>Instead, they treat a newline as a statement terminator where it makes sense to
do so. The &ldquo;where it makes sense&rdquo; part is the challenging bit. While <em>most</em>
statements are on their own line, sometimes you need to spread a single
statement across a couple of lines. Those intermingled newlines should not be
treated as terminators.</p>
<p>Most of the obvious cases where the newline should be ignored are easy to
detect, but there are a handful of nasty ones:</p>
<ul>
<li>
<p>A return value on the next line:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">condition</span>) <span class="k">return</span>
<span class="s">&quot;value&quot;</span>
</pre></div>
<p>Is &ldquo;value&rdquo; the value being returned, or do we have a <code>return</code> statement with
  no value followed by an expression statement containing a string literal?</p>
</li>
<li>
<p>A parenthesized expression on the next line:</p>
<div class="codehilite"><pre><span class="i">func</span>
(<span class="i">parenthesized</span>)
</pre></div>
<p>Is this a call to <code>func(parenthesized)</code>, or two expression statements, one
  for <code>func</code> and one for a parenthesized expression?</p>
</li>
<li>
<p>A <code>-</code> on the next line:</p>
<div class="codehilite"><pre><span class="i">first</span>
-<span class="i">second</span>
</pre></div>
<p>Is this <code>first - second</code><span class="em">&mdash;</span>an infix subtraction<span class="em">&mdash;</span>or two expression
  statements, one for <code>first</code> and one to negate <code>second</code>?</p>
</li>
</ul>
<p>In all of these, treating the newline as a separator and ignoring it would
both produce valid code, but possibly not the code the user wants. Across
languages, there is an unsettling variety of rules used to decide which newlines
are separators. Here are a few:</p>
<ul>
<li>
<p><a href="https://www.lua.org/pil/1.1.html">Lua</a> completely ignores newlines, but carefully controls its grammar such
that no separator between statements is needed at all in most cases. This is
perfectly legit:</p>
<div class="codehilite"><pre><span class="i">a</span> = <span class="n">1</span> <span class="i">b</span> = <span class="n">2</span>
</pre></div>
<p>Lua avoids the <code>return</code> problem by requiring a <code>return</code> statement to be the
very last statement in a block. If there is a value after <code>return</code> before
the keyword <code>end</code>, it <em>must</em> be for the <code>return</code>. For the other two cases,
they allow an explicit <code>;</code> and expect users to use that. In practice, that
almost never happens because there&rsquo;s no point in a parenthesized or unary
negation expression statement.</p>
</li>
<li>
<p><a href="https://golang.org/ref/spec#Semicolons">Go</a> handles newlines in the scanner. If a newline appears following one
of a handful of token types that are known to potentially end a statement,
the newline is treated like a semicolon. Otherwise it is ignored. The Go
team provides a canonical code formatter, <a href="https://golang.org/cmd/gofmt/">gofmt</a>, and the ecosystem is
fervent about its use, which ensures that idiomatically styled code works well
with this simple rule.</p>
</li>
<li>
<p><a href="https://docs.python.org/3.5/reference/lexical_analysis.html#implicit-line-joining">Python</a> treats all newlines as significant unless an explicit backslash
is used at the end of a line to continue it to the next line. However,
newlines anywhere inside a pair of brackets (<code>()</code>, <code>[]</code>, or <code>{}</code>) are
ignored. Idiomatic style strongly prefers the latter.</p>
<p>This rule works well for Python because it is a highly statement-oriented
language. In particular, Python&rsquo;s grammar ensures a statement never appears
inside an expression. C does the same, but many other languages which have a
&ldquo;lambda&rdquo; or function literal syntax do not.</p>
<p>An example in JavaScript:</p>
<div class="codehilite"><pre><span class="i">console</span>.<span class="i">log</span>(<span class="k">function</span>() {
  <span class="i">statement</span>();
});
</pre></div>
<p>Here, the <code>console.log()</code> <em>expression</em> contains a function literal which
in turn contains the <em>statement</em> <code>statement();</code>.</p>
<p>Python would need a different set of rules for implicitly joining lines if
you could get back <em>into</em> a <span name="lambda">statement</span> where
newlines should become meaningful while still nested inside brackets.</p>
</li>
</ul>
<aside name="lambda">
<p>And now you know why Python&rsquo;s <code>lambda</code> allows only a single expression body.</p>
</aside>
<ul>
<li>
<p>JavaScript&rsquo;s &ldquo;<a href="https://www.ecma-international.org/ecma-262/5.1/#sec-7.9">automatic semicolon insertion</a>&rdquo; rule is the real odd
one. Where other languages assume most newlines <em>are</em> meaningful and only a
few should be ignored in multi-line statements, JS assumes the opposite. It
treats all of your newlines as meaningless whitespace <em>unless</em> it encounters
a parse error. If it does, it goes back and tries turning the previous
newline into a semicolon to get something grammatically valid.</p>
<p>This design note would turn into a design diatribe if I went into complete
detail about how that even <em>works</em>, much less all the various ways that
JavaScript&rsquo;s &ldquo;solution&rdquo; is a bad idea. It&rsquo;s a mess. JavaScript is the only
language I know where many style guides demand explicit semicolons after
every statement even though the language theoretically lets you elide them.</p>
</li>
</ul>
<p>If you&rsquo;re designing a new language, you almost surely <em>should</em> avoid an explicit
statement terminator. Programmers are creatures of fashion like other humans, and
semicolons are as passé as ALL CAPS KEYWORDS. Just make sure you pick a set of
rules that make sense for your language&rsquo;s particular grammar and idioms. And
don&rsquo;t do what JavaScript did.</p>
</div>

<footer>
<a href="representing-code.html" class="next">
  Next Chapter: &ldquo;Representing Code&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/script.js
================================================
// Wires up the page once the DOM is ready: the expandable navigation toggle,
// the pinned state of the floating nav, and every event that can change the
// layout enough that the asides need to be repositioned.
$(function() {
  // Show or hide the expandable part of the navigation.
  $("#expand-nav").click(function() {
    $(".expandable").toggleClass("shown");
  });

  // Mark the floating nav as pinned once the window has scrolled past 84px.
  $(window).scroll(function() {
    var nav = $("nav.floating");
    if ($(window).scrollTop() > 84) {
      nav.addClass("pinned");
    } else {
      nav.removeClass("pinned");
    }
  });

  $(window).resize(refreshAsides);

  // Since we may not have the height correct for the images, adjust the asides
  // too when an image is loaded.
  $("img").on("load", function() {
    refreshAsides();
  });

  // Web fonts change the text metrics when they arrive, which moves the spans
  // the asides are anchored to. If the browser supports the CSS Font Loading
  // API, refresh once the fonts have finished loading.
  if (document.fonts && document.fonts.ready) {
    document.fonts.ready.then(function() {
      refreshAsides();
    });
  }

  // Lame. As a fallback for browsers without the font loading API, do another
  // refresh after 200ms when the font is *probably* loaded to hack around the
  // fact that the metrics changed a bit.
  window.setTimeout(refreshAsides, 200);

  refreshAsides();
});

// Repositions every <aside> so that it sits vertically next to the <span>
// carrying the same "name" attribute. Problems (an aside without a name, a
// missing anchor span, or an anchor with no position) are logged to the
// console and that aside is left where it is.
function refreshAsides() {
  $("aside").each(function() {
    var aside = $(this);

    // If the asides are inline, clear their position. The check is a document
    // width of at most 48 * 20 = 960 (presumably the stylesheet's breakpoint
    // for the narrow layout -- confirm against the CSS).
    if ($(document).width() <= 48 * 20) {
      aside.css('top', 'auto');
      return;
    }

    // Find the span the aside should be anchored next to.
    var name = aside.attr("name");
    if (name == null) {
      window.console.log("No name for aside:");
      // Log the DOM element itself. jQuery 3 removed the `.context` property,
      // so reading it from the jQuery object only yields undefined.
      window.console.log(this);
      return;
    }

    // $() always returns a jQuery object, never null, so a missing anchor has
    // to be detected by checking for an empty match set.
    var span = $("span[name='" + name + "']");
    if (span.length === 0) {
      window.console.log("Could not find span for '" + name + "'");
      return;
    }

    // Vertically position the aside next to the span it annotates.
    var pos = span.position();
    if (pos == null) {
      window.console.log("Could not find position for '" + name + "'");
      window.console.log(span);
      return;
    }

    // Asides marked "bottom" are shifted up by their own height (plus a fixed
    // 23px offset); all others are placed 6px above the span's top.
    // NOTE(review): position() is relative to the offset parent while offset()
    // is relative to the document -- this presumably relies on the page layout
    // making the two agree; confirm before changing the markup.
    if (aside.hasClass("bottom")) {
      aside.offset({top: pos.top + 23 - aside.height()});
    } else {
      aside.offset({top: pos.top - 6});
    }
  });
}

================================================
FILE: site/statements-and-state.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Statements and State &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Statements and State<small>8</small></a></h3>

<ul>
    <li><a href="#statements"><small>8.1</small> Statements</a></li>
    <li><a href="#global-variables"><small>8.2</small> Global Variables</a></li>
    <li><a href="#environments"><small>8.3</small> Environments</a></li>
    <li><a href="#assignment"><small>8.4</small> Assignment</a></li>
    <li><a href="#scope"><small>8.5</small> Scope</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Implicit Variable Declaration</a></li>
</ul>


<div class="prev-next">
    <a href="evaluating-expressions.html" title="Evaluating Expressions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="control-flow.html" title="Control Flow" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="evaluating-expressions.html" title="Evaluating Expressions" class="prev">←</a>
<a href="control-flow.html" title="Control Flow" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Statements and State<small>8</small></a></h3>

<ul>
    <li><a href="#statements"><small>8.1</small> Statements</a></li>
    <li><a href="#global-variables"><small>8.2</small> Global Variables</a></li>
    <li><a href="#environments"><small>8.3</small> Environments</a></li>
    <li><a href="#assignment"><small>8.4</small> Assignment</a></li>
    <li><a href="#scope"><small>8.5</small> Scope</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Implicit Variable Declaration</a></li>
</ul>


<div class="prev-next">
    <a href="evaluating-expressions.html" title="Evaluating Expressions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter">&uarr;&nbsp;Up</a>
    <a href="control-flow.html" title="Control Flow" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">8</div>
  <h1>Statements and State</h1>

<blockquote>
<p>All my life, my heart has yearned for a thing I cannot name.
<cite>Andr&eacute; Breton, <em>Mad Love</em></cite></p>
</blockquote>
<p>The interpreter we have so far feels less like programming a real language and
more like punching buttons on a calculator. &ldquo;Programming&rdquo; to me means building
up a system out of smaller pieces. We can&rsquo;t do that yet because we have no way
to bind a name to some data or function. We can&rsquo;t compose software without a way
to refer to the pieces.</p>
<p>To support bindings, our interpreter needs internal state. When you define a
variable at the beginning of the program and use it at the end, the interpreter
has to hold on to the value of that variable in the meantime. So in this
chapter, we will give our interpreter a brain that can not just process, but
<em>remember</em>.</p><img src="image/statements-and-state/brain.png" alt="A brain, presumably remembering stuff." />
<p>State and <span name="expr">statements</span> go hand in hand. Since statements,
by definition, don&rsquo;t evaluate to a value, they need to do something else to be
useful. That something is called a <strong>side effect</strong>. It could mean producing
user-visible output or modifying some state in the interpreter that can be
detected later. The latter makes them a great fit for defining variables or
other named entities.</p>
<aside name="expr">
<p>You could make a language that treats variable declarations as expressions that
both create a binding and produce a value. The only language I know that does
that is Tcl. Scheme seems like a contender, but note that after a <code>let</code>
expression is evaluated, the variable it bound is forgotten. The <code>define</code> syntax
is not an expression.</p>
</aside>
<p>In this chapter, we&rsquo;ll do all of that. We&rsquo;ll define statements that produce
output (<code>print</code>) and create state (<code>var</code>). We&rsquo;ll add expressions to access and
assign to variables. Finally, we&rsquo;ll add blocks and local scope. That&rsquo;s a lot to
stuff into one chapter, but we&rsquo;ll chew through it all one bite at a time.</p>
<h2><a href="#statements" id="statements"><small>8&#8202;.&#8202;1</small>Statements</a></h2>
<p>We start by extending Lox&rsquo;s grammar with statements. They aren&rsquo;t very different
from expressions. We start with the two simplest kinds:</p>
<ol>
<li>
<p>An <strong>expression statement</strong> lets you place an expression where a statement
is expected. They exist to evaluate expressions that have side effects. You
may not notice them, but you use them all the time in <span
name="expr-stmt">C</span>, Java, and other languages. Any time you see a
function or method call followed by a <code>;</code>, you&rsquo;re looking at an expression
statement.</p>
<aside name="expr-stmt">
<p>Pascal is an outlier. It distinguishes between <em>procedures</em> and <em>functions</em>.
Functions return values, but procedures cannot. There is a statement form
for calling a procedure, but functions can only be called where an
expression is expected. There are no expression statements in Pascal.</p>
</aside></li>
<li>
<p>A <strong><code>print</code> statement</strong> evaluates an expression and displays the result to
the user. I admit it&rsquo;s weird to bake printing right into the language
instead of making it a library function. Doing so is a concession to the
fact that we&rsquo;re building this interpreter one chapter at a time and want to
be able to play with it before it&rsquo;s all done. To make print a library
function, we&rsquo;d have to wait until we had all of the machinery for defining
and calling functions <span name="print">before</span> we could witness any
side effects.</p>
<aside name="print">
<p>I will note with only a modicum of defensiveness that BASIC and Python
have dedicated <code>print</code> statements and they are real languages. Granted,
Python did remove their <code>print</code> statement in 3.0<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
</aside></li>
</ol>
<p>New syntax means new grammar rules. In this chapter, we finally gain the ability
to parse an entire Lox script. Since Lox is an imperative, dynamically typed
language, the &ldquo;top level&rdquo; of a script is simply a list of statements. The new
rules are:</p>
<div class="codehilite"><pre><span class="i">program</span>        → <span class="i">statement</span>* <span class="t">EOF</span> ;

<span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">printStmt</span> ;

<span class="i">exprStmt</span>       → <span class="i">expression</span> <span class="s">&quot;;&quot;</span> ;
<span class="i">printStmt</span>      → <span class="s">&quot;print&quot;</span> <span class="i">expression</span> <span class="s">&quot;;&quot;</span> ;
</pre></div>
<p>The first rule is now <code>program</code>, which is the starting point for the grammar and
represents a complete Lox script or REPL entry. A program is a list of
statements followed by the special &ldquo;end of file&rdquo; token. The mandatory end token
ensures the parser consumes the entire input and doesn&rsquo;t silently ignore
erroneous unconsumed tokens at the end of a script.</p>
<p>Right now, <code>statement</code> only has two cases for the two kinds of statements we&rsquo;ve
described. We&rsquo;ll fill in more later in this chapter and in the following ones.
The next step is turning this grammar into something we can store in memory<span class="em">&mdash;</span>syntax trees.</p>
<h3><a href="#statement-syntax-trees" id="statement-syntax-trees"><small>8&#8202;.&#8202;1&#8202;.&#8202;1</small>Statement syntax trees</a></h3>
<p>There is no place in the grammar where both an expression and a statement are
allowed. The operands of, say, <code>+</code> are always expressions, never statements. The
body of a <code>while</code> loop is always a statement.</p>
<p>Since the two syntaxes are disjoint, we don&rsquo;t need a single base class that they
all inherit from. Splitting expressions and statements into separate class
hierarchies enables the Java compiler to help us find dumb mistakes like passing
a statement to a Java method that expects an expression.</p>
<p>That means a new base class for statements. As our elders did before us, we will
use the cryptic name &ldquo;Stmt&rdquo;. With great <span name="foresight">foresight</span>,
I have designed our little AST metaprogramming script in anticipation of this.
That&rsquo;s why we passed in &ldquo;Expr&rdquo; as a parameter to <code>defineAst()</code>. Now we add
another call to define Stmt and its <span name="stmt-ast">subclasses</span>.</p>
<aside name="foresight">
<p>Not really foresight: I wrote all the code for the book before I sliced it into
chapters.</p>
</aside>
<div class="codehilite"><pre class="insert-before">      &quot;Unary    : Token operator, Expr right&quot;
    ));
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">

    <span class="i">defineAst</span>(<span class="i">outputDir</span>, <span class="s">&quot;Stmt&quot;</span>, <span class="t">Arrays</span>.<span class="i">asList</span>(
      <span class="s">&quot;Expression : Expr expression&quot;</span>,
      <span class="s">&quot;Print      : Expr expression&quot;</span>
    ));
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="stmt-ast">
<p>The generated code for the new nodes is in <a href="appendix-ii.html">Appendix II</a>: <a href="appendix-ii.html#expression-statement">Expression statement</a>, <a href="appendix-ii.html#print-statement">Print statement</a>.</p>
</aside>
<p>Run the AST generator script and behold the resulting &ldquo;Stmt.java&rdquo; file with the
syntax tree classes we need for expression and <code>print</code> statements. Don&rsquo;t forget
to add the file to your IDE project or makefile or whatever.</p>
<h3><a href="#parsing-statements" id="parsing-statements"><small>8&#8202;.&#8202;1&#8202;.&#8202;2</small>Parsing statements</a></h3>
<p>The parser&rsquo;s <code>parse()</code> method that parses and returns a single expression was a
temporary hack to get the last chapter up and running. Now that our grammar has
the correct starting rule, <code>program</code>, we can turn <code>parse()</code> into the real deal.</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
method <em>parse</em>()<br>
replace 7 lines</div>
<pre>  <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">parse</span>() {
    <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span> = <span class="k">new</span> <span class="t">ArrayList</span>&lt;&gt;();
    <span class="k">while</span> (!<span class="i">isAtEnd</span>()) {
      <span class="i">statements</span>.<span class="i">add</span>(<span class="i">statement</span>());
    }

    <span class="k">return</span> <span class="i">statements</span>;<span name="parse-error-handling"> </span>
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, method <em>parse</em>(), replace 7 lines</div>

<aside name="parse-error-handling">
<p>What about the code we had in here for catching <code>ParseError</code> exceptions? We&rsquo;ll
put better parse error handling in place soon when we add support for additional
statement types.</p>
</aside>
<p>This parses a series of statements, as many as it can find until it hits the end
of the input. This is a pretty direct translation of the <code>program</code> rule into
recursive descent style. We must also chant a minor prayer to the Java verbosity
gods since we are using ArrayList now.</p>
<div class="codehilite"><pre class="insert-before">package com.craftinginterpreters.lox;

</pre><div class="source-file"><em>lox/Parser.java</em></div>
<pre class="insert"><span class="k">import</span> <span class="i">java.util.ArrayList</span>;
</pre><pre class="insert-after">import java.util.List;
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em></div>

<p>A program is a list of statements, and we parse one of those statements using
this method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>expression</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">statement</span>() {
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">PRINT</span>)) <span class="k">return</span> <span class="i">printStatement</span>();

    <span class="k">return</span> <span class="i">expressionStatement</span>();
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>expression</em>()</div>

<p>A little bare bones, but we&rsquo;ll fill it in with more statement types later. We
determine which specific statement rule is matched by looking at the current
token. A <code>print</code> token means it&rsquo;s obviously a <code>print</code> statement.</p>
<p>If the next token doesn&rsquo;t look like any known kind of statement, we assume it
must be an expression statement. That&rsquo;s the typical final fallthrough case when
parsing a statement, since it&rsquo;s hard to proactively recognize an expression from
its first token.</p>
<p>Each statement kind gets its own method. First <code>print</code>:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>statement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">printStatement</span>() {
    <span class="t">Expr</span> <span class="i">value</span> = <span class="i">expression</span>();
    <span class="i">consume</span>(<span class="i">SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after value.&quot;</span>);
    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Print</span>(<span class="i">value</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>statement</em>()</div>

<p>Since we already matched and consumed the <code>print</code> token itself, we don&rsquo;t need to
do that here. We parse the subsequent expression, consume the terminating
semicolon, and emit the syntax tree.</p>
<p>If we didn&rsquo;t match a <code>print</code> statement, we must have one of these:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>printStatement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">expressionStatement</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">expression</span>();
    <span class="i">consume</span>(<span class="i">SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after expression.&quot;</span>);
    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Expression</span>(<span class="i">expr</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>printStatement</em>()</div>

<p>Similar to the previous method, we parse an expression followed by a semicolon.
We wrap that Expr in a Stmt of the right type and return it.</p>
<h3><a href="#executing-statements" id="executing-statements"><small>8&#8202;.&#8202;1&#8202;.&#8202;3</small>Executing statements</a></h3>
<p>We&rsquo;re running through the previous couple of chapters in microcosm, working our
way through the front end. Our parser can now produce statement syntax trees, so
the next and final step is to interpret them. As in expressions, we use the
Visitor pattern, but we have a new visitor interface, Stmt.Visitor, to
implement since statements have their own base class.</p>
<p>We add that to the list of interfaces Interpreter implements.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
replace 1 line</div>
<pre class="insert"><span class="k">class</span> <span class="t">Interpreter</span> <span class="k">implements</span> <span class="t">Expr</span>.<span class="t">Visitor</span>&lt;<span class="t">Object</span>&gt;,
                             <span class="t">Stmt</span>.<span class="t">Visitor</span>&lt;<span class="t">Void</span>&gt; {
</pre><pre class="insert-after">  void interpret(Expr expression) {<span name="void"> </span>
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, replace 1 line</div>

<aside name="void">
<p>Java doesn&rsquo;t let you use lowercase &ldquo;void&rdquo; as a generic type argument for obscure
reasons having to do with type erasure and the stack. Instead, there is a
separate &ldquo;Void&rdquo; type specifically for this use. Sort of a &ldquo;boxed void&rdquo;, like
&ldquo;Integer&rdquo; is for &ldquo;int&rdquo;.</p>
</aside>
<p>Unlike expressions, statements produce no values, so the return type of the
visit methods is Void, not Object. We have two statement types, and we need a
visit method for each. The easiest is expression statements.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>evaluate</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitExpressionStmt</span>(<span class="t">Stmt</span>.<span class="t">Expression</span> <span class="i">stmt</span>) {
    <span class="i">evaluate</span>(<span class="i">stmt</span>.<span class="i">expression</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>evaluate</em>()</div>

<p>We evaluate the inner expression using our existing <code>evaluate()</code> method and
<span name="discard">discard</span> the value. Then we return <code>null</code>. Java
requires that to satisfy the special capitalized Void return type. Weird, but
what can you do?</p>
<aside name="discard">
<p>Appropriately enough, we discard the value returned by <code>evaluate()</code> by placing
that call inside a <em>Java</em> expression statement.</p>
</aside>
<p>The <code>print</code> statement&rsquo;s visit method isn&rsquo;t much different.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitExpressionStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitPrintStmt</span>(<span class="t">Stmt</span>.<span class="t">Print</span> <span class="i">stmt</span>) {
    <span class="t">Object</span> <span class="i">value</span> = <span class="i">evaluate</span>(<span class="i">stmt</span>.<span class="i">expression</span>);
    <span class="t">System</span>.<span class="i">out</span>.<span class="i">println</span>(<span class="i">stringify</span>(<span class="i">value</span>));
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitExpressionStmt</em>()</div>

<p>Before discarding the expression&rsquo;s value, we convert it to a string using the
<code>stringify()</code> method we introduced in the last chapter and then dump it to
stdout.</p>
<p>Our interpreter is able to visit statements now, but we have some work to do to
feed them to it. First, modify the old <code>interpret()</code> method in the Interpreter
class to accept a list of statements<span class="em">&mdash;</span>in other words, a program.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
method <em>interpret</em>()<br>
replace 8 lines</div>
<pre>  <span class="t">void</span> <span class="i">interpret</span>(<span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span>) {
    <span class="k">try</span> {
      <span class="k">for</span> (<span class="t">Stmt</span> <span class="i">statement</span> : <span class="i">statements</span>) {
        <span class="i">execute</span>(<span class="i">statement</span>);
      }
    } <span class="k">catch</span> (<span class="t">RuntimeError</span> <span class="i">error</span>) {
      <span class="t">Lox</span>.<span class="i">runtimeError</span>(<span class="i">error</span>);
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, method <em>interpret</em>(), replace 8 lines</div>

<p>This replaces the old code which took a single expression. The new code relies
on this tiny helper method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>evaluate</em>()</div>
<pre>  <span class="k">private</span> <span class="t">void</span> <span class="i">execute</span>(<span class="t">Stmt</span> <span class="i">stmt</span>) {
    <span class="i">stmt</span>.<span class="i">accept</span>(<span class="k">this</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>evaluate</em>()</div>

<p>That&rsquo;s the statement analogue to the <code>evaluate()</code> method we have for
expressions. Since we&rsquo;re working with lists now, we need to let Java know.</p>
<div class="codehilite"><pre class="insert-before">package com.craftinginterpreters.lox;
</pre><div class="source-file"><em>lox/Interpreter.java</em></div>
<pre class="insert">

<span class="k">import</span> <span class="i">java.util.List</span>;
</pre><pre class="insert-after">

class Interpreter implements Expr.Visitor&lt;Object&gt;,
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em></div>

<p>The main Lox class is still trying to parse a single expression and pass it to
the interpreter. We fix the parsing line like so:</p>
<div class="codehilite"><pre class="insert-before">    Parser parser = new Parser(tokens);
</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span> = <span class="i">parser</span>.<span class="i">parse</span>();
</pre><pre class="insert-after">

    // Stop if there was a syntax error.
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>run</em>(), replace 1 line</div>

<p>And then replace the call to the interpreter with this:</p>
<div class="codehilite"><pre class="insert-before">    if (hadError) return;

</pre><div class="source-file"><em>lox/Lox.java</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="i">interpreter</span>.<span class="i">interpret</span>(<span class="i">statements</span>);
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Lox.java</em>, in <em>run</em>(), replace 1 line</div>

<p>Basically just plumbing the new syntax through. OK, fire up the interpreter and
give it a try. At this point, it&rsquo;s worth sketching out a little Lox program in a
text file to run as a script. Something like:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="s">&quot;one&quot;</span>;
<span class="k">print</span> <span class="k">true</span>;
<span class="k">print</span> <span class="n">2</span> + <span class="n">1</span>;
</pre></div>
<p>It almost looks like a real program! Note that the REPL, too, now requires you
to enter a full statement instead of a simple expression. Don&rsquo;t forget your
semicolons.</p>
<h2><a href="#global-variables" id="global-variables"><small>8&#8202;.&#8202;2</small>Global Variables</a></h2>
<p>Now that we have statements, we can start working on state. Before we get into
all of the complexity of lexical scoping, we&rsquo;ll start off with the easiest kind
of variables<span class="em">&mdash;</span><span name="globals">globals</span>. We need two new constructs.</p>
<ol>
<li>
<p>A <strong>variable declaration</strong> statement brings a new variable into the world.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">beverage</span> = <span class="s">&quot;espresso&quot;</span>;
</pre></div>
<p>This creates a new binding that associates a name (here, &ldquo;beverage&rdquo;) with a
value (here, the string <code>"espresso"</code>).</p>
</li>
<li>
<p>Once that&rsquo;s done, a <strong>variable expression</strong> accesses that binding. When the
identifier &ldquo;beverage&rdquo; is used as an expression, it looks up the value bound
to that name and returns it.</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="i">beverage</span>; <span class="c">// &quot;espresso&quot;.</span>
</pre></div>
</li>
</ol>
<p>Later, we&rsquo;ll add assignment and block scope, but that&rsquo;s enough to get moving.</p>
<aside name="globals">
<p>Global state gets a bad rap. Sure, lots of global state<span class="em">&mdash;</span>especially <em>mutable</em>
state<span class="em">&mdash;</span>makes it hard to maintain large programs. It&rsquo;s good software
engineering to minimize how much you use.</p>
<p>But when you&rsquo;re slapping together a simple programming language or, heck, even
learning your first language, the flat simplicity of global variables helps. My
first language was BASIC and, though I outgrew it eventually, it was nice that I
didn&rsquo;t have to wrap my head around scoping rules before I could make a computer
do fun stuff.</p>
</aside>
<h3><a href="#variable-syntax" id="variable-syntax"><small>8&#8202;.&#8202;2&#8202;.&#8202;1</small>Variable syntax</a></h3>
<p>As before, we&rsquo;ll work through the implementation from front to back, starting
with the syntax. Variable declarations are statements, but they are different
from other statements, and we&rsquo;re going to split the statement grammar in two to
handle them. That&rsquo;s because the grammar restricts where some kinds of statements
are allowed.</p>
<p>The clauses in control flow statements<span class="em">&mdash;</span>think the <em>then</em> and <em>else</em> branches of
an <code>if</code> statement or the body of a <code>while</code><span class="em">&mdash;</span>are each a single statement. But
that statement is not allowed to be one that declares a name. This is OK:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">monday</span>) <span class="k">print</span> <span class="s">&quot;Ugh, already?&quot;</span>;
</pre></div>
<p>But this is not:</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">monday</span>) <span class="k">var</span> <span class="i">beverage</span> = <span class="s">&quot;espresso&quot;</span>;
</pre></div>
<p>We <em>could</em> allow the latter, but it&rsquo;s confusing. What is the scope of that
<code>beverage</code> variable? Does it persist after the <code>if</code> statement? If so, what is
its value on days other than Monday? Does the variable exist at all on those
days?</p>
<p>Code like this is weird, so C, Java, and friends all disallow it. It&rsquo;s as if
there are two levels of <span name="brace">&ldquo;precedence&rdquo;</span> for statements.
Some places where a statement is allowed<span class="em">&mdash;</span>like inside a block or at the top
level<span class="em">&mdash;</span>allow any kind of statement, including declarations. Others allow only
the &ldquo;higher&rdquo; precedence statements that don&rsquo;t declare names.</p>
<aside name="brace">
<p>In this analogy, block statements work sort of like parentheses do for
expressions. A block is itself in the &ldquo;higher&rdquo; precedence level and can be used
anywhere, like in the clauses of an <code>if</code> statement. But the statements it
<em>contains</em> can be lower precedence. You&rsquo;re allowed to declare variables and
other names inside the block. The curlies let you escape back into the full
statement grammar from a place where only some statements are allowed.</p>
</aside>
<p>To accommodate the distinction, we add another rule for kinds of statements that
declare names.</p>
<div class="codehilite"><pre><span class="i">program</span>        → <span class="i">declaration</span>* <span class="t">EOF</span> ;

<span class="i">declaration</span>    → <span class="i">varDecl</span>
               | <span class="i">statement</span> ;

<span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">printStmt</span> ;
</pre></div>
<p>Declaration statements go under the new <code>declaration</code> rule. Right now, it&rsquo;s only
variables, but later it will include functions and classes. Any place where a
declaration is allowed also allows non-declaring statements, so the
<code>declaration</code> rule falls through to <code>statement</code>. Obviously, you can declare
stuff at the top level of a script, so <code>program</code> routes to the new rule.</p>
<p>The rule for declaring a variable looks like:</p>
<div class="codehilite"><pre><span class="i">varDecl</span>        → <span class="s">&quot;var&quot;</span> <span class="t">IDENTIFIER</span> ( <span class="s">&quot;=&quot;</span> <span class="i">expression</span> )? <span class="s">&quot;;&quot;</span> ;
</pre></div>
<p>Like most statements, it starts with a leading keyword. In this case, <code>var</code>.
Then an identifier token for the name of the variable being declared, followed
by an optional initializer expression. Finally, we put a bow on it with the
semicolon.</p>
<p>To access a variable, we define a new kind of primary expression.</p>
<div class="codehilite"><pre><span class="i">primary</span>        → <span class="s">&quot;true&quot;</span> | <span class="s">&quot;false&quot;</span> | <span class="s">&quot;nil&quot;</span>
               | <span class="t">NUMBER</span> | <span class="t">STRING</span>
               | <span class="s">&quot;(&quot;</span> <span class="i">expression</span> <span class="s">&quot;)&quot;</span>
               | <span class="t">IDENTIFIER</span> ;
</pre></div>
<p>That <code>IDENTIFIER</code> clause matches a single identifier token, which is understood
to be the name of the variable being accessed.</p>
<p>These new grammar rules get their corresponding syntax trees. Over in the AST
generator, we add a <span name="var-stmt-ast">new statement</span> node for a
variable declaration.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Expression : Expr expression&quot;,
</pre><pre class="insert-before">      <span class="s">&quot;Print      : Expr expression&quot;</span><span class="insert-comma">,</span>
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()<br>
add <em>&ldquo;,&rdquo;</em> to previous line</div>
<pre class="insert">      <span class="s">&quot;Var        : Token name, Expr initializer&quot;</span>
</pre><pre class="insert-after">    ));
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>(), add <em>&ldquo;,&rdquo;</em> to previous line</div>

<aside name="var-stmt-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#variable-statement">Appendix II</a>.</p>
</aside>
<p>It stores the name token so we know what it&rsquo;s declaring, along with the
initializer expression. (If there isn&rsquo;t an initializer, that field is <code>null</code>.)</p>
<p>Then we add an expression node for accessing a variable.</p>
<div class="codehilite"><pre class="insert-before">      &quot;Literal  : Object value&quot;,
</pre><pre class="insert-before">      <span class="s">&quot;Unary    : Token operator, Expr right&quot;</span><span class="insert-comma">,</span>
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()<br>
add <em>&ldquo;,&rdquo;</em> to previous line</div>
<pre class="insert">      <span class="s">&quot;Variable : Token name&quot;</span>
</pre><pre class="insert-after">    ));
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>(), add <em>&ldquo;,&rdquo;</em> to previous line</div>

<p><span name="var-expr-ast">It&rsquo;s</span> simply a wrapper around the token for the
variable name. That&rsquo;s it. As always, don&rsquo;t forget to run the AST generator
script so that you get updated &ldquo;Expr.java&rdquo; and &ldquo;Stmt.java&rdquo; files.</p>
<aside name="var-expr-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#variable-expression">Appendix II</a>.</p>
</aside>
<h3><a href="#parsing-variables" id="parsing-variables"><small>8&#8202;.&#8202;2&#8202;.&#8202;2</small>Parsing variables</a></h3>
<p>Before we parse variable statements, we need to shift around some code to make
room for the new <code>declaration</code> rule in the grammar. The top level of a program
is now a list of declarations, so the entrypoint method to the parser changes.</p>
<div class="codehilite"><pre class="insert-before">  List&lt;Stmt&gt; parse() {
    List&lt;Stmt&gt; statements = new ArrayList&lt;&gt;();
    while (!isAtEnd()) {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>parse</em>()<br>
replace 1 line</div>
<pre class="insert">      <span class="i">statements</span>.<span class="i">add</span>(<span class="i">declaration</span>());
</pre><pre class="insert-after">    }

    return statements;<span name="parse-error-handling"> </span>
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>parse</em>(), replace 1 line</div>

<p>That calls this new method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>expression</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">declaration</span>() {
    <span class="k">try</span> {
      <span class="k">if</span> (<span class="i">match</span>(<span class="i">VAR</span>)) <span class="k">return</span> <span class="i">varDeclaration</span>();

      <span class="k">return</span> <span class="i">statement</span>();
    } <span class="k">catch</span> (<span class="t">ParseError</span> <span class="i">error</span>) {
      <span class="i">synchronize</span>();
      <span class="k">return</span> <span class="k">null</span>;
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>expression</em>()</div>

<p>Hey, do you remember way back in that <a href="parsing-expressions.html">earlier chapter</a> when we put the
infrastructure in place to do error recovery? We are finally ready to hook that
up.</p>
<p>This <code>declaration()</code> method is the method we call repeatedly when parsing a
series of statements in a block or a script, so it&rsquo;s the right place to
synchronize when the parser goes into panic mode. The whole body of this method
is wrapped in a try block to catch the exception thrown when the parser begins
error recovery. This gets it back to trying to parse the beginning of the next
statement or declaration.</p>
<p>The real parsing happens inside the try block. First, it looks to see if we&rsquo;re
at a variable declaration by looking for the leading <code>var</code> keyword. If not, it
falls through to the existing <code>statement()</code> method that parses <code>print</code> and
expression statements.</p>
<p>Remember how <code>statement()</code> tries to parse an expression statement if no other
statement matches? And <code>expression()</code> reports a syntax error if it can&rsquo;t parse
an expression at the current token? That chain of calls ensures we report an
error if a valid declaration or statement isn&rsquo;t parsed.</p>
<p>When the parser matches a <code>var</code> token, it branches to:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>printStatement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Stmt</span> <span class="i">varDeclaration</span>() {
    <span class="t">Token</span> <span class="i">name</span> = <span class="i">consume</span>(<span class="i">IDENTIFIER</span>, <span class="s">&quot;Expect variable name.&quot;</span>);

    <span class="t">Expr</span> <span class="i">initializer</span> = <span class="k">null</span>;
    <span class="k">if</span> (<span class="i">match</span>(<span class="i">EQUAL</span>)) {
      <span class="i">initializer</span> = <span class="i">expression</span>();
    }

    <span class="i">consume</span>(<span class="i">SEMICOLON</span>, <span class="s">&quot;Expect &#39;;&#39; after variable declaration.&quot;</span>);
    <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Var</span>(<span class="i">name</span>, <span class="i">initializer</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>printStatement</em>()</div>

<p>As always, the recursive descent code follows the grammar rule. The parser has
already matched the <code>var</code> token, so next it requires and consumes an identifier
token for the variable name.</p>
<p>Then, if it sees an <code>=</code> token, it knows there is an initializer expression and
parses it. Otherwise, it leaves the initializer <code>null</code>. Finally, it consumes the
required semicolon at the end of the statement. All this gets wrapped in a
Stmt.Var syntax tree node and we&rsquo;re groovy.</p>
<p>Parsing a variable expression is even easier. In <code>primary()</code>, we look for an
identifier token.</p>
<div class="codehilite"><pre class="insert-before">      return new Expr.Literal(previous().literal);
    }
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>primary</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">match</span>(<span class="i">IDENTIFIER</span>)) {
      <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Variable</span>(<span class="i">previous</span>());
    }
</pre><pre class="insert-after">

    if (match(LEFT_PAREN)) {
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>primary</em>()</div>

<p>That gives us a working front end for declaring and using variables. All that&rsquo;s
left is to feed it into the interpreter. Before we get to that, we need to talk
about where variables live in memory.</p>
<h2><a href="#environments" id="environments"><small>8&#8202;.&#8202;3</small>Environments</a></h2>
<p>The bindings that associate variables to values need to be stored somewhere.
Ever since the Lisp folks invented parentheses, this data structure has been
called an <span name="env"><strong>environment</strong></span>.</p><img src="image/statements-and-state/environment.png" alt="An environment containing two bindings." />
<aside name="env">
<p>I like to imagine the environment literally, as a sylvan wonderland where
variables and values frolic.</p>
</aside>
<p>You can think of it like a <span name="map">map</span> where the keys are
variable names and the values are the variable&rsquo;s, uh, values. In fact, that&rsquo;s
how we&rsquo;ll implement it in Java. We could stuff that map and the code to manage
it right into Interpreter, but since it forms a nicely delineated concept, we&rsquo;ll
pull it out into its own class.</p>
<p>Start a new file and add:</p>
<aside name="map">
<p>Java calls them <strong>maps</strong> or <strong>hashmaps</strong>. Other languages call them <strong>hash
tables</strong>, <strong>dictionaries</strong> (Python and C#), <strong>hashes</strong> (Ruby and Perl),
<strong>tables</strong> (Lua), or <strong>associative arrays</strong> (PHP). Way back when, they were
known as <strong>scatter tables</strong>.</p>
</aside>
<div class="codehilite"><div class="source-file"><em>lox/Environment.java</em><br>
create new file</div>
<pre><span class="k">package</span> <span class="i">com.craftinginterpreters.lox</span>;

<span class="k">import</span> <span class="i">java.util.HashMap</span>;
<span class="k">import</span> <span class="i">java.util.Map</span>;

<span class="k">class</span> <span class="t">Environment</span> {
  <span class="k">private</span> <span class="k">final</span> <span class="t">Map</span>&lt;<span class="t">String</span>, <span class="t">Object</span>&gt; <span class="i">values</span> = <span class="k">new</span> <span class="t">HashMap</span>&lt;&gt;();
}
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, create new file</div>

<p>There&rsquo;s a Java Map in there to store the bindings. It uses bare strings for the
keys, not tokens. A token represents a unit of code at a specific place in the
source text, but when it comes to looking up variables, all identifier tokens
with the same name should refer to the same variable (ignoring scope for now).
Using the raw string ensures all of those tokens refer to the same map key.</p>
<p>There are two operations we need to support. First, a variable definition binds
a new name to a value.</p>
<div class="codehilite"><div class="source-file"><em>lox/Environment.java</em><br>
in class <em>Environment</em></div>
<pre>  <span class="t">void</span> <span class="i">define</span>(<span class="t">String</span> <span class="i">name</span>, <span class="t">Object</span> <span class="i">value</span>) {
    <span class="i">values</span>.<span class="i">put</span>(<span class="i">name</span>, <span class="i">value</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, in class <em>Environment</em></div>

<p>Not exactly brain surgery, but we have made one interesting semantic choice.
When we add the key to the map, we don&rsquo;t check to see if it&rsquo;s already present.
That means that this program works:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;before&quot;</span>;
<span class="k">print</span> <span class="i">a</span>; <span class="c">// &quot;before&quot;.</span>
<span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;after&quot;</span>;
<span class="k">print</span> <span class="i">a</span>; <span class="c">// &quot;after&quot;.</span>
</pre></div>
<p>A variable statement doesn&rsquo;t just define a <em>new</em> variable; it can also be used
to <em>re</em>define an existing variable. We could <span name="scheme">choose</span>
to make this an error instead. The user may not intend to redefine an existing
variable. (If they did mean to, they probably would have used assignment, not
<code>var</code>.) Making redefinition an error would help them find that bug.</p>
<p>However, doing so interacts poorly with the REPL. In the middle of a REPL
session, it&rsquo;s nice to not have to mentally track which variables you&rsquo;ve already
defined. We could allow redefinition in the REPL but not in scripts, but then
users would have to learn two sets of rules, and code copied and pasted from one
form to the other might not work.</p>
<aside name="scheme">
<p>My rule about variables and scoping is, &ldquo;When in doubt, do what Scheme does&rdquo;.
The Scheme folks have probably spent more time thinking about variable scope
than we ever will<span class="em">&mdash;</span>one of the main goals of Scheme was to introduce lexical
scoping to the world<span class="em">&mdash;</span>so it&rsquo;s hard to go wrong if you follow in their
footsteps.</p>
<p>Scheme allows redefining variables at the top level.</p>
</aside>
<p>So, to keep the two modes consistent, we&rsquo;ll allow it<span class="em">&mdash;</span>at least for global
variables. Once a variable exists, we need a way to look it up.</p>
<div class="codehilite"><pre class="insert-before">class Environment {
  private final Map&lt;String, Object&gt; values = new HashMap&lt;&gt;();
</pre><div class="source-file"><em>lox/Environment.java</em><br>
in class <em>Environment</em></div>
<pre class="insert">

  <span class="t">Object</span> <span class="i">get</span>(<span class="t">Token</span> <span class="i">name</span>) {
    <span class="k">if</span> (<span class="i">values</span>.<span class="i">containsKey</span>(<span class="i">name</span>.<span class="i">lexeme</span>)) {
      <span class="k">return</span> <span class="i">values</span>.<span class="i">get</span>(<span class="i">name</span>.<span class="i">lexeme</span>);
    }

    <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">name</span>,
        <span class="s">&quot;Undefined variable &#39;&quot;</span> + <span class="i">name</span>.<span class="i">lexeme</span> + <span class="s">&quot;&#39;.&quot;</span>);
  }

</pre><pre class="insert-after">  void define(String name, Object value) {
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, in class <em>Environment</em></div>

<p>This is a little more semantically interesting. If the variable is found, it
simply returns the value bound to it. But what if it&rsquo;s not? Again, we have a
choice:</p>
<ul>
<li>
<p>Make it a syntax error.</p>
</li>
<li>
<p>Make it a runtime error.</p>
</li>
<li>
<p>Allow it and return some default value like <code>nil</code>.</p>
</li>
</ul>
<p>Lox is pretty lax, but the last option is a little <em>too</em> permissive to me.
Making it a syntax error<span class="em">&mdash;</span>a compile-time error<span class="em">&mdash;</span>seems like a smart choice.
Using an undefined variable is a bug, and the sooner you detect the mistake, the
better.</p>
<p>The problem is that <em>using</em> a variable isn&rsquo;t the same as <em>referring</em> to it. You
can refer to a variable in a chunk of code without immediately evaluating it if
that chunk of code is wrapped inside a function. If we make it a static error to
<em>mention</em> a variable before it&rsquo;s been declared, it becomes much harder to define
recursive functions.</p>
<p>We could accommodate single recursion<span class="em">&mdash;</span>a function that calls itself<span class="em">&mdash;</span>by
declaring the function&rsquo;s own name before we examine its body. But that doesn&rsquo;t
help with mutually recursive procedures that call each other. Consider:</p>
<p><span name="contrived"></span></p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">isOdd</span>(<span class="i">n</span>) {
  <span class="k">if</span> (<span class="i">n</span> == <span class="n">0</span>) <span class="k">return</span> <span class="k">false</span>;
  <span class="k">return</span> <span class="i">isEven</span>(<span class="i">n</span> - <span class="n">1</span>);
}

<span class="k">fun</span> <span class="i">isEven</span>(<span class="i">n</span>) {
  <span class="k">if</span> (<span class="i">n</span> == <span class="n">0</span>) <span class="k">return</span> <span class="k">true</span>;
  <span class="k">return</span> <span class="i">isOdd</span>(<span class="i">n</span> - <span class="n">1</span>);
}
</pre></div>
<aside name="contrived">
<p>Granted, this is probably not the most efficient way to tell if a number is even
or odd (not to mention the bad things that happen if you pass a non-integer or
negative number to these functions). Bear with me.</p>
</aside>
<p>The <code>isEven()</code> function isn&rsquo;t defined by the <span name="declare">time</span> we
are looking at the body of <code>isOdd()</code> where it&rsquo;s called. If we swap the order of
the two functions, then <code>isOdd()</code> isn&rsquo;t defined when we&rsquo;re looking at
<code>isEven()</code>&rsquo;s body.</p>
<aside name="declare">
<p>Some statically typed languages like Java and C# solve this by specifying that
the top level of a program isn&rsquo;t a sequence of imperative statements. Instead, a
program is a set of declarations which all come into being simultaneously. The
implementation declares <em>all</em> of the names before looking at the bodies of <em>any</em>
of the functions.</p>
<p>Older languages like C and Pascal don&rsquo;t work like this. Instead, they force you
to add explicit <em>forward declarations</em> to declare a name before it&rsquo;s fully
defined. That was a concession to the limited computing power at the time. They
wanted to be able to compile a source file in one single pass through the text,
so those compilers couldn&rsquo;t gather up all of the declarations first before
processing function bodies.</p>
</aside>
<p>Since making it a <em>static</em> error makes recursive declarations too difficult,
we&rsquo;ll defer the error to runtime. It&rsquo;s OK to refer to a variable before it&rsquo;s
defined as long as you don&rsquo;t <em>evaluate</em> the reference. That lets the program
for even and odd numbers work, but you&rsquo;d get a runtime error in:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="i">a</span>;
<span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;too late!&quot;</span>;
</pre></div>
<p>As with type errors in the expression evaluation code, we report a runtime error
by throwing an exception. The exception contains the variable&rsquo;s token so we can
tell the user where in their code they messed up.</p>
<h3><a href="#interpreting-global-variables" id="interpreting-global-variables"><small>8&#8202;.&#8202;3&#8202;.&#8202;1</small>Interpreting global variables</a></h3>
<p>The Interpreter class gets an instance of the new Environment class.</p>
<div class="codehilite"><pre class="insert-before">class Interpreter implements Expr.Visitor&lt;Object&gt;,
                             Stmt.Visitor&lt;Void&gt; {
</pre><div class="source-file"><em>lox/Interpreter.java</em><br>
in class <em>Interpreter</em></div>
<pre class="insert">  <span class="k">private</span> <span class="t">Environment</span> <span class="i">environment</span> = <span class="k">new</span> <span class="t">Environment</span>();

</pre><pre class="insert-after">  void interpret(List&lt;Stmt&gt; statements) {
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, in class <em>Interpreter</em></div>

<p>We store it as a field directly in Interpreter so that the variables stay in
memory as long as the interpreter is still running.</p>
<p>We have two new syntax trees, so that&rsquo;s two new visit methods. The first is for
declaration statements.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitPrintStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitVarStmt</span>(<span class="t">Stmt</span>.<span class="t">Var</span> <span class="i">stmt</span>) {
    <span class="t">Object</span> <span class="i">value</span> = <span class="k">null</span>;
    <span class="k">if</span> (<span class="i">stmt</span>.<span class="i">initializer</span> != <span class="k">null</span>) {
      <span class="i">value</span> = <span class="i">evaluate</span>(<span class="i">stmt</span>.<span class="i">initializer</span>);
    }

    <span class="i">environment</span>.<span class="i">define</span>(<span class="i">stmt</span>.<span class="i">name</span>.<span class="i">lexeme</span>, <span class="i">value</span>);
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitPrintStmt</em>()</div>

<p>If the variable has an initializer, we evaluate it. If not, we have another
choice to make. We could have made this a syntax error in the parser by
<em>requiring</em> an initializer. Most languages don&rsquo;t, though, so it feels a little
harsh to do so in Lox.</p>
<p>We could make it a runtime error. We&rsquo;d let you define an uninitialized variable,
but if you accessed it before assigning to it, a runtime error would occur. It&rsquo;s
not a bad idea, but most dynamically typed languages don&rsquo;t do that. Instead,
we&rsquo;ll keep it simple and say that Lox sets a variable to <code>nil</code> if it isn&rsquo;t
explicitly initialized.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span>;
<span class="k">print</span> <span class="i">a</span>; <span class="c">// &quot;nil&quot;.</span>
</pre></div>
<p>Thus, if there isn&rsquo;t an initializer, we set the value to <code>null</code>, which is the
Java representation of Lox&rsquo;s <code>nil</code> value. Then we tell the environment to bind
the variable to that value.</p>
<p>Next, we evaluate a variable expression.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitUnaryExpr</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitVariableExpr</span>(<span class="t">Expr</span>.<span class="t">Variable</span> <span class="i">expr</span>) {
    <span class="k">return</span> <span class="i">environment</span>.<span class="i">get</span>(<span class="i">expr</span>.<span class="i">name</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitUnaryExpr</em>()</div>

<p>This simply forwards to the environment, which does the heavy lifting to make
sure the variable is defined. With that, we&rsquo;ve got rudimentary variables
working. Try this out:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
<span class="k">var</span> <span class="i">b</span> = <span class="n">2</span>;
<span class="k">print</span> <span class="i">a</span> + <span class="i">b</span>;
</pre></div>
<p>We can&rsquo;t reuse <em>code</em> yet, but we can start to build up programs that reuse
<em>data</em>.</p>
<h2><a href="#assignment" id="assignment"><small>8&#8202;.&#8202;4</small>Assignment</a></h2>
<p>It&rsquo;s possible to create a language that has variables but does not let you
reassign<span class="em">&mdash;</span>or <strong>mutate</strong><span class="em">&mdash;</span>them. Haskell is one example. SML supports only
mutable references and arrays<span class="em">&mdash;</span>variables cannot be reassigned. Rust steers you
away from mutation by requiring a <code>mut</code> modifier to enable assignment.</p>
<p>Mutating a variable is a side effect and, as the name suggests, some language
folks think side effects are <span name="pure">dirty</span> or inelegant. Code
should be pure math that produces values<span class="em">&mdash;</span>crystalline, unchanging ones<span class="em">&mdash;</span>like
an act of divine creation. Not some grubby automaton that beats blobs of data
into shape, one imperative grunt at a time.</p>
<aside name="pure">
<p>I find it delightful that the same group of people who pride themselves on
dispassionate logic are also the ones who can&rsquo;t resist emotionally loaded terms
for their work: &ldquo;pure&rdquo;, &ldquo;side effect&rdquo;, &ldquo;lazy&rdquo;, &ldquo;persistent&rdquo;, &ldquo;first-class&rdquo;,
&ldquo;higher-order&rdquo;.</p>
</aside>
<p>Lox is not so austere. Lox is an imperative language, and mutation comes with
the territory. Adding support for assignment doesn&rsquo;t require much work. Global
variables already support redefinition, so most of the machinery is there now.
Mainly, we&rsquo;re missing an explicit assignment notation.</p>
<h3><a href="#assignment-syntax" id="assignment-syntax"><small>8&#8202;.&#8202;4&#8202;.&#8202;1</small>Assignment syntax</a></h3>
<p>That little <code>=</code> syntax is more complex than it might seem. In Lox, as in most
C-derived languages, assignment is an <span name="assign">expression</span> and not a
statement. As in C, it is the lowest precedence expression form. That means the
rule slots between <code>expression</code> and <code>equality</code> (the next lowest precedence
expression).</p>
<aside name="assign">
<p>In some other languages, like Pascal, Python, and Go, assignment is a statement.</p>
</aside>
<div class="codehilite"><pre><span class="i">expression</span>     → <span class="i">assignment</span> ;
<span class="i">assignment</span>     → <span class="t">IDENTIFIER</span> <span class="s">&quot;=&quot;</span> <span class="i">assignment</span>
               | <span class="i">equality</span> ;
</pre></div>
<p>This says an <code>assignment</code> is either an identifier followed by an <code>=</code> and an
expression for the value, or an <code>equality</code> (and thus any other) expression.
Later, <code>assignment</code> will get more complex when we add property setters on
objects, like:</p>
<div class="codehilite"><pre><span class="i">instance</span>.<span class="i">field</span> = <span class="s">&quot;value&quot;</span>;
</pre></div>
<p>The easy part is adding the <span name="assign-ast">new syntax tree node</span>.</p>
<div class="codehilite"><pre class="insert-before">    defineAst(outputDir, &quot;Expr&quot;, Arrays.asList(
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Assign   : Token name, Expr value&quot;</span>,
</pre><pre class="insert-after">      &quot;Binary   : Expr left, Token operator, Expr right&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="assign-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#assign-expression">Appendix II</a>.</p>
</aside>
<p>It has a token for the variable being assigned to, and an expression for the new
value. After you run the GenerateAst tool to get the new Expr.Assign class, swap out
the body of the parser&rsquo;s existing <code>expression()</code> method to match the updated
rule.</p>
<div class="codehilite"><pre class="insert-before">  private Expr expression() {
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>expression</em>()<br>
replace 1 line</div>
<pre class="insert">    <span class="k">return</span> <span class="i">assignment</span>();
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>expression</em>(), replace 1 line</div>

<p>Here is where it gets tricky. A single token lookahead recursive descent parser
can&rsquo;t see far enough to tell that it&rsquo;s parsing an assignment until <em>after</em> it
has gone through the left-hand side and stumbled onto the <code>=</code>. You might wonder
why it even needs to. After all, we don&rsquo;t know we&rsquo;re parsing a <code>+</code> expression
until after we&rsquo;ve finished parsing the left operand.</p>
<p>The difference is that the left-hand side of an assignment isn&rsquo;t an expression
that evaluates to a value. It&rsquo;s a sort of pseudo-expression that evaluates to a
&ldquo;thing&rdquo; you can assign to. Consider:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;before&quot;</span>;
<span class="i">a</span> = <span class="s">&quot;value&quot;</span>;
</pre></div>
<p>On the second line, we don&rsquo;t <em>evaluate</em> <code>a</code> (which would return the string
&ldquo;before&rdquo;). We figure out what variable <code>a</code> refers to so we know where to store
the right-hand side expression&rsquo;s value. The <a href="https://en.wikipedia.org/wiki/Value_(computer_science)#lrvalue">classic terms</a> for these
two <span name="l-value">constructs</span> are <strong>l-value</strong> and <strong>r-value</strong>. All
of the expressions that we&rsquo;ve seen so far that produce values are r-values. An
l-value &ldquo;evaluates&rdquo; to a storage location that you can assign into.</p>
<aside name="l-value">
<p>In fact, the names come from assignment expressions: <em>l</em>-values appear on the
<em>left</em> side of the <code>=</code> in an assignment, and <em>r</em>-values on the <em>right</em>.</p>
</aside>
<p>We want the syntax tree to reflect that an l-value isn&rsquo;t evaluated like a normal
expression. That&rsquo;s why the Expr.Assign node has a <em>Token</em> for the left-hand
side, not an Expr. The problem is that the parser doesn&rsquo;t know it&rsquo;s parsing an
l-value until it hits the <code>=</code>. In a complex l-value, that may occur <span
name="many">many</span> tokens later.</p>
<div class="codehilite"><pre><span class="i">makeList</span>().<span class="i">head</span>.<span class="i">next</span> = <span class="i">node</span>;
</pre></div>
<aside name="many">
<p>Since the receiver of a field assignment can be any expression, and expressions
can be as long as you want to make them, it may take an <em>unbounded</em> number of
tokens of lookahead to find the <code>=</code>.</p>
</aside>
<p>We have only a single token of lookahead, so what do we do? We use a little
trick, and it looks like this:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>expressionStatement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">Expr</span> <span class="i">assignment</span>() {
    <span class="t">Expr</span> <span class="i">expr</span> = <span class="i">equality</span>();

    <span class="k">if</span> (<span class="i">match</span>(<span class="i">EQUAL</span>)) {
      <span class="t">Token</span> <span class="i">equals</span> = <span class="i">previous</span>();
      <span class="t">Expr</span> <span class="i">value</span> = <span class="i">assignment</span>();

      <span class="k">if</span> (<span class="i">expr</span> <span class="k">instanceof</span> <span class="t">Expr</span>.<span class="t">Variable</span>) {
        <span class="t">Token</span> <span class="i">name</span> = ((<span class="t">Expr</span>.<span class="t">Variable</span>)<span class="i">expr</span>).<span class="i">name</span>;
        <span class="k">return</span> <span class="k">new</span> <span class="t">Expr</span>.<span class="t">Assign</span>(<span class="i">name</span>, <span class="i">value</span>);
      }

      <span class="i">error</span>(<span class="i">equals</span>, <span class="s">&quot;Invalid assignment target.&quot;</span>);<span name="no-throw"> </span>
    }

    <span class="k">return</span> <span class="i">expr</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>expressionStatement</em>()</div>

<p>Most of the code for parsing an assignment expression looks similar to that of
the other binary operators like <code>+</code>. We parse the left-hand side, which can be
any expression of higher precedence. If we find an <code>=</code>, we parse the right-hand
side and then wrap it all up in an assignment expression tree node.</p>
<aside name="no-throw">
<p>We <em>report</em> an error if the left-hand side isn&rsquo;t a valid assignment target, but
we don&rsquo;t <em>throw</em> it because the parser isn&rsquo;t in a confused state where we need
to go into panic mode and synchronize.</p>
</aside>
<p>One slight difference from binary operators is that we don&rsquo;t loop to build up a
sequence of the same operator. Since assignment is right-associative, we instead
recursively call <code>assignment()</code> to parse the right-hand side.</p>
<p>The trick is that right before we create the assignment expression node, we look
at the left-hand side expression and figure out what kind of assignment target
it is. We convert the r-value expression node into an l-value representation.</p>
<p>This conversion works because it turns out that every valid assignment target
happens to also be <span name="converse">valid syntax</span> as a normal
expression. Consider a complex field assignment like:</p>
<aside name="converse">
<p>You can still use this trick even if there are assignment targets that are not
valid expressions. Define a <strong>cover grammar</strong>, a looser grammar that accepts
all of the valid expression <em>and</em> assignment target syntaxes. When you hit
an <code>=</code>, report an error if the left-hand side isn&rsquo;t within the valid assignment
target grammar. Conversely, if you <em>don&rsquo;t</em> hit an <code>=</code>, report an error if the
left-hand side isn&rsquo;t a valid <em>expression</em>.</p>
</aside>
<div class="codehilite"><pre><span class="i">newPoint</span>(<span class="i">x</span> + <span class="n">2</span>, <span class="n">0</span>).<span class="i">y</span> = <span class="n">3</span>;
</pre></div>
<p>The left-hand side of that assignment could also work as a valid expression.</p>
<div class="codehilite"><pre><span class="i">newPoint</span>(<span class="i">x</span> + <span class="n">2</span>, <span class="n">0</span>).<span class="i">y</span>;
</pre></div>
<p>The first example sets the field, the second gets it.</p>
<p>This means we can parse the left-hand side <em>as if it were</em> an expression and
then after the fact produce a syntax tree that turns it into an assignment
target. If the left-hand side expression isn&rsquo;t a <span name="paren">valid</span>
assignment target, we fail with a syntax error. That ensures we report an error
on code like this:</p>
<div class="codehilite"><pre><span class="i">a</span> + <span class="i">b</span> = <span class="i">c</span>;
</pre></div>
<aside name="paren">
<p>Way back in the parsing chapter, I said we represent parenthesized expressions
in the syntax tree because we&rsquo;ll need them later. This is why. We need to be
able to distinguish these cases:</p>
<div class="codehilite"><pre><span class="i">a</span> = <span class="n">3</span>;   <span class="c">// OK.</span>
(<span class="i">a</span>) = <span class="n">3</span>; <span class="c">// Error.</span>
</pre></div>
</aside>
<p>Right now, the only valid target is a simple variable expression, but we&rsquo;ll add
fields later. The end result of this trick is an assignment expression tree node
that knows what it is assigning to and has an expression subtree for the value
being assigned. All with only a single token of lookahead and no backtracking.</p>
<h3><a href="#assignment-semantics" id="assignment-semantics"><small>8&#8202;.&#8202;4&#8202;.&#8202;2</small>Assignment semantics</a></h3>
<p>We have a new syntax tree node, so our interpreter gets a new visit method.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>visitVarStmt</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Object</span> <span class="i">visitAssignExpr</span>(<span class="t">Expr</span>.<span class="t">Assign</span> <span class="i">expr</span>) {
    <span class="t">Object</span> <span class="i">value</span> = <span class="i">evaluate</span>(<span class="i">expr</span>.<span class="i">value</span>);
    <span class="i">environment</span>.<span class="i">assign</span>(<span class="i">expr</span>.<span class="i">name</span>, <span class="i">value</span>);
    <span class="k">return</span> <span class="i">value</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>visitVarStmt</em>()</div>

<p>For obvious reasons, it&rsquo;s similar to variable declaration. It evaluates the
right-hand side to get the value, then stores it in the named variable. Instead
of using <code>define()</code> on Environment, it calls this new method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Environment.java</em><br>
add after <em>get</em>()</div>
<pre>  <span class="t">void</span> <span class="i">assign</span>(<span class="t">Token</span> <span class="i">name</span>, <span class="t">Object</span> <span class="i">value</span>) {
    <span class="k">if</span> (<span class="i">values</span>.<span class="i">containsKey</span>(<span class="i">name</span>.<span class="i">lexeme</span>)) {
      <span class="i">values</span>.<span class="i">put</span>(<span class="i">name</span>.<span class="i">lexeme</span>, <span class="i">value</span>);
      <span class="k">return</span>;
    }

    <span class="k">throw</span> <span class="k">new</span> <span class="t">RuntimeError</span>(<span class="i">name</span>,
        <span class="s">&quot;Undefined variable &#39;&quot;</span> + <span class="i">name</span>.<span class="i">lexeme</span> + <span class="s">&quot;&#39;.&quot;</span>);
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, add after <em>get</em>()</div>

<p>The key difference between assignment and definition is that assignment is not
<span name="new">allowed</span> to create a <em>new</em> variable. In terms of our
implementation, that means it&rsquo;s a runtime error if the key doesn&rsquo;t already exist
in the environment&rsquo;s variable map.</p>
<aside name="new">
<p>Unlike Python and Ruby, Lox doesn&rsquo;t do <a href="#design-note">implicit variable declaration</a>.</p>
</aside>
<p>The last thing the <code>visitAssignExpr()</code> method does is return the assigned value. That&rsquo;s
because assignment is an expression that can be nested inside other expressions,
like so:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
<span class="k">print</span> <span class="i">a</span> = <span class="n">2</span>; <span class="c">// &quot;2&quot;.</span>
</pre></div>
<p>Our interpreter can now create, read, and modify variables. It&rsquo;s about as
sophisticated as early <span name="basic">BASICs</span>. Global variables are
simple, but writing a large program when any two chunks of code can accidentally
step on each other&rsquo;s state is no fun. We want <em>local</em> variables, which means
it&rsquo;s time for <em>scope</em>.</p>
<aside name="basic">
<p>Maybe a little better than that. Unlike some old BASICs, Lox can handle variable
names longer than two characters.</p>
</aside>
<h2><a href="#scope" id="scope"><small>8&#8202;.&#8202;5</small>Scope</a></h2>
<p>A <strong>scope</strong> defines a region where a name maps to a certain entity. Multiple
scopes enable the same name to refer to different things in different contexts.
In my house, &ldquo;Bob&rdquo; usually refers to me. But maybe in your town you know a
different Bob. Same name, but different dudes based on where you say it.</p>
<p><span name="lexical"><strong>Lexical scope</strong></span> (or the less commonly heard
<strong>static scope</strong>) is a specific style of scoping where the text of the program
itself shows where a scope begins and ends. In Lox, as in most modern languages,
variables are lexically scoped. When you see an expression that uses some
variable, you can figure out which variable declaration it refers to just by
statically reading the code.</p>
<aside name="lexical">
<p>&ldquo;Lexical&rdquo; comes from the Greek &ldquo;lexikos&rdquo; which means &ldquo;related to words&rdquo;. When we
use it in programming languages, it usually means a thing you can figure out
from source code itself without having to execute anything.</p>
<p>Lexical scope came onto the scene with ALGOL. Earlier languages were often
dynamically scoped. Computer scientists back then believed dynamic scope was
faster to execute. Today, thanks to early Scheme hackers, we know that isn&rsquo;t
true. If anything, it&rsquo;s the opposite.</p>
<p>Dynamic scope for variables lives on in some corners. Emacs Lisp defaults to
dynamic scope for variables. The <a href="http://clojuredocs.org/clojure.core/binding"><code>binding</code></a> macro in Clojure provides
it. The widely disliked <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/with"><code>with</code> statement</a> in JavaScript turns properties
on an object into dynamically scoped variables.</p>
</aside>
<p>For example:</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;first&quot;</span>;
  <span class="k">print</span> <span class="i">a</span>; <span class="c">// &quot;first&quot;.</span>
}

{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;second&quot;</span>;
  <span class="k">print</span> <span class="i">a</span>; <span class="c">// &quot;second&quot;.</span>
}
</pre></div>
<p>Here, we have two blocks with a variable <code>a</code> declared in each of them. You and
I can tell just from looking at the code that the use of <code>a</code> in the first
<code>print</code> statement refers to the first <code>a</code>, and the second one refers to the
second.</p><img src="image/statements-and-state/blocks.png" alt="An environment for each 'a'." />
<p>This is in contrast to <strong>dynamic scope</strong> where you don&rsquo;t know what a name refers
to until you execute the code. Lox doesn&rsquo;t have dynamically scoped <em>variables</em>,
but methods and fields on objects are dynamically scoped.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Saxophone</span> {
  <span class="i">play</span>() {
    <span class="k">print</span> <span class="s">&quot;Careless Whisper&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">GolfClub</span> {
  <span class="i">play</span>() {
    <span class="k">print</span> <span class="s">&quot;Fore!&quot;</span>;
  }
}

<span class="k">fun</span> <span class="i">playIt</span>(<span class="i">thing</span>) {
  <span class="i">thing</span>.<span class="i">play</span>();
}
</pre></div>
<p>When <code>playIt()</code> calls <code>thing.play()</code>, we don&rsquo;t know if we&rsquo;re about to hear
&ldquo;Careless Whisper&rdquo; or &ldquo;Fore!&rdquo; It depends on whether you pass a Saxophone or a
GolfClub to the function, and we don&rsquo;t know that until runtime.</p>
<p>Scope and environments are close cousins. The former is the theoretical concept,
and the latter is the machinery that implements it. As our interpreter works its
way through code, syntax tree nodes that affect scope will change the
environment. In a C-ish syntax like Lox&rsquo;s, scope is controlled by curly-braced
blocks. (That&rsquo;s why we call it <strong>block scope</strong>.)</p>
<div class="codehilite"><pre>{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;in block&quot;</span>;
}
<span class="k">print</span> <span class="i">a</span>; <span class="c">// Error! No more &quot;a&quot;.</span>
</pre></div>
<p>The beginning of a block introduces a new local scope, and that scope ends when
execution passes the closing <code>}</code>. Any variables declared inside the block
disappear.</p>
<h3><a href="#nesting-and-shadowing" id="nesting-and-shadowing"><small>8&#8202;.&#8202;5&#8202;.&#8202;1</small>Nesting and shadowing</a></h3>
<p>A first cut at implementing block scope might work like this:</p>
<ol>
<li>
<p>As we visit each statement inside the block, keep track of any variables
declared.</p>
</li>
<li>
<p>After the last statement is executed, tell the environment to delete all of
those variables.</p>
</li>
</ol>
<p>That would work for the previous example. But remember, one motivation for
local scope is encapsulation<span class="em">&mdash;</span>a block of code in one corner of the program
shouldn&rsquo;t interfere with some other block. Check this out:</p>
<div class="codehilite"><pre><span class="c">// How loud?</span>
<span class="k">var</span> <span class="i">volume</span> = <span class="n">11</span>;

<span class="c">// Silence.</span>
<span class="i">volume</span> = <span class="n">0</span>;

<span class="c">// Calculate size of 3x4x5 cuboid.</span>
{
  <span class="k">var</span> <span class="i">volume</span> = <span class="n">3</span> * <span class="n">4</span> * <span class="n">5</span>;
  <span class="k">print</span> <span class="i">volume</span>;
}
</pre></div>
<p>Look at the block where we calculate the volume of the cuboid using a local
declaration of <code>volume</code>. After the block exits, the interpreter will delete the
<em>global</em> <code>volume</code> variable. That ain&rsquo;t right. When we exit the block, we should
remove any variables declared inside the block, but if there is a variable with
the same name declared outside of the block, <em>that&rsquo;s a different variable</em>. It
shouldn&rsquo;t get touched.</p>
<p>When a local variable has the same name as a variable in an enclosing scope, it
<strong>shadows</strong> the outer one. Code inside the block can&rsquo;t see it any more<span class="em">&mdash;</span>it is
hidden in the &ldquo;shadow&rdquo; cast by the inner one<span class="em">&mdash;</span>but it&rsquo;s still there.</p>
<p>When we enter a new block scope, we need to preserve variables defined in outer
scopes so they are still around when we exit the inner block. We do that by
defining a fresh environment for each block containing only the variables
defined in that scope. When we exit the block, we discard its environment and
restore the previous one.</p>
<p>We also need to handle enclosing variables that are <em>not</em> shadowed.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">global</span> = <span class="s">&quot;outside&quot;</span>;
{
  <span class="k">var</span> <span class="i">local</span> = <span class="s">&quot;inside&quot;</span>;
  <span class="k">print</span> <span class="i">global</span> + <span class="i">local</span>;
}
</pre></div>
<p>Here, <code>global</code> lives in the outer global environment and <code>local</code> is defined
inside the block&rsquo;s environment. In that <code>print</code> statement, both of those
variables are in scope. In order to find them, the interpreter must search not
only the current innermost environment, but also any enclosing ones.</p>
<p>We implement this by <span name="cactus">chaining</span> the environments
together. Each environment has a reference to the environment of the immediately
enclosing scope. When we look up a variable, we walk that chain from innermost
out until we find the variable. Starting at the inner scope is how we make local
variables shadow outer ones.</p><img src="image/statements-and-state/chaining.png" alt="Environments for each scope, linked together." />
<aside name="cactus">
<p>While the interpreter is running, the environments form a linear list of
objects, but consider the full set of environments created during the entire
execution. An outer scope may have multiple blocks nested within it, and each
will point to the outer one, giving a tree-like structure, though only one path
through the tree exists at a time.</p>
<p>The boring name for this is a <a href="https://en.wikipedia.org/wiki/Parent_pointer_tree"><strong>parent-pointer tree</strong></a>, but I
much prefer the evocative <strong>cactus stack</strong>.</p><img class="above" src="image/statements-and-state/cactus.png" alt="Each branch points to its parent. The root is global scope." />
</aside>
<p>Before we add block syntax to the grammar, we&rsquo;ll beef up our Environment class
with support for this nesting. First, we give each environment a reference to
its enclosing one.</p>
<div class="codehilite"><pre class="insert-before">class Environment {
</pre><div class="source-file"><em>lox/Environment.java</em><br>
in class <em>Environment</em></div>
<pre class="insert">  <span class="k">final</span> <span class="t">Environment</span> <span class="i">enclosing</span>;
</pre><pre class="insert-after">  private final Map&lt;String, Object&gt; values = new HashMap&lt;&gt;();
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, in class <em>Environment</em></div>

<p>This field needs to be initialized, so we add a couple of constructors.</p>
<div class="codehilite"><div class="source-file"><em>lox/Environment.java</em><br>
in class <em>Environment</em></div>
<pre>  <span class="t">Environment</span>() {
    <span class="i">enclosing</span> = <span class="k">null</span>;
  }

  <span class="t">Environment</span>(<span class="t">Environment</span> <span class="i">enclosing</span>) {
    <span class="k">this</span>.<span class="i">enclosing</span> = <span class="i">enclosing</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, in class <em>Environment</em></div>

<p>The no-argument constructor is for the global scope&rsquo;s environment, which ends
the chain. The other constructor creates a new local scope nested inside the
given outer one.</p>
<p>We don&rsquo;t have to touch the <code>define()</code> method<span class="em">&mdash;</span>a new variable is always
declared in the current innermost scope. But variable lookup and assignment work
with existing variables and they need to walk the chain to find them. First,
lookup:</p>
<div class="codehilite"><pre class="insert-before">      return values.get(name.lexeme);
    }
</pre><div class="source-file"><em>lox/Environment.java</em><br>
in <em>get</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">enclosing</span> != <span class="k">null</span>) <span class="k">return</span> <span class="i">enclosing</span>.<span class="i">get</span>(<span class="i">name</span>);
</pre><pre class="insert-after">

    throw new RuntimeError(name,
        &quot;Undefined variable '&quot; + name.lexeme + &quot;'.&quot;);
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, in <em>get</em>()</div>

<p>If the variable isn&rsquo;t found in this environment, we simply try the enclosing
one. That in turn does the same thing <span name="recurse">recursively</span>,
so this will ultimately walk the entire chain. If we reach an environment with
no enclosing one and still don&rsquo;t find the variable, then we give up and report
an error as before.</p>
<p>Assignment works the same way.</p>
<aside name="recurse">
<p>It&rsquo;s likely faster to iteratively walk the chain, but I think the recursive
solution is prettier. We&rsquo;ll do something <em>much</em> faster in clox.</p>
</aside>
<div class="codehilite"><pre class="insert-before">      values.put(name.lexeme, value);
      return;
    }

</pre><div class="source-file"><em>lox/Environment.java</em><br>
in <em>assign</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">enclosing</span> != <span class="k">null</span>) {
      <span class="i">enclosing</span>.<span class="i">assign</span>(<span class="i">name</span>, <span class="i">value</span>);
      <span class="k">return</span>;
    }

</pre><pre class="insert-after">    throw new RuntimeError(name,
</pre></div>
<div class="source-file-narrow"><em>lox/Environment.java</em>, in <em>assign</em>()</div>

<p>Again, if the variable isn&rsquo;t in this environment, it checks the outer one,
recursively.</p>
<h3><a href="#block-syntax-and-semantics" id="block-syntax-and-semantics"><small>8&#8202;.&#8202;5&#8202;.&#8202;2</small>Block syntax and semantics</a></h3>
<p>Now that Environments nest, we&rsquo;re ready to add blocks to the language. Behold
the grammar:</p>
<div class="codehilite"><pre><span class="i">statement</span>      → <span class="i">exprStmt</span>
               | <span class="i">printStmt</span>
               | <span class="i">block</span> ;

<span class="i">block</span>          → <span class="s">&quot;{&quot;</span> <span class="i">declaration</span>* <span class="s">&quot;}&quot;</span> ;
</pre></div>
<p>A block is a (possibly empty) series of statements or declarations surrounded by
curly braces. A block is itself a statement and can appear anywhere a statement
is allowed. The <span name="block-ast">syntax tree</span> node looks like this:</p>
<div class="codehilite"><pre class="insert-before">    defineAst(outputDir, &quot;Stmt&quot;, Arrays.asList(
</pre><div class="source-file"><em>tool/GenerateAst.java</em><br>
in <em>main</em>()</div>
<pre class="insert">      <span class="s">&quot;Block      : List&lt;Stmt&gt; statements&quot;</span>,
</pre><pre class="insert-after">      &quot;Expression : Expr expression&quot;,
</pre></div>
<div class="source-file-narrow"><em>tool/GenerateAst.java</em>, in <em>main</em>()</div>

<aside name="block-ast">
<p>The generated code for the new node is in <a href="appendix-ii.html#block-statement">Appendix II</a>.</p>
</aside>
<p><span name="generate">It</span> contains the list of statements that are inside
the block. Parsing is straightforward. Like other statements, we detect the
beginning of a block by its leading token<span class="em">&mdash;</span>in this case the <code>{</code>. In the
<code>statement()</code> method, we add:</p>
<aside name="generate">
<p>As always, don&rsquo;t forget to run &ldquo;GenerateAst.java&rdquo;.</p>
</aside>
<div class="codehilite"><pre class="insert-before">    if (match(PRINT)) return printStatement();
</pre><div class="source-file"><em>lox/Parser.java</em><br>
in <em>statement</em>()</div>
<pre class="insert">    <span class="k">if</span> (<span class="i">match</span>(<span class="i">LEFT_BRACE</span>)) <span class="k">return</span> <span class="k">new</span> <span class="t">Stmt</span>.<span class="t">Block</span>(<span class="i">block</span>());
</pre><pre class="insert-after">

    return expressionStatement();
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, in <em>statement</em>()</div>

<p>All the real work happens here:</p>
<div class="codehilite"><div class="source-file"><em>lox/Parser.java</em><br>
add after <em>expressionStatement</em>()</div>
<pre>  <span class="k">private</span> <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">block</span>() {
    <span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span> = <span class="k">new</span> <span class="t">ArrayList</span>&lt;&gt;();

    <span class="k">while</span> (!<span class="i">check</span>(<span class="i">RIGHT_BRACE</span>) &amp;&amp; !<span class="i">isAtEnd</span>()) {
      <span class="i">statements</span>.<span class="i">add</span>(<span class="i">declaration</span>());
    }

    <span class="i">consume</span>(<span class="i">RIGHT_BRACE</span>, <span class="s">&quot;Expect &#39;}&#39; after block.&quot;</span>);
    <span class="k">return</span> <span class="i">statements</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Parser.java</em>, add after <em>expressionStatement</em>()</div>

<p>We <span name="list">create</span> an empty list and then parse statements and
add them to the list until we reach the end of the block, marked by the closing
<code>}</code>. Note that the loop also has an explicit check for <code>isAtEnd()</code>. We have to
be careful to avoid infinite loops, even when parsing invalid code. If the user
forgets a closing <code>}</code>, the parser needs to not get stuck.</p>
<aside name="list">
<p>Having <code>block()</code> return the raw list of statements and leaving it to
<code>statement()</code> to wrap the list in a Stmt.Block looks a little odd. I did it that
way because we&rsquo;ll reuse <code>block()</code> later for parsing function bodies and we don&rsquo;t
want that body wrapped in a Stmt.Block.</p>
</aside>
<p>That&rsquo;s it for syntax. For semantics, we add another visit method to Interpreter.</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>execute</em>()</div>
<pre>  <span class="a">@Override</span>
  <span class="k">public</span> <span class="t">Void</span> <span class="i">visitBlockStmt</span>(<span class="t">Stmt</span>.<span class="t">Block</span> <span class="i">stmt</span>) {
    <span class="i">executeBlock</span>(<span class="i">stmt</span>.<span class="i">statements</span>, <span class="k">new</span> <span class="t">Environment</span>(<span class="i">environment</span>));
    <span class="k">return</span> <span class="k">null</span>;
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>execute</em>()</div>

<p>To execute a block, we create a new environment for the block&rsquo;s scope and pass
it off to this other method:</p>
<div class="codehilite"><div class="source-file"><em>lox/Interpreter.java</em><br>
add after <em>execute</em>()</div>
<pre>  <span class="t">void</span> <span class="i">executeBlock</span>(<span class="t">List</span>&lt;<span class="t">Stmt</span>&gt; <span class="i">statements</span>,
                    <span class="t">Environment</span> <span class="i">environment</span>) {
    <span class="t">Environment</span> <span class="i">previous</span> = <span class="k">this</span>.<span class="i">environment</span>;
    <span class="k">try</span> {
      <span class="k">this</span>.<span class="i">environment</span> = <span class="i">environment</span>;

      <span class="k">for</span> (<span class="t">Stmt</span> <span class="i">statement</span> : <span class="i">statements</span>) {
        <span class="i">execute</span>(<span class="i">statement</span>);
      }
    } <span class="k">finally</span> {
      <span class="k">this</span>.<span class="i">environment</span> = <span class="i">previous</span>;
    }
  }
</pre></div>
<div class="source-file-narrow"><em>lox/Interpreter.java</em>, add after <em>execute</em>()</div>

<p>This new method executes a list of statements in the context of a given <span
name="param">environment</span>. Up until now, the <code>environment</code> field in
Interpreter always pointed to the same environment<span class="em">&mdash;</span>the global one. Now, that
field represents the <em>current</em> environment. That&rsquo;s the environment that
corresponds to the innermost scope containing the code to be executed.</p>
<p>To execute code within a given scope, this method updates the interpreter&rsquo;s
<code>environment</code> field, visits all of the statements, and then restores the
previous value. As is always good practice in Java, it restores the previous
environment using a finally clause. That way it gets restored even if an
exception is thrown.</p>
<aside name="param">
<p>Manually changing and restoring a mutable <code>environment</code> field feels inelegant.
Another classic approach is to explicitly pass the environment as a parameter to
each visit method. To &ldquo;change&rdquo; the environment, you pass a different one as you
recurse down the tree. You don&rsquo;t have to restore the old one, since the new one
lives on the Java stack and is implicitly discarded when the interpreter returns
from the block&rsquo;s visit method.</p>
<p>I considered that for jlox, but it&rsquo;s kind of tedious and verbose adding an
environment parameter to every single visit method. To keep the book a little
simpler, I went with the mutable field.</p>
</aside>
<p>Surprisingly, that&rsquo;s all we need to do in order to fully support local
variables, nesting, and shadowing. Go ahead and try this out:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;global a&quot;</span>;
<span class="k">var</span> <span class="i">b</span> = <span class="s">&quot;global b&quot;</span>;
<span class="k">var</span> <span class="i">c</span> = <span class="s">&quot;global c&quot;</span>;
{
  <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;outer a&quot;</span>;
  <span class="k">var</span> <span class="i">b</span> = <span class="s">&quot;outer b&quot;</span>;
  {
    <span class="k">var</span> <span class="i">a</span> = <span class="s">&quot;inner a&quot;</span>;
    <span class="k">print</span> <span class="i">a</span>;
    <span class="k">print</span> <span class="i">b</span>;
    <span class="k">print</span> <span class="i">c</span>;
  }
  <span class="k">print</span> <span class="i">a</span>;
  <span class="k">print</span> <span class="i">b</span>;
  <span class="k">print</span> <span class="i">c</span>;
}
<span class="k">print</span> <span class="i">a</span>;
<span class="k">print</span> <span class="i">b</span>;
<span class="k">print</span> <span class="i">c</span>;
</pre></div>
<p>Our little interpreter can remember things now. We are inching closer to
something resembling a full-featured programming language.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>The REPL no longer supports entering a single expression and automatically
printing its result value. That&rsquo;s a drag. Add support to the REPL to let
users type in both statements and expressions. If they enter a statement,
execute it. If they enter an expression, evaluate it and display the result
value.</p>
</li>
<li>
<p>Maybe you want Lox to be a little more explicit about variable
initialization. Instead of implicitly initializing variables to <code>nil</code>, make
it a runtime error to access a variable that has not been initialized or
assigned to, as in:</p>
<div class="codehilite"><pre><span class="c">// No initializers.</span>
<span class="k">var</span> <span class="i">a</span>;
<span class="k">var</span> <span class="i">b</span>;

<span class="i">a</span> = <span class="s">&quot;assigned&quot;</span>;
<span class="k">print</span> <span class="i">a</span>; <span class="c">// OK, was assigned first.</span>

<span class="k">print</span> <span class="i">b</span>; <span class="c">// Error!</span>
</pre></div>
</li>
<li>
<p>What does the following program do?</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
{
  <span class="k">var</span> <span class="i">a</span> = <span class="i">a</span> + <span class="n">2</span>;
  <span class="k">print</span> <span class="i">a</span>;
}
</pre></div>
<p>What did you <em>expect</em> it to do? Is it what you think it should do? What
does analogous code in other languages you are familiar with do? What do
you think users will expect this to do?</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Implicit Variable Declaration</a></h2>
<p>Lox has distinct syntax for declaring a new variable and assigning to an
existing one. Some languages collapse those to only assignment syntax. Assigning
to a non-existent variable automatically brings it into being. This is called
<strong>implicit variable declaration</strong> and exists in Python, Ruby, and CoffeeScript,
among others. JavaScript has an explicit syntax to declare variables, but can
also create new variables on assignment. Visual Basic has <a href="https://msdn.microsoft.com/en-us/library/xe53dz5w(v=vs.100).aspx">an option to enable
or disable implicit variables</a>.</p>
<p>When the same syntax can assign or create a variable, each language must decide
what happens when it isn&rsquo;t clear about which behavior the user intends. In
particular, each language must choose how implicit declaration interacts with
shadowing, and which scope an implicitly declared variable goes into.</p>
<ul>
<li>
<p>In Python, assignment always creates a variable in the current function&rsquo;s
scope, even if there is a variable with the same name declared outside of
the function.</p>
</li>
<li>
<p>Ruby avoids some ambiguity by having different naming rules for local and
global variables. However, blocks in Ruby (which are more like closures than
like &ldquo;blocks&rdquo; in C) have their own scope, so it still has the problem.
Assignment in Ruby assigns to an existing variable outside of the current
block if there is one with the same name. Otherwise, it creates a new
variable in the current block&rsquo;s scope.</p>
</li>
<li>
<p>CoffeeScript, which takes after Ruby in many ways, is similar. It explicitly
disallows shadowing by saying that assignment always assigns to a variable
in an outer scope if there is one, all the way up to the outermost global
scope. Otherwise, it creates the variable in the current function scope.</p>
</li>
<li>
<p>In JavaScript, assignment modifies an existing variable in any enclosing
scope, if found. If not, it implicitly creates a new variable in the
<em>global</em> scope.</p>
</li>
</ul>
<p>The main advantage to implicit declaration is simplicity. There&rsquo;s less syntax
and no &ldquo;declaration&rdquo; concept to learn. Users can just start assigning stuff and
the language figures it out.</p>
<p>Older, statically typed languages like C benefit from explicit declaration
because it gives the user a place to tell the compiler what type each variable
has and how much storage to allocate for it. In a dynamically typed,
garbage-collected language, that isn&rsquo;t really necessary, so you can get away
with making declarations implicit. It feels a little more &ldquo;scripty&rdquo;, more &ldquo;you
know what I mean&rdquo;.</p>
<p>But is that a good idea? Implicit declaration has some problems.</p>
<ul>
<li>
<p>A user may intend to assign to an existing variable, but may have misspelled
it. The interpreter doesn&rsquo;t know that, so it goes ahead and silently creates
some new variable and the variable the user wanted to assign to still has
its old value. This is particularly heinous in JavaScript where a typo will
create a <em>global</em> variable, which may in turn interfere with other code.</p>
</li>
<li>
<p>JS, Ruby, and CoffeeScript use the presence of an existing variable with the
same name<span class="em">&mdash;</span>even in an outer scope<span class="em">&mdash;</span>to determine whether an
assignment creates a new variable or assigns to an existing one. That means
adding a new variable in a surrounding scope can change the meaning of
existing code. What was once a local variable may silently turn into an
assignment to that new outer variable.</p>
</li>
<li>
<p>In Python, you may <em>want</em> to assign to some variable outside of the current
function instead of creating a new variable in the current one, but you
can&rsquo;t.</p>
</li>
</ul>
<p>Over time, the languages I know with implicit variable declaration ended up
adding more features and complexity to deal with these problems.</p>
<ul>
<li>
<p>Implicit declaration of global variables in JavaScript is universally
considered a mistake today. &ldquo;Strict mode&rdquo; disables it and makes it a runtime
error.</p>
</li>
<li>
<p>Python added a <code>global</code> statement to let you explicitly assign to a global
variable from within a function. Later, as functional programming and nested
functions became more popular, they added a similar <code>nonlocal</code> statement to
assign to variables in enclosing functions.</p>
</li>
<li>
<p>Ruby extended its block syntax to allow declaring certain variables to be
explicitly local to the block even if the same name exists in an outer
scope.</p>
</li>
</ul>
<p>Given those, I think the simplicity argument is mostly lost. There is an
argument that implicit declaration is the right <em>default</em> but I personally find
that less compelling.</p>
<p>My opinion is that implicit declaration made sense in years past when most
scripting languages were heavily imperative and code was pretty flat. As
programmers have gotten more comfortable with deep nesting, functional
programming, and closures, it&rsquo;s become much more common to want access to
variables in outer scopes. That makes it more likely that users will run into
the tricky cases where it&rsquo;s not clear whether they intend their assignment to
create a new variable or reuse a surrounding one.</p>
<p>So I prefer explicitly declaring variables, which is why Lox requires it.</p>
</div>

<footer>
<a href="control-flow.html" class="next">
  Next Chapter: &ldquo;Control Flow&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/strings.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Strings &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Strings<small>19</small></a></h3>

<ul>
    <li><a href="#values-and-objects"><small>19.1</small> Values and Objects</a></li>
    <li><a href="#struct-inheritance"><small>19.2</small> Struct Inheritance</a></li>
    <li><a href="#strings"><small>19.3</small> Strings</a></li>
    <li><a href="#operations-on-strings"><small>19.4</small> Operations on Strings</a></li>
    <li><a href="#freeing-objects"><small>19.5</small> Freeing Objects</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>String Encoding</a></li>
</ul>


<div class="prev-next">
    <a href="types-of-values.html" title="Types of Values" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="hash-tables.html" title="Hash Tables" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="types-of-values.html" title="Types of Values" class="prev">←</a>
<a href="hash-tables.html" title="Hash Tables" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Strings<small>19</small></a></h3>

<ul>
    <li><a href="#values-and-objects"><small>19.1</small> Values and Objects</a></li>
    <li><a href="#struct-inheritance"><small>19.2</small> Struct Inheritance</a></li>
    <li><a href="#strings"><small>19.3</small> Strings</a></li>
    <li><a href="#operations-on-strings"><small>19.4</small> Operations on Strings</a></li>
    <li><a href="#freeing-objects"><small>19.5</small> Freeing Objects</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>String Encoding</a></li>
</ul>


<div class="prev-next">
    <a href="types-of-values.html" title="Types of Values" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="hash-tables.html" title="Hash Tables" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">19</div>
  <h1>Strings</h1>

<blockquote>
<p>&ldquo;Ah? A small aversion to menial labor?&rdquo; The doctor cocked an eyebrow.
&ldquo;Understandable, but misplaced. One should treasure those hum-drum
tasks that keep the body occupied but leave the mind and heart unfettered.&rdquo;</p>
<p><cite>Tad Williams, <em>The Dragonbone Chair</em></cite></p>
</blockquote>
<p>Our little VM can represent three types of values right now: numbers, Booleans,
and <code>nil</code>. Those types have two important things in common: they&rsquo;re immutable
and they&rsquo;re small. Numbers are the largest, and they still fit into two 64-bit
words. That&rsquo;s a small enough price that we can afford to pay it for all values,
even Booleans and nils which don&rsquo;t need that much space.</p>
<p>Strings, unfortunately, are not so petite. There&rsquo;s no maximum length for a
string. Even if we were to artificially cap it at some contrived limit like
<span name="pascal">255</span> characters, that&rsquo;s still too much memory to spend
on every single value.</p>
<aside name="pascal">
<p>UCSD Pascal, one of the first implementations of Pascal, had this exact limit.
Instead of using a terminating null byte to indicate the end of the string like
C, Pascal strings started with a length value. Since UCSD used only a single
byte to store the length, strings couldn&rsquo;t be any longer than 255 characters.</p><img src="image/strings/pstring.png" alt="The Pascal string 'hello' with a length byte of 5 preceding it." />
</aside>
<p>We need a way to support values whose sizes vary, sometimes greatly. This is
exactly what dynamic allocation on the heap is designed for. We can allocate as
many bytes as we need. We get back a pointer that we&rsquo;ll use to keep track of the
value as it flows through the VM.</p>
<h2><a href="#values-and-objects" id="values-and-objects"><small>19&#8202;.&#8202;1</small>Values and Objects</a></h2>
<p>Using the heap for larger, variable-sized values and the stack for smaller,
atomic ones leads to a two-level representation. Every Lox value that you can
store in a variable or return from an expression will be a Value. For small,
fixed-size types like numbers, the payload is stored directly inside the Value
struct itself.</p>
<p>If the object is larger, its data lives on the heap. Then the Value&rsquo;s payload is
a <em>pointer</em> to that blob of memory. We&rsquo;ll eventually have a handful of
heap-allocated types in clox: strings, instances, functions, you get the idea.
Each type has its own unique data, but there is also state they all share that
<a href="garbage-collection.html">our future garbage collector</a> will use to manage their memory.</p><img src="image/strings/value.png" class="wide" alt="Field layout of number and obj values." />
<p>We&rsquo;ll call this common representation <span name="short">&ldquo;Obj&rdquo;</span>. Each Lox
value whose state lives on the heap is an Obj. We can thus use a single new
ValueType case to refer to all heap-allocated types.</p>
<aside name="short">
<p>&ldquo;Obj&rdquo; is short for &ldquo;object&rdquo;, natch.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  VAL_NUMBER,
</pre><div class="source-file"><em>value.h</em><br>
in enum <em>ValueType</em></div>
<pre class="insert">  <span class="a">VAL_OBJ</span>
</pre><pre class="insert-after">} ValueType;
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, in enum <em>ValueType</em></div>

<p>When a Value&rsquo;s type is <code>VAL_OBJ</code>, the payload is a pointer to the heap memory,
so we add another case to the union for that.</p>
<div class="codehilite"><pre class="insert-before">    double number;
</pre><div class="source-file"><em>value.h</em><br>
in struct <em>Value</em></div>
<pre class="insert">    <span class="t">Obj</span>* <span class="i">obj</span>;
</pre><pre class="insert-after">  } as;<span name="as"> </span>
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, in struct <em>Value</em></div>

<p>As we did with the other value types, we crank out a couple of helpful macros
for working with Obj values.</p>
<div class="codehilite"><pre class="insert-before">#define IS_NUMBER(value)  ((value).type == VAL_NUMBER)
</pre><div class="source-file"><em>value.h</em><br>
add after struct <em>Value</em></div>
<pre class="insert"><span class="a">#define IS_OBJ(value)     ((value).type == VAL_OBJ)</span>
</pre><pre class="insert-after">

#define AS_BOOL(value)    ((value).as.boolean)
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after struct <em>Value</em></div>

<p>This evaluates to <code>true</code> if the given Value is an Obj. If so, we can use this:</p>
<div class="codehilite"><pre class="insert-before">#define IS_OBJ(value)     ((value).type == VAL_OBJ)

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define AS_OBJ(value)     ((value).as.obj)</span>
</pre><pre class="insert-after">#define AS_BOOL(value)    ((value).as.boolean)
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>It extracts the Obj pointer from the value. We can also go the other way.</p>
<div class="codehilite"><pre class="insert-before">#define NUMBER_VAL(value) ((Value){VAL_NUMBER, {.number = value}})
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="a">#define OBJ_VAL(object)   ((Value){VAL_OBJ, {.obj = (Obj*)object}})</span>
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>This takes a bare Obj pointer and wraps it in a full Value.</p>
<h2><a href="#struct-inheritance" id="struct-inheritance"><small>19&#8202;.&#8202;2</small>Struct Inheritance</a></h2>
<p>Every heap-allocated value is an Obj, but <span name="objs">Objs</span> are
not all the same. For strings, we need the array of characters. When we get to
instances, they will need their data fields. A function object will need its
chunk of bytecode. How do we handle different payloads and sizes? We can&rsquo;t use
another union like we did for Value since the sizes are all over the place.</p>
<aside name="objs">
<p>No, I don&rsquo;t know how to pronounce &ldquo;objs&rdquo; either. Feels like there should be a
vowel in there somewhere.</p>
</aside>
<p>Instead, we&rsquo;ll use another technique. It&rsquo;s been around for ages, to the point
that the C specification carves out specific support for it, but I don&rsquo;t know
that it has a canonical name. It&rsquo;s an example of <a href="https://en.wikipedia.org/wiki/Type_punning"><em>type punning</em></a>, but that
term is too broad. In the absence of any better ideas, I&rsquo;ll call it <strong>struct
inheritance</strong>, because it relies on structs and roughly follows how
single-inheritance of state works in object-oriented languages.</p>
<p>Like a tagged union, each Obj starts with a tag field that identifies what kind
of object it is<span class="em">&mdash;</span>string, instance, etc. Following that are the payload fields.
Instead of a union with cases for each type, each type is its own separate
struct. The tricky part is how to treat these structs uniformly since C has no
concept of inheritance or polymorphism. I&rsquo;ll explain that soon, but first let&rsquo;s
get the preliminary stuff out of the way.</p>
<p>The name &ldquo;Obj&rdquo; itself refers to a struct that contains the state shared across
all object types. It&rsquo;s sort of like the &ldquo;base class&rdquo; for objects. Because of
some cyclic dependencies between values and objects, we forward-declare it in
the &ldquo;value&rdquo; module.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="k">typedef</span> <span class="k">struct</span> <span class="t">Obj</span> <span class="t">Obj</span>;

</pre><pre class="insert-after">typedef enum {
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>And the actual definition is in a new module.</p>
<div class="codehilite"><div class="source-file"><em>object.h</em><br>
create new file</div>
<pre><span class="a">#ifndef clox_object_h</span>
<span class="a">#define clox_object_h</span>

<span class="a">#include &quot;common.h&quot;</span>
<span class="a">#include &quot;value.h&quot;</span>

<span class="k">struct</span> <span class="t">Obj</span> {
  <span class="t">ObjType</span> <span class="i">type</span>;
};

<span class="a">#endif</span>
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, create new file</div>

<p>Right now, it contains only the type tag. Shortly, we&rsquo;ll add some other
bookkeeping information for memory management. The type enum is this:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;value.h&quot;
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert">

<span class="k">typedef</span> <span class="k">enum</span> {
  <span class="a">OBJ_STRING</span>,
} <span class="t">ObjType</span>;
</pre><pre class="insert-after">

struct Obj {
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>Obviously, that will be more useful in later chapters after we add more
heap-allocated types. Since we&rsquo;ll be accessing these tag types frequently, it&rsquo;s
worth making a little macro that extracts the object type tag from a given
Value.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;value.h&quot;
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert">

<span class="a">#define OBJ_TYPE(value)        (AS_OBJ(value)-&gt;type)</span>
</pre><pre class="insert-after">

typedef enum {
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>That&rsquo;s our foundation.</p>
<p>Now, let&rsquo;s build strings on top of it. The payload for strings is defined in a
separate struct. Again, we need to forward-declare it.</p>
<div class="codehilite"><pre class="insert-before">typedef struct Obj Obj;
</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="k">typedef</span> <span class="k">struct</span> <span class="t">ObjString</span> <span class="t">ObjString</span>;
</pre><pre class="insert-after">

typedef enum {
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<p>The definition lives alongside Obj.</p>
<div class="codehilite"><pre class="insert-before">};
</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>Obj</em></div>
<pre class="insert">

<span class="k">struct</span> <span class="t">ObjString</span> {
  <span class="t">Obj</span> <span class="i">obj</span>;
  <span class="t">int</span> <span class="i">length</span>;
  <span class="t">char</span>* <span class="i">chars</span>;
};
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>Obj</em></div>

<p>A string object contains an array of characters. Those are stored in a separate,
heap-allocated array so that we set aside only as much room as needed for each
string. We also store the number of bytes in the string, not counting the
terminator. This isn&rsquo;t strictly necessary but lets us tell how much memory is
allocated for the string without walking the character array to find the null
terminator.</p>
<p>Because ObjString is an Obj, it also needs the state all Objs share. It
accomplishes that by having its first field be an Obj. C specifies that struct
fields are arranged in memory in the order that they are declared. Also, when
you nest structs, the inner struct&rsquo;s fields are expanded right in place. So the
memory for Obj and for ObjString looks like this:</p><img src="image/strings/obj.png" alt="The memory layout for the fields in Obj and ObjString." />
<p>Note how the first bytes of ObjString exactly line up with Obj. This is not a
coincidence<span class="em">&mdash;</span>C <span name="spec">mandates</span> it. This is designed to
enable a clever pattern: You can take a pointer to a struct and safely convert
it to a pointer to its first field and back.</p>
<aside name="spec">
<p>The key part of the spec is:</p>
<blockquote>
<p>&sect; 6.7.2.1 13</p>
<p>Within a structure object, the non-bit-field members and the units in which
bit-fields reside have addresses that increase in the order in which they
are declared. A pointer to a structure object, suitably converted, points to
its initial member (or if that member is a bit-field, then to the unit in
which it resides), and vice versa. There may be unnamed padding within a
structure object, but not at its beginning.</p>
</blockquote>
</aside>
<p>Given an <code>ObjString*</code>, you can safely cast it to <code>Obj*</code> and then access the
<code>type</code> field from it. Every ObjString &ldquo;is&rdquo; an Obj in the OOP sense of &ldquo;is&rdquo;. When
we later add other object types, each struct will have an Obj as its first
field. Any code that wants to work with all objects can treat them as base
<code>Obj*</code> and ignore any other fields that may happen to follow.</p>
<p>You can go in the other direction too. Given an <code>Obj*</code>, you can &ldquo;downcast&rdquo; it to
an <code>ObjString*</code>. Of course, you need to ensure that the <code>Obj*</code> pointer you have
does point to the <code>obj</code> field of an actual ObjString. Otherwise, you are
unsafely reinterpreting random bits of memory. To detect that such a cast is
safe, we add another macro.</p>
<div class="codehilite"><pre class="insert-before">#define OBJ_TYPE(value)        (AS_OBJ(value)-&gt;type)
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert">

<span class="a">#define IS_STRING(value)       isObjType(value, OBJ_STRING)</span>
</pre><pre class="insert-after">

typedef enum {
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>It takes a Value, not a raw <code>Obj*</code>, because most code in the VM works with
Values. It relies on this inline function:</p>
<div class="codehilite"><pre class="insert-before">};

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjString</em></div>
<pre class="insert"><span class="k">static</span> <span class="k">inline</span> <span class="t">bool</span> <span class="i">isObjType</span>(<span class="t">Value</span> <span class="i">value</span>, <span class="t">ObjType</span> <span class="i">type</span>) {
  <span class="k">return</span> <span class="a">IS_OBJ</span>(<span class="i">value</span>) &amp;&amp; <span class="a">AS_OBJ</span>(<span class="i">value</span>)-&gt;<span class="i">type</span> == <span class="i">type</span>;
}

</pre><pre class="insert-after">#endif
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjString</em></div>

<p>Pop quiz: Why not just put the body of this function right in the macro? What&rsquo;s
different about this one compared to the others? Right, it&rsquo;s because the body
uses <code>value</code> twice. A macro is expanded by inserting the argument <em>expression</em>
every place the parameter name appears in the body. If a macro uses a parameter
more than once, that expression gets evaluated multiple times.</p>
<p>That&rsquo;s bad if the expression has side effects. If we put the body of
<code>isObjType()</code> into the macro definition and then you did, say,</p>
<div class="codehilite"><pre><span class="a">IS_STRING</span>(<span class="a">POP</span>())
</pre></div>
<p>then it would pop two values off the stack! Using a function fixes that.</p>
<p>As long as we ensure that we set the type tag correctly whenever we create an
Obj of some type, this macro will tell us when it&rsquo;s safe to cast a value to a
specific object type. We can do that using these:</p>
<div class="codehilite"><pre class="insert-before">#define IS_STRING(value)       isObjType(value, OBJ_STRING)
</pre><div class="source-file"><em>object.h</em></div>
<pre class="insert">

<span class="a">#define AS_STRING(value)       ((ObjString*)AS_OBJ(value))</span>
<span class="a">#define AS_CSTRING(value)      (((ObjString*)AS_OBJ(value))-&gt;chars)</span>
</pre><pre class="insert-after">

typedef enum {
</pre></div>
<div class="source-file-narrow"><em>object.h</em></div>

<p>These two macros take a Value that is expected to contain a pointer to a valid
ObjString on the heap. The first one returns the <code>ObjString*</code> pointer. The
second one steps through that to return the character array itself, since that&rsquo;s
often what we&rsquo;ll end up needing.</p>
<h2><a href="#strings" id="strings"><small>19&#8202;.&#8202;3</small>Strings</a></h2>
<p>OK, our VM can now represent string values. It&rsquo;s time to add strings to the
language itself. As usual, we begin in the front end. The lexer already
tokenizes string literals, so it&rsquo;s the parser&rsquo;s turn.</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_IDENTIFIER]    = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_STRING</span>]        = {<span class="i">string</span>,   <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_NUMBER]        = {number,   NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>When the parser hits a string token, it calls this parse function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>number</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">string</span>() {
  <span class="i">emitConstant</span>(<span class="a">OBJ_VAL</span>(<span class="i">copyString</span>(<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">start</span> + <span class="n">1</span>,
                                  <span class="i">parser</span>.<span class="i">previous</span>.<span class="i">length</span> - <span class="n">2</span>)));
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>number</em>()</div>

<p>This takes the string&rsquo;s characters <span name="escape">directly</span> from the
lexeme. The <code>+ 1</code> and <code>- 2</code> parts trim the leading and trailing quotation marks.
It then creates a string object, wraps it in a Value, and stuffs it into the
constant table.</p>
<aside name="escape">
<p>If Lox supported string escape sequences like <code>\n</code>, we&rsquo;d translate those here.
Since it doesn&rsquo;t, we can take the characters as they are.</p>
</aside>
<p>To create the string, we use <code>copyString()</code>, which is declared in <code>object.h</code>.</p>
<div class="codehilite"><pre class="insert-before">};

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjString</em></div>
<pre class="insert"><span class="t">ObjString</span>* <span class="i">copyString</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">chars</span>, <span class="t">int</span> <span class="i">length</span>);

</pre><pre class="insert-after">static inline bool isObjType(Value value, ObjType type) {
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjString</em></div>

<p>The compiler module needs to include that.</p>
<div class="codehilite"><pre class="insert-before">#define clox_compiler_h

</pre><div class="source-file"><em>compiler.h</em></div>
<pre class="insert"><span class="a">#include &quot;object.h&quot;</span>
</pre><pre class="insert-after">#include &quot;vm.h&quot;
</pre></div>
<div class="source-file-narrow"><em>compiler.h</em></div>

<p>Our &ldquo;object&rdquo; module gets an implementation file where we define the new
function.</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
create new file</div>
<pre><span class="a">#include &lt;stdio.h&gt;</span>
<span class="a">#include &lt;string.h&gt;</span>

<span class="a">#include &quot;memory.h&quot;</span>
<span class="a">#include &quot;object.h&quot;</span>
<span class="a">#include &quot;value.h&quot;</span>
<span class="a">#include &quot;vm.h&quot;</span>

<span class="t">ObjString</span>* <span class="i">copyString</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">chars</span>, <span class="t">int</span> <span class="i">length</span>) {
  <span class="t">char</span>* <span class="i">heapChars</span> = <span class="a">ALLOCATE</span>(<span class="t">char</span>, <span class="i">length</span> + <span class="n">1</span>);
  <span class="i">memcpy</span>(<span class="i">heapChars</span>, <span class="i">chars</span>, <span class="i">length</span>);
  <span class="i">heapChars</span>[<span class="i">length</span>] = <span class="s">&#39;\0&#39;</span>;
  <span class="k">return</span> <span class="i">allocateString</span>(<span class="i">heapChars</span>, <span class="i">length</span>);
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, create new file</div>

<p>First, we allocate a new array on the heap, just big enough for the string&rsquo;s
characters and the trailing <span name="terminator">terminator</span>, using
this low-level macro that allocates an array with a given element type and
count:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;

</pre><div class="source-file"><em>memory.h</em></div>
<pre class="insert"><span class="a">#define ALLOCATE(type, count) \</span>
<span class="a">    (type*)reallocate(NULL, 0, sizeof(type) * (count))</span>

</pre><pre class="insert-after">#define GROW_CAPACITY(capacity) \
</pre></div>
<div class="source-file-narrow"><em>memory.h</em></div>

<p>Once we have the array, we copy over the characters from the lexeme and
terminate it.</p>
<aside name="terminator" class="bottom">
<p>We need to terminate the string ourselves because the lexeme points at a range
of characters inside the monolithic source string and isn&rsquo;t terminated.</p>
<p>Since ObjString stores the length explicitly, we <em>could</em> leave the character
array unterminated, but slapping a terminator on the end costs us only a byte
and lets us pass the character array to C standard library functions that expect
a terminated string.</p>
</aside>
<p>You might wonder why the ObjString can&rsquo;t just point back to the original
characters in the source string. Some ObjStrings will be created dynamically at
runtime as a result of string operations like concatenation. Those strings
obviously need to dynamically allocate memory for the characters, which means
the string needs to <em>free</em> that memory when it&rsquo;s no longer needed.</p>
<p>If we had an ObjString for a string literal, and tried to free its character
array that pointed into the original source code string, bad things would
happen. So, for literals, we preemptively copy the characters over to the heap.
This way, every ObjString reliably owns its character array and can free it.</p>
<p>The real work of creating a string object happens in this function:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;vm.h&quot;

</pre><div class="source-file"><em>object.c</em></div>
<pre class="insert"><span class="k">static</span> <span class="t">ObjString</span>* <span class="i">allocateString</span>(<span class="t">char</span>* <span class="i">chars</span>, <span class="t">int</span> <span class="i">length</span>) {
  <span class="t">ObjString</span>* <span class="i">string</span> = <span class="a">ALLOCATE_OBJ</span>(<span class="t">ObjString</span>, <span class="a">OBJ_STRING</span>);
  <span class="i">string</span>-&gt;<span class="i">length</span> = <span class="i">length</span>;
  <span class="i">string</span>-&gt;<span class="i">chars</span> = <span class="i">chars</span>;
  <span class="k">return</span> <span class="i">string</span>;
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em></div>

<p>It creates a new ObjString on the heap and then initializes its fields. It&rsquo;s
sort of like a constructor in an OOP language. As such, it first calls the &ldquo;base
class&rdquo; constructor to initialize the Obj state, using a new macro.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;vm.h&quot;
</pre><div class="source-file"><em>object.c</em></div>
<pre class="insert">

<span class="a">#define ALLOCATE_OBJ(type, objectType) \</span>
<span class="a">    (type*)allocateObject(sizeof(type), objectType)</span>
</pre><pre class="insert-after">

static ObjString* allocateString(char* chars, int length) {
</pre></div>
<div class="source-file-narrow"><em>object.c</em></div>

<p><span name="factored">Like</span> the previous macro, this exists mainly to
avoid the need to redundantly cast a <code>void*</code> back to the desired type. The
actual functionality is here:</p>
<aside name="factored">
<p>I admit this chapter has a sea of helper functions and macros to wade through. I
try to keep the code nicely factored, but that leads to a scattering of tiny
functions. They will pay off when we reuse them later.</p>
</aside>
<div class="codehilite"><pre class="insert-before">#define ALLOCATE_OBJ(type, objectType) \
    (type*)allocateObject(sizeof(type), objectType)
</pre><div class="source-file"><em>object.c</em></div>
<pre class="insert">

<span class="k">static</span> <span class="t">Obj</span>* <span class="i">allocateObject</span>(<span class="t">size_t</span> <span class="i">size</span>, <span class="t">ObjType</span> <span class="i">type</span>) {
  <span class="t">Obj</span>* <span class="i">object</span> = (<span class="t">Obj</span>*)<span class="i">reallocate</span>(<span class="a">NULL</span>, <span class="n">0</span>, <span class="i">size</span>);
  <span class="i">object</span>-&gt;<span class="i">type</span> = <span class="i">type</span>;
  <span class="k">return</span> <span class="i">object</span>;
}
</pre><pre class="insert-after">

static ObjString* allocateString(char* chars, int length) {
</pre></div>
<div class="source-file-narrow"><em>object.c</em></div>

<p>It allocates an object of the given size on the heap. Note that the size is
<em>not</em> just the size of Obj itself. The caller passes in the number of bytes so
that there is room for the extra payload fields needed by the specific object
type being created.</p>
<p>Then it initializes the Obj state<span class="em">&mdash;</span>right now, that&rsquo;s just the type tag. This
function returns to <code>allocateString()</code>, which finishes initializing the ObjString
fields. <span name="viola"><em>Voilà</em></span>, we can compile and execute string
literals.</p>
<aside name="viola"><img src="image/strings/viola.png" class="above" alt="A viola." />
<p>Don&rsquo;t get &ldquo;voilà&rdquo; confused with &ldquo;viola&rdquo;. One means &ldquo;there it is&rdquo; and the other
is a string instrument, the middle child between a violin and a cello. Yes, I
did spend two hours drawing a viola just to mention that.</p>
</aside>
<h2><a href="#operations-on-strings" id="operations-on-strings"><small>19&#8202;.&#8202;4</small>Operations on Strings</a></h2>
<p>Our fancy strings are there, but they don&rsquo;t do much of anything yet. A good
first step is to make the existing print code not barf on the new value type.</p>
<div class="codehilite"><pre class="insert-before">    case VAL_NUMBER: printf(&quot;%g&quot;, AS_NUMBER(value)); break;
</pre><div class="source-file"><em>value.c</em><br>
in <em>printValue</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">VAL_OBJ</span>: <span class="i">printObject</span>(<span class="i">value</span>); <span class="k">break</span>;
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>printValue</em>()</div>

<p>If the value is a heap-allocated object, it defers to a helper function over in
the &ldquo;object&rdquo; module.</p>
<div class="codehilite"><pre class="insert-before">ObjString* copyString(const char* chars, int length);
</pre><div class="source-file"><em>object.h</em><br>
add after <em>copyString</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">printObject</span>(<span class="t">Value</span> <span class="i">value</span>);
</pre><pre class="insert-after">

static inline bool isObjType(Value value, ObjType type) {
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after <em>copyString</em>()</div>

<p>The implementation looks like this:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>copyString</em>()</div>
<pre><span class="t">void</span> <span class="i">printObject</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="k">switch</span> (<span class="a">OBJ_TYPE</span>(<span class="i">value</span>)) {
    <span class="k">case</span> <span class="a">OBJ_STRING</span>:
      <span class="i">printf</span>(<span class="s">&quot;%s&quot;</span>, <span class="a">AS_CSTRING</span>(<span class="i">value</span>));
      <span class="k">break</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>copyString</em>()</div>

<p>We have only a single object type now, but this function will sprout additional
switch cases in later chapters. For string objects, it simply <span
name="term-2">prints</span> the character array as a C string.</p>
<aside name="term-2">
<p>I told you terminating the string would come in handy.</p>
</aside>
<p>The equality operators also need to gracefully handle strings. Consider:</p>
<div class="codehilite"><pre><span class="s">&quot;string&quot;</span> == <span class="s">&quot;string&quot;</span>
</pre></div>
<p>These are two separate string literals. The compiler will make two separate
calls to <code>copyString()</code>, create two distinct ObjString objects and store them as
two constants in the chunk. They are different objects in the heap. But our
users (and thus we) expect strings to have value equality. The above expression
should evaluate to <code>true</code>. That requires a little special support.</p>
<div class="codehilite"><pre class="insert-before">    case VAL_NUMBER: return AS_NUMBER(a) == AS_NUMBER(b);
</pre><div class="source-file"><em>value.c</em><br>
in <em>valuesEqual</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">VAL_OBJ</span>: {
      <span class="t">ObjString</span>* <span class="i">aString</span> = <span class="a">AS_STRING</span>(<span class="i">a</span>);
      <span class="t">ObjString</span>* <span class="i">bString</span> = <span class="a">AS_STRING</span>(<span class="i">b</span>);
      <span class="k">return</span> <span class="i">aString</span>-&gt;<span class="i">length</span> == <span class="i">bString</span>-&gt;<span class="i">length</span> &amp;&amp;
          <span class="i">memcmp</span>(<span class="i">aString</span>-&gt;<span class="i">chars</span>, <span class="i">bString</span>-&gt;<span class="i">chars</span>,
                 <span class="i">aString</span>-&gt;<span class="i">length</span>) == <span class="n">0</span>;
    }
</pre><pre class="insert-after">    default:         return false; // Unreachable.
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>valuesEqual</em>()</div>

<p>If the two values are both strings, then they are equal if their character
arrays contain the same characters, regardless of whether they are two separate
objects or the exact same one. This does mean that string equality is slower
than equality on other types since it has to walk the whole string. We&rsquo;ll revise
that <a href="hash-tables.html">later</a>, but this gives us the right semantics for now.</p>
<p>Finally, in order to use <code>memcmp()</code> and the new stuff in the &ldquo;object&rdquo; module, we
need a couple of includes. Here:</p>
<div class="codehilite"><pre class="insert-before">#include &lt;stdio.h&gt;
</pre><div class="source-file"><em>value.c</em></div>
<pre class="insert"><span class="a">#include &lt;string.h&gt;</span>
</pre><pre class="insert-after">

#include &quot;memory.h&quot;
</pre></div>
<div class="source-file-narrow"><em>value.c</em></div>

<p>And here:</p>
<div class="codehilite"><pre class="insert-before">#include &lt;string.h&gt;

</pre><div class="source-file"><em>value.c</em></div>
<pre class="insert"><span class="a">#include &quot;object.h&quot;</span>
</pre><pre class="insert-after">#include &quot;memory.h&quot;
</pre></div>
<div class="source-file-narrow"><em>value.c</em></div>

<h3><a href="#concatenation" id="concatenation"><small>19&#8202;.&#8202;4&#8202;.&#8202;1</small>Concatenation</a></h3>
<p>Full-grown languages provide lots of operations for working with strings<span class="em">&mdash;</span>access to individual characters, the string&rsquo;s length, changing case, splitting,
joining, searching, etc. When you implement your language, you&rsquo;ll likely want
all that. But for this book, we keep things <em>very</em> minimal.</p>
<p>The only interesting operation we support on strings is <code>+</code>. If you use that
operator on two string objects, it produces a new string that&rsquo;s a concatenation
of the two operands. Since Lox is dynamically typed, we can&rsquo;t tell which
behavior is needed at compile time because we don&rsquo;t know the types of the
operands until runtime. Thus, the <code>OP_ADD</code> instruction dynamically inspects the
operands and chooses the right operation.</p>
<div class="codehilite"><pre class="insert-before">      case OP_LESS:     BINARY_OP(BOOL_VAL, &lt;); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_ADD</span>: {
        <span class="k">if</span> (<span class="a">IS_STRING</span>(<span class="i">peek</span>(<span class="n">0</span>)) &amp;&amp; <span class="a">IS_STRING</span>(<span class="i">peek</span>(<span class="n">1</span>))) {
          <span class="i">concatenate</span>();
        } <span class="k">else</span> <span class="k">if</span> (<span class="a">IS_NUMBER</span>(<span class="i">peek</span>(<span class="n">0</span>)) &amp;&amp; <span class="a">IS_NUMBER</span>(<span class="i">peek</span>(<span class="n">1</span>))) {
          <span class="t">double</span> <span class="i">b</span> = <span class="a">AS_NUMBER</span>(<span class="i">pop</span>());
          <span class="t">double</span> <span class="i">a</span> = <span class="a">AS_NUMBER</span>(<span class="i">pop</span>());
          <span class="i">push</span>(<span class="a">NUMBER_VAL</span>(<span class="i">a</span> + <span class="i">b</span>));
        } <span class="k">else</span> {
          <span class="i">runtimeError</span>(
              <span class="s">&quot;Operands must be two numbers or two strings.&quot;</span>);
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_SUBTRACT: BINARY_OP(NUMBER_VAL, -); break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 1 line</div>

<p>If both operands are strings, it concatenates. If they&rsquo;re both numbers, it adds
them. Any other <span name="convert">combination</span> of operand types is a
runtime error.</p>
<aside name="convert" class="bottom">
<p>This is more conservative than most languages. In other languages, if one
operand is a string, the other can be any type and it will be implicitly
converted to a string before concatenating the two.</p>
<p>I think that&rsquo;s a fine feature, but it would require writing tedious &ldquo;convert to
string&rdquo; code for each type, so I left it out of Lox.</p>
</aside>
<p>To concatenate strings, we define a new function.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>isFalsey</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">concatenate</span>() {
  <span class="t">ObjString</span>* <span class="i">b</span> = <span class="a">AS_STRING</span>(<span class="i">pop</span>());
  <span class="t">ObjString</span>* <span class="i">a</span> = <span class="a">AS_STRING</span>(<span class="i">pop</span>());

  <span class="t">int</span> <span class="i">length</span> = <span class="i">a</span>-&gt;<span class="i">length</span> + <span class="i">b</span>-&gt;<span class="i">length</span>;
  <span class="t">char</span>* <span class="i">chars</span> = <span class="a">ALLOCATE</span>(<span class="t">char</span>, <span class="i">length</span> + <span class="n">1</span>);
  <span class="i">memcpy</span>(<span class="i">chars</span>, <span class="i">a</span>-&gt;<span class="i">chars</span>, <span class="i">a</span>-&gt;<span class="i">length</span>);
  <span class="i">memcpy</span>(<span class="i">chars</span> + <span class="i">a</span>-&gt;<span class="i">length</span>, <span class="i">b</span>-&gt;<span class="i">chars</span>, <span class="i">b</span>-&gt;<span class="i">length</span>);
  <span class="i">chars</span>[<span class="i">length</span>] = <span class="s">&#39;\0&#39;</span>;

  <span class="t">ObjString</span>* <span class="i">result</span> = <span class="i">takeString</span>(<span class="i">chars</span>, <span class="i">length</span>);
  <span class="i">push</span>(<span class="a">OBJ_VAL</span>(<span class="i">result</span>));
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>isFalsey</em>()</div>

<p>It&rsquo;s pretty verbose, as C code that works with strings tends to be. First, we
calculate the length of the result string based on the lengths of the operands.
We allocate a character array for the result and then copy the two halves in. As
always, we carefully ensure the string is terminated.</p>
<p>In order to call <code>memcpy()</code>, the VM needs an include.</p>
<div class="codehilite"><pre class="insert-before">#include &lt;stdio.h&gt;
</pre><div class="source-file"><em>vm.c</em></div>
<pre class="insert"><span class="a">#include &lt;string.h&gt;</span>
</pre><pre class="insert-after">

#include &quot;common.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em></div>

<p>Finally, we produce an ObjString to contain those characters. This time we use a
new function, <code>takeString()</code>.</p>
<div class="codehilite"><pre class="insert-before">};

</pre><div class="source-file"><em>object.h</em><br>
add after struct <em>ObjString</em></div>
<pre class="insert"><span class="t">ObjString</span>* <span class="i">takeString</span>(<span class="t">char</span>* <span class="i">chars</span>, <span class="t">int</span> <span class="i">length</span>);
</pre><pre class="insert-after">ObjString* copyString(const char* chars, int length);
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, add after struct <em>ObjString</em></div>

<p>The implementation looks like this:</p>
<div class="codehilite"><div class="source-file"><em>object.c</em><br>
add after <em>allocateString</em>()</div>
<pre><span class="t">ObjString</span>* <span class="i">takeString</span>(<span class="t">char</span>* <span class="i">chars</span>, <span class="t">int</span> <span class="i">length</span>) {
  <span class="k">return</span> <span class="i">allocateString</span>(<span class="i">chars</span>, <span class="i">length</span>);
}
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, add after <em>allocateString</em>()</div>

<p>The previous <code>copyString()</code> function assumes it <em>cannot</em> take ownership of the
characters you pass in. Instead, it conservatively creates a copy of the
characters on the heap that the ObjString can own. That&rsquo;s the right thing for
string literals where the passed-in characters are in the middle of the source
string.</p>
<p>But, for concatenation, we&rsquo;ve already dynamically allocated a character array on
the heap. Making another copy of that would be redundant (and would mean
<code>concatenate()</code> has to remember to free its copy). Instead, <code>takeString()</code> claims
ownership of the string you give it.</p>
<p>As usual, stitching this functionality together requires a couple of includes.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;debug.h&quot;
</pre><div class="source-file"><em>vm.c</em></div>
<pre class="insert"><span class="a">#include &quot;object.h&quot;</span>
<span class="a">#include &quot;memory.h&quot;</span>
</pre><pre class="insert-after">#include &quot;vm.h&quot;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em></div>

<h2><a href="#freeing-objects" id="freeing-objects"><small>19&#8202;.&#8202;5</small>Freeing Objects</a></h2>
<p>Behold this innocuous-seeming expression:</p>
<div class="codehilite"><pre><span class="s">&quot;st&quot;</span> + <span class="s">&quot;ri&quot;</span> + <span class="s">&quot;ng&quot;</span>
</pre></div>
<p>When the compiler chews through this, it allocates an ObjString for each of
those three string literals and stores them in the chunk&rsquo;s constant table and
generates this <span name="stack">bytecode</span>:</p>
<aside name="stack">
<p>Here&rsquo;s what the stack looks like after each instruction:</p><img src="image/strings/stack.png" alt="The state of the stack at each instruction." />
</aside>
<div class="codehilite"><pre>0000    OP_CONSTANT         0 &quot;st&quot;
0002    OP_CONSTANT         1 &quot;ri&quot;
0004    OP_ADD
0005    OP_CONSTANT         2 &quot;ng&quot;
0007    OP_ADD
0008    OP_RETURN
</pre></div>
<p>The first two instructions push <code>"st"</code> and <code>"ri"</code> onto the stack. Then the
<code>OP_ADD</code> pops those and concatenates them. That dynamically allocates a new
<code>"stri"</code> string on the heap. The VM pushes that and then pushes the <code>"ng"</code>
constant. The last <code>OP_ADD</code> pops <code>"stri"</code> and <code>"ng"</code>, concatenates them, and
pushes the result: <code>"string"</code>. Great, that&rsquo;s what we expect.</p>
<p>But, wait. What happened to that <code>"stri"</code> string? We dynamically allocated it,
then the VM discarded it after concatenating it with <code>"ng"</code>. We popped it from
the stack and no longer have a reference to it, but we never freed its memory.
We&rsquo;ve got ourselves a classic memory leak.</p>
<p>Of course, it&rsquo;s perfectly fine for the <em>Lox program</em> to forget about
intermediate strings and not worry about freeing them. Lox automatically manages
memory on the user&rsquo;s behalf. The responsibility to manage memory doesn&rsquo;t
<em>disappear</em>. Instead, it falls on our shoulders as VM implementers.</p>
<p>The full <span name="borrowed">solution</span> is a <a href="garbage-collection.html">garbage collector</a> that
reclaims unused memory while the program is running. We&rsquo;ve got some other stuff
to get in place before we&rsquo;re ready to tackle that project. Until then, we are
living on borrowed time. The longer we wait to add the collector, the harder it
is to do.</p>
<aside name="borrowed">
<p>I&rsquo;ve seen a number of people implement large swathes of their language before
trying to start on the GC. For the kind of toy programs you typically run while
a language is being developed, you actually don&rsquo;t run out of memory before
reaching the end of the program, so this gets you surprisingly far.</p>
<p>But that underestimates how <em>hard</em> it is to add a garbage collector later. The
collector <em>must</em> ensure it can find every bit of memory that <em>is</em> still being
used so that it doesn&rsquo;t collect live data. There are hundreds of places a
language implementation can squirrel away a reference to some object. If you
don&rsquo;t find all of them, you get nightmarish bugs.</p>
<p>I&rsquo;ve seen language implementations die because it was too hard to get the GC in
later. If your language needs GC, get it working as soon as you can. It&rsquo;s a
crosscutting concern that touches the entire codebase.</p>
</aside>
<p>Today, we should at least do the bare minimum: avoid <em>leaking</em> memory by making
sure the VM can still find every allocated object even if the Lox program itself
no longer references them. There are many sophisticated techniques that advanced
memory managers use to allocate and track memory for objects. We&rsquo;re going to
take the simplest practical approach.</p>
<p>We&rsquo;ll create a linked list that stores every Obj. The VM can traverse that
list to find every single object that has been allocated on the heap, whether or
not the user&rsquo;s program or the VM&rsquo;s stack still has a reference to it.</p>
<p>We could define a separate linked list node struct but then we&rsquo;d have to
allocate those too. Instead, we&rsquo;ll use an <strong>intrusive list</strong><span class="em">&mdash;</span>the Obj struct
itself will be the linked list node. Each Obj gets a pointer to the next Obj in
the chain.</p>
<div class="codehilite"><pre class="insert-before">struct Obj {
  ObjType type;
</pre><div class="source-file"><em>object.h</em><br>
in struct <em>Obj</em></div>
<pre class="insert">  <span class="k">struct</span> <span class="t">Obj</span>* <span class="i">next</span>;
</pre><pre class="insert-after">};
</pre></div>
<div class="source-file-narrow"><em>object.h</em>, in struct <em>Obj</em></div>

<p>The VM stores a pointer to the head of the list.</p>
<div class="codehilite"><pre class="insert-before">  Value* stackTop;
</pre><div class="source-file"><em>vm.h</em><br>
in struct <em>VM</em></div>
<pre class="insert">  <span class="t">Obj</span>* <span class="i">objects</span>;
</pre><pre class="insert-after">} VM;
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, in struct <em>VM</em></div>

<p>When we first initialize the VM, there are no allocated objects.</p>
<div class="codehilite"><pre class="insert-before">  resetStack();
</pre><div class="source-file"><em>vm.c</em><br>
in <em>initVM</em>()</div>
<pre class="insert">  <span class="i">vm</span>.<span class="i">objects</span> = <span class="a">NULL</span>;
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>initVM</em>()</div>

<p>Every time we allocate an Obj, we insert it in the list.</p>
<div class="codehilite"><pre class="insert-before">  object-&gt;type = type;
</pre><div class="source-file"><em>object.c</em><br>
in <em>allocateObject</em>()</div>
<pre class="insert">

  <span class="i">object</span>-&gt;<span class="i">next</span> = <span class="i">vm</span>.<span class="i">objects</span>;
  <span class="i">vm</span>.<span class="i">objects</span> = <span class="i">object</span>;
</pre><pre class="insert-after">  return object;
</pre></div>
<div class="source-file-narrow"><em>object.c</em>, in <em>allocateObject</em>()</div>

<p>Since this is a singly linked list, the easiest place to insert it is as the
head. That way, we don&rsquo;t need to also store a pointer to the tail and keep it
updated.</p>
<p>The &ldquo;object&rdquo; module is directly using the global <code>vm</code> variable from the &ldquo;vm&rdquo;
module, so we need to expose that externally.</p>
<div class="codehilite"><pre class="insert-before">} InterpretResult;

</pre><div class="source-file"><em>vm.h</em><br>
add after enum <em>InterpretResult</em></div>
<pre class="insert"><span class="k">extern</span> <span class="a">VM</span> <span class="i">vm</span>;

</pre><pre class="insert-after">void initVM();
</pre></div>
<div class="source-file-narrow"><em>vm.h</em>, add after enum <em>InterpretResult</em></div>

<p>Eventually, the garbage collector will free memory while the VM is still
running. But, even then, there will usually be unused objects still lingering in
memory when the user&rsquo;s program completes. The VM should free those too.</p>
<p>There&rsquo;s no sophisticated logic for that. Once the program is done, we can free
<em>every</em> object. We can and should implement that now.</p>
<div class="codehilite"><pre class="insert-before">void freeVM() {
</pre><div class="source-file"><em>vm.c</em><br>
in <em>freeVM</em>()</div>
<pre class="insert">  <span class="i">freeObjects</span>();
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>freeVM</em>()</div>

<p>That empty function we defined <a href="a-virtual-machine.html#an-instruction-execution-machine">way back when</a> finally does something! It
calls this:</p>
<div class="codehilite"><pre class="insert-before">void* reallocate(void* pointer, size_t oldSize, size_t newSize);
</pre><div class="source-file"><em>memory.h</em><br>
add after <em>reallocate</em>()</div>
<pre class="insert"><span class="t">void</span> <span class="i">freeObjects</span>();
</pre><pre class="insert-after">

#endif
</pre></div>
<div class="source-file-narrow"><em>memory.h</em>, add after <em>reallocate</em>()</div>

<p>Here&rsquo;s how we free the objects:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>reallocate</em>()</div>
<pre><span class="t">void</span> <span class="i">freeObjects</span>() {
  <span class="t">Obj</span>* <span class="i">object</span> = <span class="i">vm</span>.<span class="i">objects</span>;
  <span class="k">while</span> (<span class="i">object</span> != <span class="a">NULL</span>) {
    <span class="t">Obj</span>* <span class="i">next</span> = <span class="i">object</span>-&gt;<span class="i">next</span>;
    <span class="i">freeObject</span>(<span class="i">object</span>);
    <span class="i">object</span> = <span class="i">next</span>;
  }
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>reallocate</em>()</div>

<p>This is a CS 101 textbook implementation of walking a linked list and freeing
its nodes. For each node, we call:</p>
<div class="codehilite"><div class="source-file"><em>memory.c</em><br>
add after <em>reallocate</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">freeObject</span>(<span class="t">Obj</span>* <span class="i">object</span>) {
  <span class="k">switch</span> (<span class="i">object</span>-&gt;<span class="i">type</span>) {
    <span class="k">case</span> <span class="a">OBJ_STRING</span>: {
      <span class="t">ObjString</span>* <span class="i">string</span> = (<span class="t">ObjString</span>*)<span class="i">object</span>;
      <span class="a">FREE_ARRAY</span>(<span class="t">char</span>, <span class="i">string</span>-&gt;<span class="i">chars</span>, <span class="i">string</span>-&gt;<span class="i">length</span> + <span class="n">1</span>);
      <span class="a">FREE</span>(<span class="t">ObjString</span>, <span class="i">object</span>);
      <span class="k">break</span>;
    }
  }
}
</pre></div>
<div class="source-file-narrow"><em>memory.c</em>, add after <em>reallocate</em>()</div>

<p>We aren&rsquo;t only freeing the Obj itself. Since some object types also allocate
other memory that they own, we also need a little type-specific code to handle
each object type&rsquo;s special needs. Here, that means we free the character array
and then free the ObjString. Those both use one last memory management macro.</p>
<div class="codehilite"><pre class="insert-before">    (type*)reallocate(NULL, 0, sizeof(type) * (count))
</pre><div class="source-file"><em>memory.h</em></div>
<pre class="insert">

<span class="a">#define FREE(type, pointer) reallocate(pointer, sizeof(type), 0)</span>
</pre><pre class="insert-after">

#define GROW_CAPACITY(capacity) \
</pre></div>
<div class="source-file-narrow"><em>memory.h</em></div>

<p>It&rsquo;s a tiny <span name="free">wrapper</span> around <code>reallocate()</code> that
&ldquo;resizes&rdquo; an allocation down to zero bytes.</p>
<aside name="free">
<p>Using <code>reallocate()</code> to free memory might seem pointless. Why not just call
<code>free()</code>? Later, this will help the VM track how much memory is still being
used. If all allocation and freeing goes through <code>reallocate()</code>, it&rsquo;s easy to
keep a running count of the number of bytes of allocated memory.</p>
</aside>
<p>As usual, we need an include to wire everything together.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;
</pre><div class="source-file"><em>memory.h</em></div>
<pre class="insert"><span class="a">#include &quot;object.h&quot;</span>
</pre><pre class="insert-after">

#define ALLOCATE(type, count) \
</pre></div>
<div class="source-file-narrow"><em>memory.h</em></div>

<p>Then in the implementation file:</p>
<div class="codehilite"><pre class="insert-before">#include &quot;memory.h&quot;
</pre><div class="source-file"><em>memory.c</em></div>
<pre class="insert"><span class="a">#include &quot;vm.h&quot;</span>
</pre><pre class="insert-after">

void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
</pre></div>
<div class="source-file-narrow"><em>memory.c</em></div>

<p>With this, our VM no longer leaks memory. Like a good C program, it cleans up
its mess before exiting. But it doesn&rsquo;t free any objects while the VM is
running. Later, when it&rsquo;s possible to write longer-running Lox programs, the VM
will eat more and more memory as it goes, not relinquishing a single byte until
the entire program is done.</p>
<p>We won&rsquo;t address that until we&rsquo;ve added <a href="garbage-collection.html">a real garbage collector</a>, but this
is a big step. We now have the infrastructure to support a variety of different
kinds of dynamically allocated objects. And we&rsquo;ve used that to add strings to
clox, one of the most used types in most programming languages. Strings in turn
enable us to build another fundamental data type, especially in dynamic
languages: the venerable <a href="hash-tables.html">hash table</a>. But that&rsquo;s for the next chapter<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Each string requires two separate dynamic allocations<span class="em">&mdash;</span>one for the
ObjString and a second for the character array. Accessing the characters
from a value requires two pointer indirections, which can be bad for
performance. A more efficient solution relies on a technique called
<strong><a href="https://en.wikipedia.org/wiki/Flexible_array_member">flexible array members</a></strong>. Use that to store the ObjString and its
character array in a single contiguous allocation.</p>
</li>
<li>
<p>When we create the ObjString for each string literal, we copy the characters
onto the heap. That way, when the string is later freed, we know it is safe
to free the characters too.</p>
<p>This is a simpler approach but wastes some memory, which might be a problem
on very constrained devices. Instead, we could keep track of which
ObjStrings own their character array and which are &ldquo;constant strings&rdquo; that
just point back to the original source string or some other non-freeable
location. Add support for this.</p>
</li>
<li>
<p>If Lox were your language, what would you have it do when a user tries to use
<code>+</code> with one string operand and the other some other type? Justify your
choice. What do other languages do?</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: String Encoding</a></h2>
<p>In this book, I try not to shy away from the gnarly problems you&rsquo;ll run into in
a real language implementation. We might not always use the most <em>sophisticated</em>
solution<span class="em">&mdash;</span>it&rsquo;s an intro book after all<span class="em">&mdash;</span>but I don&rsquo;t think it&rsquo;s honest to
pretend the problem doesn&rsquo;t exist at all. However, I did skirt around one really
nasty conundrum: deciding how to represent strings.</p>
<p>There are two facets to a string encoding:</p>
<ul>
<li>
<p><strong>What is a single &ldquo;character&rdquo; in a string?</strong> How many different values are
there and what do they represent? The first widely adopted standard answer
to this was <a href="https://en.wikipedia.org/wiki/ASCII">ASCII</a>. It gave you 128 different character values and
specified what they were. It was great<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>if you only ever cared about
English. While it has weird, mostly forgotten characters like &ldquo;record
separator&rdquo; and &ldquo;synchronous idle&rdquo;, it doesn&rsquo;t have a single umlaut, acute,
or grave. It can&rsquo;t represent &ldquo;jalapeño&rdquo;, &ldquo;naïve&rdquo;, <span
name="gruyere">&ldquo;Gruyère&rdquo;</span>, or &ldquo;Mötley Crüe&rdquo;.</p>
<aside name="gruyere">
<p>It goes without saying that a language that does not let one discuss Gruyère
or Mötley Crüe is a language not worth using.</p>
</aside>
<p>Next came <a href="https://en.wikipedia.org/wiki/Unicode">Unicode</a>. Initially, it supported 16,384 different characters
(<strong>code points</strong>), which fit nicely in 16 bits with a couple of bits to
spare. Later that grew and grew, and now there are well over 100,000
different code points including such vital instruments of human
communication as 💩 (Unicode Character &lsquo;PILE OF POO&rsquo;, <code>U+1F4A9</code>).</p>
<p>Even that long list of code points is not enough to represent each possible
visible glyph a language might support. To handle that, Unicode also has
<strong>combining characters</strong> that modify a preceding code point. For example,
&ldquo;a&rdquo; followed by the combining character &ldquo;¨&rdquo; gives you &ldquo;ä&rdquo;. (To make things
more confusing Unicode <em>also</em> has a single code point that looks like &ldquo;ä&rdquo;.)</p>
<p>If a user accesses the fourth &ldquo;character&rdquo; in &ldquo;naïve&rdquo;, do they expect to get
back &ldquo;v&rdquo; or &ldquo;¨&rdquo;? The former means they are thinking of each code
point and its combining character as a single unit<span class="em">&mdash;</span>what Unicode calls an
<strong>extended grapheme cluster</strong><span class="em">&mdash;</span>the latter means they are thinking in
individual code points. Which do your users expect?</p>
</li>
<li>
<p><strong>How is a single unit represented in memory?</strong> Most systems using ASCII
gave a single byte to each character and left the high bit unused. Unicode
has a handful of common encodings. UTF-16 packs most code points into 16
bits. That was great when every code point fit in that size. When that
overflowed, they added <em>surrogate pairs</em> that use multiple 16-bit code units
to represent a single code point. UTF-32 is the next evolution of
UTF-16<span class="em">&mdash;</span>it gives a full 32 bits to each and every code point.</p>
<p>UTF-8 is more complex than either of those. It uses a variable number of
bytes to encode a code point. Lower-valued code points fit in fewer bytes.
Since each character may occupy a different number of bytes, you can&rsquo;t
directly index into the string to find a specific code point. If you want,
say, the 10th code point, you don&rsquo;t know how many bytes into the string that
is without walking and decoding all of the preceding ones.</p>
</li>
</ul>
<p>Choosing a character representation and encoding involves fundamental
trade-offs. Like many things in engineering, there&rsquo;s no <span
name="python">perfect</span> solution:</p>
<aside name="python">
<p>An example of how difficult this problem is comes from Python. The achingly long
transition from Python 2 to 3 is painful mostly because of its changes around
string encoding.</p>
</aside>
<ul>
<li>
<p>ASCII is memory efficient and fast, but it kicks non-Latin languages to the
side.</p>
</li>
<li>
<p>UTF-32 is fast and supports the whole Unicode range, but wastes a lot of
memory given that most code points do tend to be in the lower range of
values, where a full 32 bits aren&rsquo;t needed.</p>
</li>
<li>
<p>UTF-8 is memory efficient and supports the whole Unicode range, but its
variable-length encoding makes it slow to access arbitrary code points.</p>
</li>
<li>
<p>UTF-16 is worse than all of them<span class="em">&mdash;</span>an ugly consequence of Unicode
outgrowing its earlier 16-bit range. It&rsquo;s less memory efficient than UTF-8
but is still a variable-length encoding thanks to surrogate pairs. Avoid it
if you can. Alas, if your language needs to run on or interoperate with the
browser, the JVM, or the CLR, you might be stuck with it, since those all
use UTF-16 for their strings and you don&rsquo;t want to have to convert every
time you pass a string to the underlying system.</p>
</li>
</ul>
<p>One option is to take the maximal approach and do the &ldquo;rightest&rdquo; thing. Support
all the Unicode code points. Internally, select an encoding for each string
based on its contents<span class="em">&mdash;</span>use ASCII if every code point fits in a byte, UTF-16 if
there are no surrogate pairs, etc. Provide APIs to let users iterate over both
code points and extended grapheme clusters.</p>
<p>This covers all your bases but is really complex. It&rsquo;s a lot to implement,
debug, and optimize. When serializing strings or interoperating with other
systems, you have to deal with all of the encodings. Users need to understand
the two indexing APIs and know which to use when. This is the approach that
newer, big languages tend to take<span class="em">&mdash;</span>like Raku and Swift.</p>
<p>A simpler compromise is to always encode using UTF-8 and only expose an API that
works with code points. For users that want to work with grapheme clusters, let
them use a third-party library for that. This is less Latin-centric than ASCII
but not much more complex. You lose fast direct indexing by code point, but you
can usually live without that or afford to make it <em>O(n)</em> instead of <em>O(1)</em>.</p>
<p>If I were designing a big workhorse language for people writing large
applications, I&rsquo;d probably go with the maximal approach. For my little embedded
scripting language <a href="http://wren.io">Wren</a>, I went with UTF-8 and code points.</p>
</div>

<footer>
<a href="hash-tables.html" class="next">
  Next Chapter: &ldquo;Hash Tables&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/style.css
================================================
@charset "UTF-8";
/* "Crimson" web font: six faces (roman, italic, semibold, semibold italic,
   bold, bold italic), each loaded from its own WOFF file under font/. */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-roman.woff") format("woff");
}
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-italic.woff") format("woff");
  font-style: italic;
}
/* Semibold is registered at weight 600. */
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-semibold.woff") format("woff");
  font-weight: 600;
}
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-semibolditalic.woff") format("woff");
  font-style: italic;
  font-weight: 600;
}
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-bold.woff") format("woff");
  font-weight: bold;
}
@font-face {
  font-family: "Crimson";
  src: url("font/crimson-bolditalic.woff") format("woff");
  font-style: italic;
  font-weight: bold;
}
/* Base reset: strip default margins from the common block and inline
   elements listed here. */
body, h1, h2, h3, h4, p, blockquote, code, ul, ol, dl, dd, img {
  margin: 0;
}

img {
  outline: none;
}

/* Fixed-height images; width follows from the image's aspect ratio. */
img.arrow {
  width: auto;
  height: 11px;
}

img.dot {
  width: auto;
  height: 18px;
  vertical-align: text-bottom;
}

/* Default body text: dark gray, 16px type on a 24px line height, Crimson with
   Georgia / generic serif fallbacks. */
body {
  color: #222;
  font: normal 16px/24px "Crimson", Georgia, serif;
}

/* Chapter section headings (h2) and sub-headings (h3). A <small> inside a
   heading is floated to the right edge of the heading. */
article.chapter h2 {
  font: 600 30px/24px "Crimson", Georgia, serif;
  margin: 69px 0 0 0;
  padding-bottom: 3px;
}
article.chapter h2 small {
  font: 800 22px/24px "Crimson", Georgia, serif;
  float: right;
}
article.chapter h3 {
  font: italic 24px/24px "Crimson", Georgia, serif;
  margin: 71px 0 0 0;
  padding-bottom: 1px;
}
article.chapter h3 small {
  font: 600 16px/24px "Crimson", Georgia, serif;
  float: right;
}
/* Heading anchor links keep the body text color and drop the bottom border,
   both normally and on hover. */
article.chapter h2 a, article.chapter h3 a {
  color: #222;
  border-bottom: none;
}
article.chapter h2 a:hover, article.chapter h3 a:hover {
  border-bottom: none;
  color: inherit;
}
/* A "§" marker is generated in a 48px-wide slot to the left of the link. It
   is white by default and fades to light gray (#ddd) over 0.2s on hover.
   NOTE(review): the absolute positioning presumably relies on a positioned
   ancestor declared elsewhere in the stylesheet — confirm. */
article.chapter h2 a::before, article.chapter h3 a::before {
  position: absolute;
  left: -48px;
  width: 48px;
  content: "§";
  color: #fff;
  transition: color 0.2s ease;
  text-align: center;
}
article.chapter h2 a:hover::before, article.chapter h3 a:hover::before {
  color: #ddd;
}
/* "Challenges" and "Design Note" boxes: rounded, padded panels set in
   Source Sans Pro. The negative horizontal margin equals the padding. */
article.chapter .challenges, article.chapter .design-note {
  border-radius: 3px;
  padding: 12px;
  margin: -2px -12px 26px -12px;
  font: normal 16px/24px "Source Sans Pro", sans-serif;
  color: #444;
}
/* Box titles: small uppercase, letter-spaced headings. */
article.chapter .challenges h2, article.chapter .design-note h2 {
  margin: 0 0 -12px 0;
  padding: 0;
  font: 600 16px/24px "Source Sans Pro", sans-serif;
  text-transform: uppercase;
  letter-spacing: 1px;
}
article.chapter .challenges h2 a, article.chapter .design-note h2 a {
  color: inherit;
}
/* Suppress the generated ::before content on heading links inside the boxes. */
article.chapter .challenges h2 a::before, article.chapter .design-note h2 a::before {
  content: none;
}
article.chapter .challenges ol, article.chapter .design-note ol {
  padding: 0 0 0 18px;
}
/* List items are weight 600 while the paragraphs inside them are reset to
   weight 400. */
article.chapter .challenges ol li, article.chapter .design-note ol li {
  padding: 0 0 0 6px;
  font-weight: 600;
}
article.chapter .challenges ol li p, article.chapter .design-note ol li p {
  font-weight: 400;
}
article.chapter .challenges pre, article.chapter .design-note pre {
  margin: 0;
}
/* Blockquotes that are direct children of a box: italic, indented, and with
   no generated ::before / ::after content. */
article.chapter .challenges > blockquote p, article.chapter .design-note > blockquote p {
  margin: 0 24px;
  font: italic 16px/24px "Source Sans Pro", sans-serif;
  color: #444;
}
article.chapter .challenges > blockquote::before, article.chapter .challenges > blockquote::after, article.chapter .design-note > blockquote::before, article.chapter .design-note > blockquote::after {
  content: none;
}
/* Code inside an aside within a box gets its own text and background colors. */
article.chapter .challenges aside code, article.chapter .challenges aside .codehilite, article.chapter .design-note aside code, article.chapter .design-note aside .codehilite {
  color: #595959;
  background: #faf8f5;
}
article.chapter .challenges *:last-child, article.chapter .design-note *:last-child {
  margin-bottom: 0;
}
article.chapter .challenges .codehilite,
article.chapter .design-note .codehilite {
  margin: -12px 0 -12px 0;
}
/* Per-box colors: a box background plus a different background for code. */
article.chapter .challenges {
  background: #eef4f7;
}
article.chapter .challenges code, article.chapter .challenges .codehilite {
  background: #e4eef1;
}
article.chapter .design-note {
  background: #f6f8f2;
}
article.chapter .design-note code, article.chapter .design-note .codehilite {
  background: #eef1ea;
}
/* Chapter tables: full width, collapsed borders, and a 1px rule under each
   cell. */
article.chapter table {
  width: 100%;
  border-collapse: collapse;
}
article.chapter table thead {
  font: 700 15px "Crimson", Georgia, serif;
}
article.chapter table td {
  border-bottom: solid 1px #dee9ed;
  line-height: 22px;
  padding: 3px 0 0 0;
  margin: 0;
}
/* Every cell after the first in a row gets a 12px left gutter. */
article.chapter table td + td {
  padding-left: 12px;
}

/* Viewports up to 960px wide: asides inside the boxes use Source Sans Pro, and
   their code backgrounds are set to the same values as the boxes' own code
   backgrounds. */
@media only screen and (max-width: 960px) {
  article.chapter .challenges aside, article.chapter .design-note aside {
    font: normal 15px/24px "Source Sans Pro", sans-serif;
    padding-bottom: 4px;
  }
  article.chapter .challenges aside code, article.chapter .challenges aside .codehilite {
    background: #e4eef1;
  }
  article.chapter .design-note aside code, article.chapter .design-note aside .codehilite {
    background: #eef1ea;
  }
}
/* Viewports up to 630px wide: the heading ::before slot shrinks from 48px to
   24px. */
@media only screen and (max-width: 630px) {
  article.chapter h2 a::before, article.chapter h3 a::before {
    left: -24px;
    width: 24px;
  }
}
/* Viewports up to 580px wide: smaller headings, and boxes with 22px line
   height and no negative horizontal margin. */
@media only screen and (max-width: 580px) {
  article.chapter h2 {
    margin-top: 64px;
    padding-bottom: 2px;
    font-size: 22px;
    line-height: 22px;
  }
  article.chapter h3 {
    margin-top: 64px;
    padding-bottom: 0;
    font-size: 20px;
  }
  article.chapter .challenges, article.chapter .design-note {
    padding: 11px 11px 8px 11px;
    margin: 25px 0 0 0;
    font-size: 15px;
    line-height: 22px;
  }
  article.chapter .challenges code, article.chapter .challenges .codehilite, article.chapter .design-note code, article.chapter .design-note .codehilite {
    font-size: 14px;
  }
  article.chapter .challenges h2, article.chapter .design-note h2 {
    padding: 5px 0 4px 6px;
    font-size: 17px;
    line-height: 22px;
  }
  article.chapter .challenges aside, article.chapter .design-note aside {
    line-height: 22px;
  }
}
/* Contents article: uppercase, letter-spaced h2 headings. Each .num is an
   inline-block with a fixed 36px width. */
article.contents h2 {
  margin: 22px 0 6px 0;
  font: 600 normal 18px/24px "Source Sans Pro", sans-serif;
  text-transform: uppercase;
  letter-spacing: 1px;
}
article.contents h2 .num {
  display: inline-block;
  width: 36px;
}
article.contents ul {
  margin: -12px 0 0 0;
  padding: 6px 0 14px 0;
}
/* List entries: unbulleted, with a 36px left padding that matches the .num
   width. */
article.contents li {
  padding: 12px 0 0 36px;
  font: normal 16px/24px "Source Sans Pro", sans-serif;
  color: #7aa0b8;
  list-style-type: none;
}
article.contents li .num {
  display: inline-block;
  letter-spacing: 1px;
  width: 36px;
}
article.contents li a {
  font: 600 17px/24px "Source Sans Pro", sans-serif;
}
/* design-note entries have no top padding and use a lighter, smaller link
   font. */
article.contents li.design-note {
  padding-top: 0;
}
article.contents li.design-note a {
  font: 400 16px/23px "Source Sans Pro", sans-serif;
}
/* CSS-table layout, 864px wide: .chapters is the table, .row a table row, and
   .first / .second are top-aligned cells with a 48px gap before .second. */
article.contents .chapters {
  display: table;
  width: 864px;
}
article.contents .row {
  display: table-row;
}
article.contents .first, article.contents .second {
  display: table-cell;
  vertical-align: top;
}
article.contents .second {
  padding-left: 48px;
}
article.contents footer {
  width: 864px;
}

/* Viewports up to 1344px wide: drop the table layout; .chapters, .row, .first
   and .second become auto-width blocks. */
@media only screen and (max-width: 1344px) {
  article.contents .chapters, article.contents .row, article.contents .first, article.contents .second {
    display: block;
    width: auto;
  }
  article.contents .second {
    padding-left: 0;
  }
  article.contents footer {
    width: inherit;
  }
}
/* Viewports up to 630px wide: narrower .num column and no left padding on list
   items. */
@media only screen and (max-width: 630px) {
  article.contents h2 .num, article.contents li .num {
    width: 28px;
  }
  article.contents ol, article.contents ul {
    margin-left: 0;
  }
  article.contents li {
    padding-left: 0;
  }
}
/* Viewports up to 580px wide: smaller type on a 22px line height. */
@media only screen and (max-width: 580px) {
  article.contents h2 {
    margin: 19px 0 6px 0;
    font-size: 17px;
    line-height: 22px;
  }
  article.contents h3 {
    padding: 1px 0 2px 0;
    font-size: 17px;
    line-height: 22px;
  }
  article.contents p {
    font-size: 15px;
    line-height: 22px;
  }
  article.contents ol, article.contents ul {
    padding-bottom: 8px;
  }
  article.contents li {
    font-size: 14px;
    line-height: 22px;
    padding: 4px 0 3px 0;
  }
}
/* Sign-up box: a padded, rounded panel containing a flex-row form with an
   email field and a submit button. */
.sign-up {
  margin: 24px 0;
  padding: 12px;
  border-radius: 3px;
  color: #bf9540;
  background: #fcf6e8;
}
/* Lay the inputs out side by side. */
.sign-up form {
  display: flex;
}
/* Shared look for every input in the box. */
.sign-up input {
  height: 32px;
  padding: 4px;
  border: 2px solid #ffd580;
  border-radius: 3px;
  outline: none;
  color: #825e17;
  font: 16px "Source Sans Pro", sans-serif;
}
/* Email field: full width, with padding and border included in that width. */
.sign-up input.email {
  box-sizing: border-box;
  display: block;
  width: 100%;
}
/* Submit button: borderless, solid fill, small uppercase label. */
.sign-up input.button {
  margin-left: 8px;
  padding: 4px 8px;
  border: none;
  background: #fb3;
  font: 600 13px "Source Sans Pro", sans-serif;
  letter-spacing: 1px;
  text-transform: uppercase;
  transition: background-color 0.2s ease;
}
/* Lighter fill on hover; the transition above animates the change. */
.sign-up input.button:hover {
  background: #ffd580;
}
/* Stronger border colour on the focused input. */
.sign-up input:focus {
  border-color: #fa0;
}

/* "Crimson" family: six WOFF faces covering regular, semibold (600) and
   bold (700) weights, each in upright and italic. Style and weight are
   spelled out on every face, including the ones that match the defaults. */

/* Regular */
@font-face {
  font-family: "Crimson";
  font-style: normal;
  font-weight: 400;
  src: url("font/crimson-roman.woff") format("woff");
}
/* Regular italic */
@font-face {
  font-family: "Crimson";
  font-style: italic;
  font-weight: 400;
  src: url("font/crimson-italic.woff") format("woff");
}
/* Semibold */
@font-face {
  font-family: "Crimson";
  font-style: normal;
  font-weight: 600;
  src: url("font/crimson-semibold.woff") format("woff");
}
/* Semibold italic */
@font-face {
  font-family: "Crimson";
  font-style: italic;
  font-weight: 600;
  src: url("font/crimson-semibolditalic.woff") format("woff");
}
/* Bold */
@font-face {
  font-family: "Crimson";
  font-style: normal;
  font-weight: 700;
  src: url("font/crimson-bold.woff") format("woff");
}
/* Bold italic */
@font-face {
  font-family: "Crimson";
  font-style: italic;
  font-weight: 700;
  src: url("font/crimson-bolditalic.woff") format("woff");
}
body, h1, h2, h3, h4, p, blockquote, code, ul, ol, dl, dd, img {
  margin: 0;
}

img {
  outline: none;
}

img.arrow {
  width: auto;
  height: 11px;
}

img.dot {
  width: auto;
  height: 18px;
  vertical-align: text-bottom;
}

body {
  color: #222;
  font: normal 16px/24px "Crimson", Georgia, serif;
}

/* Print stylesheet: black text on a blank background, with the navigation
   and the sign-up box hidden and the page margins removed. */
@media print {
  body, a, code {
    color: #000 !important;
    background: none !important;
  }

  nav, .sign-up {
    display: none;
  }

  .page {
    margin: 0 !important;
  }

  /* Code blocks lose their tinted background and rounded corners; thin side
     rules mark their extent instead. */
  .codehilite {
    margin: 0 !important;
    background: none !important;
    border-radius: 0 !important;
    border-left: solid 1px #dad8d6;
    border-right: solid 1px #dad8d6;
  }
  .codehilite pre {
    color: #000 !important;
  }
  /* Inserted lines are marked by thicker side rules rather than a fill. */
  .codehilite .insert {
    border-left: solid 3px #dad8d6 !important;
    border-right: solid 3px #dad8d6 !important;
    background: none !important;
  }
  /* Deleted lines are marked only by their hatched background, so ask the
     browser to print it. The prefixed and legacy names are kept for older
     engines; the standard property comes last. */
  .codehilite .delete {
    -webkit-print-color-adjust: exact;
    color-adjust: exact;
    print-color-adjust: exact;
  }
  /* Same request for spans inside the context shown before and after an insertion. */
  .codehilite .insert-before span, .codehilite .insert-after span {
    -webkit-print-color-adjust: exact;
    color-adjust: exact;
    print-color-adjust: exact;
  }
}
.emdash {
  white-space: nowrap;
}

.scrim {
  position: absolute;
  width: 100%;
  height: 10000px;
  z-index: 4;
  background: url("rows.png");
}

.small-caps {
  font-weight: 600;
  font-size: 13px;
}

a {
  color: #1481b8;
  text-decoration: none;
  border-bottom: solid 1px rgba(222, 233, 237, 0);
  transition: color 0.2s ease, border-color 0.4s ease;
}

a:hover {
  color: #1481b8;
  border-bottom: solid 1px #dee9ed;
}

nav {
  font: 300 15px/24px "Source Sans Pro", sans-serif;
  background: #29313d;
  color: #4b6781;
}
nav a, nav h2 a {
  color: #7aa0b8;
  text-decoration: none;
  border-bottom: none;
}
nav a:hover {
  color: #dee9ed;
  text-decoration: none;
  border-bottom: none;
}
nav img {
  box-sizing: border-box;
  width: 100%;
  padding: 55px 48px 23px 48px;
}
nav h2 {
  font: 400 16px/24px "Source Sans Pro", sans-serif;
  text-transform: uppercase;
  letter-spacing: 1px;
  color: #7aa0b8;
}
nav h3 {
  font: 400 18px/24px "Source Sans Pro", sans-serif;
  color: #7aa0b8;
}
nav h2 small, nav h3 small {
  float: right;
  font-size: 16px;
  color: #4b6781;
}
nav ol, nav ul {
  margin: 6px 0 3px 0;
  padding: 6px 0 4px 24px;
  border-top: solid 1px #3b4b5e;
  border-bottom: solid 1px #3b4b5e;
}
nav ul {
  list-style-type: none;
  padding-left: 0;
}
nav hr {
  border: none;
  border-top: solid 1px #3b4b5e;
  margin: 6px 0 0 0;
  padding: 0 0 3px 0;
}
nav li small {
  float: right;
  font-size: 14px;
  color: #4b6781;
}
nav li.divider {
  margin: 5px 0 7px 0;
  border-top: solid 1px #3b4b5e;
}
nav li.end-part {
  font-size: 12px;
  font-weight: 400;
  text-transform: uppercase;
  letter-spacing: 1px;
}
nav li.end-part small {
  font-weight: 300;
  text-transform: none;
  letter-spacing: 0;
}
nav .prev-next {
  padding-top: 7px;
  font: 400 12px/18px "Source Sans Pro", sans-serif;
  text-align: center;
  text-transform: uppercase;
  letter-spacing: 1px;
}

nav.wide {
  position: fixed;
  width: 336px;
  height: 100%;
}
nav.wide .contents {
  margin: 24px 48px;
}

.nav-wrapper {
  position: absolute;
  right: 288px;
}

nav.floating {
  display: none;
  z-index: 2;
  position: absolute;
  width: 288px;
  border-bottom-left-radius: 3px;
  border-bottom-right-radius: 3px;
}
nav.floating #expand-nav {
  padding: 0 0 4px 0;
  display: block;
  font-size: 20px;
  text-align: center;
  color: #4b6781;
  cursor: pointer;
  transition: padding 0.2s ease, margin 0.2s ease, color 0.2s ease;
}
nav.floating #expand-nav, nav.floating #expand-nav:hover {
  border-bottom: none;
}
nav.floating #expand-nav:hover {
  color: #dee9ed;
}
nav.floating .expandable {
  overflow: hidden;
  padding: 0 12px;
  max-height: 0;
  transition: margin 0.2s ease, max-height 1s ease;
}
nav.floating .expandable .prev-next {
  padding-bottom: 6px;
}
nav.floating .expandable.shown {
  max-height: 550px;
}
nav.floating img {
  padding: 110px 24px 23px 24px;
}

nav.floating.pinned {
  position: fixed;
  top: -85px;
}
nav.floating.pinned .expandable {
  margin-top: -13px;
}
nav.floating.pinned #expand-nav {
  margin-top: -14px;
}

nav.narrow {
  display: none;
  text-align: center;
}
nav.narrow img {
  box-sizing: content-box;
  padding: 11px 0 3px 0;
  width: auto;
  height: 27px;
}
nav.narrow .prev, nav.narrow .next {
  font-size: 32px;
  position: absolute;
  top: 12px;
  padding: 0 48px;
}
nav.narrow .prev {
  left: 0;
}
nav.narrow .next {
  right: 0;
}

.left {
  float: left;
}

.right {
  float: right;
}

.page {
  position: relative;
  width: 912px;
  margin: 0 auto 0 384px;
}

.em {
  padding: 0 0.1em;
  white-space: nowrap;
}

.ellipse {
  white-space: nowrap;
}

code {
  font: normal 16px "Source Code Pro", Menlo, Consolas, Monaco, monospace;
  color: #717170;
  white-space: nowrap;
  padding: 2px;
}

strong code {
  font-weight: bold;
  color: inherit;
}

a code {
  color: #1481b8;
}

.codehilite {
  color: #595959;
  background: #faf8f5;
  border-radius: 3px;
  padding: 12px;
  margin: -12px;
}

pre {
  font: normal 13px/20px "Source Code Pro", Menlo, Consolas, Monaco, monospace;
  margin: 0;
  padding: 0;
  white-space: pre-wrap;
  overflow-wrap: anywhere;
}

div.codehilite + div.challenges {
  margin-top: 24px;
}

article {
  position: relative;
  width: 576px;
}
article h1 {
  position: relative;
  font: 48px/48px "Crimson", Georgia, serif;
  padding: 109px 0 19px 0;
  z-index: 2;
}
article h1.part {
  font: 600 36px/48px "Source Sans Pro", sans-serif;
  padding: 108px 0 20px 0;
  text-transform: uppercase;
  letter-spacing: 1px;
}
article .number {
  position: absolute;
  top: 50px;
  left: 624px;
  z-index: 1;
  font: 300 96px "Source Sans Pro", sans-serif;
  color: #dee9ed;
}
article p {
  margin: 24px 0;
}
article ol, article ul {
  margin: 24px 0;
  padding: 0 0 0 24px;
}
article img {
  max-width: 100%;
}
article img.wide {
  max-width: none;
  width: 912px;
}

aside {
  position: absolute;
  right: -336px;
  width: 288px;
  font: normal 14px/20px "Crimson", Georgia, serif;
  border-top: solid 1px #dee9ed;
}
aside p {
  margin: 20px 0;
}
aside p:first-child,
aside img:first-child {
  margin-top: 4px;
}
aside p:last-child {
  margin-bottom: 4px;
}
aside code {
  font-size: 14px;
  border-radius: 2px;
  padding: 1px 2px;
}
aside .codehilite {
  padding: 6px;
  margin: -12px 0;
}
aside .codehilite:last-child {
  margin-bottom: 4px;
}
aside img.above {
  position: absolute;
  bottom: 100%;
  margin-bottom: 16px;
}
aside blockquote {
  margin: 20px 0;
}
aside blockquote::before, aside blockquote::after {
  content: none;
}
aside blockquote p {
  margin: 0 12px;
  font: italic 15px/20px "Crimson", Georgia, serif;
  color: inherit;
}

aside.bottom {
  border-top: none;
  border-bottom: solid 1px #dee9ed;
}

blockquote {
  position: relative;
  margin: 29px 0 31px 0;
}
blockquote::before, blockquote::after {
  position: absolute;
  top: -20px;
  font: italic 72px "Crimson", Georgia, serif;
  color: #dee9ed;
}
blockquote::before {
  content: "“";
  left: -7px;
}
blockquote::after {
  content: "”";
  right: 8px;
}
blockquote p {
  margin: 0 48px;
  font: italic 24px/36px "Crimson", Georgia, serif;
  color: #5985a6;
}
blockquote p em {
  font-style: normal;
}
blockquote cite {
  display: block;
  text-align: right;
  color: #7aa0b8;
  font-style: normal;
  font-size: 18px;
}
blockquote cite::before {
  content: "— ";
  color: #dee9ed;
}
blockquote cite em {
  font-style: italic;
}

footer {
  position: relative;
  border-top: solid 1px #dee9ed;
  color: #7aa0b8;
  font: 400 15px "Source Sans Pro", sans-serif;
  text-align: center;
  margin: 48px 0;
  padding-top: 48px;
}
footer a, footer a:hover {
  border: none;
}
footer .next {
  position: absolute;
  right: 0;
  top: -13px;
  padding-left: 4px;
  background: #fff;
  font: 400 17px/24px "Source Sans Pro", sans-serif;
  text-transform: uppercase;
  letter-spacing: 1px;
}
footer .next:hover {
  color: #004466;
  border: none;
}

.dedication {
  margin: 96px 0 128px 0;
  text-align: center;
}
.dedication img {
  width: 50%;
}

.source-file, .source-file-narrow {
  font: normal 11px/16px "Source Code Pro", Menlo, Consolas, Monaco, monospace;
  color: #bab8b7;
}
.source-file em, .source-file-narrow em {
  color: #999997;
  font-style: normal;
}

.source-file-narrow {
  display: none;
  margin: 0px -12px 0 0;
  padding: 14px 0 0 0;
  text-align: right;
}

.source-file {
  position: absolute;
  right: -336px;
  width: 288px;
  padding: 2px 0 0 0;
}
.source-file::before {
  content: "<<";
  color: #dad8d6;
  position: absolute;
  left: -36px;
  width: 36px;
  text-align: center;
}

.codehilite pre {
  color: #797978;
}
.codehilite .k {
  color: #0099e6;
}
.codehilite .n {
  color: #dd713c;
}
.codehilite .s {
  color: #c38e22;
}
.codehilite .e {
  color: #e8ba30;
}
.codehilite .c {
  color: #aaa9a7;
}
.codehilite .a {
  color: #9966cc;
}
.codehilite .i {
  color: #1b6e98;
}
.codehilite .t {
  color: #00a4b3;
}
.codehilite .insert {
  margin: -2px -12px;
  padding: 2px 10px;
  border-left: solid 2px #dad8d6;
  border-right: solid 2px #dad8d6;
  background: #f5f3f0;
}
.codehilite .delete {
  margin: -2px -12px;
  padding: 2px 10px;
  border-left: solid 2px #dad8d6;
  border-right: solid 2px #dad8d6;
  background: repeating-linear-gradient(-45deg, #dad8d6, #dad8d6 1px, rgba(0, 0, 0, 0) 1px, rgba(0, 0, 0, 0) 6px);
}
.codehilite .delete span {
  color: #bab8b7;
}
.codehilite .insert-before, .codehilite .insert-after {
  color: #bab8b7;
}
.codehilite .insert-before .insert-comma {
  margin: -2px -1px;
  padding: 2px 1px;
  border-radius: 2px;
  background: #f5f3f0;
  color: #595959;
}

@media only screen and (max-width: 1344px) {
  nav.wide {
    display: none;
  }

  nav.floating {
    display: block;
  }

  body {
    margin: 0 24px;
  }

  .page {
    position: relative;
    width: inherit;
    max-width: 912px;
    margin: 0 auto;
  }

  article {
    width: inherit;
    margin-right: 336px;
  }
  article .number {
    top: 73px;
    left: inherit;
    right: 0;
    font-size: 72px;
  }
  article h1 {
    padding: 110px 0 18px 0;
    font-size: 44px;
  }
}
@media only screen and (max-width: 960px) {
  body {
    margin: 0;
  }

  nav.floating {
    display: none;
  }

  nav.narrow {
    display: block;
  }

  .page {
    margin: 0 48px;
    width: inherit;
  }

  article {
    margin: 0;
  }
  article img.wide {
    width: inherit;
    max-width: 100%;
  }

  aside {
    position: inherit;
    right: inherit;
    width: inherit;
    border-bottom: solid 1px #dee9ed;
  }
  aside p:first-child {
    margin-top: 8px;
  }
  aside p:last-child {
    margin-bottom: 8px;
  }
  aside div.codehilite:last-child {
    margin-bottom: 12px;
  }
  aside img {
    display: block;
    max-width: 288px;
    margin: 0 auto;
  }
  aside img.above {
    position: relative;
  }

  aside + div.codehilite {
    margin-top: 12px;
  }

  div.codehilite + aside {
    margin-top: 24px;
  }

  .source-file {
    display: none;
  }

  .source-file-narrow {
    display: block;
  }
}
@media only screen and (max-width: 630px) {
  .page {
    margin: 0 24px;
    width: inherit;
  }

  nav.narrow .prev, nav.narrow .next {
    padding: 0 24px;
  }
}
@media only screen and (max-width: 580px) {
  body {
    font-size: 15px;
    line-height: 22px;
  }

  .small-caps {
    font-size: 12px;
  }

  .scrim {
    background: url("rows-22.png");
  }

  nav.narrow img {
    padding: 9px 0 1px 0;
    height: 27px;
  }
  nav.narrow .prev, nav.narrow .next {
    top: 11px;
  }

  article h1 {
    font-size: 36px;
    padding: 100px 0 14px 0;
  }
  article h1.part {
    font-size: 30px;
    padding: 97px 0 17px 0;
  }
  article .number {
    top: 61px;
    font-size: 72px;
  }
  article p {
    margin: 22px 0;
  }
  article ol, article ul {
    margin: 22px 0;
    padding: 0 0 0 22px;
  }

  blockquote {
    margin: 27px 0 28px 0;
  }
  blockquote::before, blockquote::after {
    top: -17px;
    font-size: 52px;
  }
  blockquote p {
    margin: 0 22px;
    font-size: 20px;
    line-height: 33px;
  }

  footer .next {
    font-size: 15px;
  }
}

================================================
FILE: site/superclasses.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Superclasses &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<!-- Sidebar navigation: logo link, this chapter's section list, and
     previous / up / next links. The logo is the link's only content, so it
     carries alt text to give the link an accessible name. -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Superclasses<small>29</small></a></h3>

<ul>
    <li><a href="#inheriting-methods"><small>29.1</small> Inheriting Methods</a></li>
    <li><a href="#storing-superclasses"><small>29.2</small> Storing Superclasses</a></li>
    <li><a href="#super-calls"><small>29.3</small> Super Calls</a></li>
    <li><a href="#a-complete-virtual-machine"><small>29.4</small> A Complete Virtual Machine</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="methods-and-initializers.html" title="Methods and Initializers" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="optimization.html" title="Optimization" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<!-- Compact navigation bar: logo link plus previous / next arrows. The logo
     is the link's only content, so it carries alt text. -->
<nav class="narrow">
<a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
<a href="methods-and-initializers.html" title="Methods and Initializers" class="prev">←</a>
<a href="optimization.html" title="Optimization" class="next">→</a>
</nav>

<div class="page">
<!-- Floating navigation: same links as the sidebar, held in a collapsible
     .expandable panel with #expand-nav as its toggle handle. The logo is the
     link's only content, so it carries alt text. -->
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" alt="Crafting Interpreters" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Superclasses<small>29</small></a></h3>

<ul>
    <li><a href="#inheriting-methods"><small>29.1</small> Inheriting Methods</a></li>
    <li><a href="#storing-superclasses"><small>29.2</small> Storing Superclasses</a></li>
    <li><a href="#super-calls"><small>29.3</small> Super Calls</a></li>
    <li><a href="#a-complete-virtual-machine"><small>29.4</small> A Complete Virtual Machine</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="methods-and-initializers.html" title="Methods and Initializers" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="optimization.html" title="Optimization" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">29</div>
  <h1>Superclasses</h1>

<blockquote>
<p>You can choose your friends but you sho&rsquo; can&rsquo;t choose your family, an&rsquo; they&rsquo;re
still kin to you no matter whether you acknowledge &rsquo;em or not, and it
makes you look right silly when you don&rsquo;t.</p>
<p><cite>Harper Lee, <em>To Kill a Mockingbird</em></cite></p>
</blockquote>
<p>This is the very last chapter where we add new functionality to our VM. We&rsquo;ve
packed almost the entire Lox language in there already. All that remains is
inheriting methods and calling superclass methods. We have <a href="optimization.html">another
chapter</a> after this one, but it introduces no new behavior. It
<span name="faster">only</span> makes existing stuff faster. Make it to the end
of this one, and you&rsquo;ll have a complete Lox implementation.</p>
<aside name="faster">
<p>That &ldquo;only&rdquo; should not imply that making stuff faster isn&rsquo;t important! After
all, the whole purpose of our entire second virtual machine is better
performance over jlox. You could argue that <em>all</em> of the past fifteen chapters
are &ldquo;optimization&rdquo;.</p>
</aside>
<p>Some of the material in this chapter will remind you of jlox. The way we resolve
super calls is pretty much the same, though viewed through clox&rsquo;s more complex
mechanism for storing state on the stack. But we have an entirely different,
much faster, way of handling inherited method calls this time around.</p>
<h2><a href="#inheriting-methods" id="inheriting-methods"><small>29&#8202;.&#8202;1</small>Inheriting Methods</a></h2>
<p>We&rsquo;ll kick things off with method inheritance since it&rsquo;s the simpler piece. To
refresh your memory, Lox inheritance syntax looks like this:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Dunk in the fryer.&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">Cruller</span> &lt; <span class="t">Doughnut</span> {
  <span class="i">finish</span>() {
    <span class="k">print</span> <span class="s">&quot;Glaze with icing.&quot;</span>;
  }
}
</pre></div>
<p>Here, the Cruller class inherits from Doughnut and thus, instances of Cruller
inherit the <code>cook()</code> method. I don&rsquo;t know why I&rsquo;m belaboring this. You know how
inheritance works. Let&rsquo;s start compiling the new syntax.</p>
<div class="codehilite"><pre class="insert-before">  currentClass = &amp;classCompiler;

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_LESS</span>)) {
    <span class="i">consume</span>(<span class="a">TOKEN_IDENTIFIER</span>, <span class="s">&quot;Expect superclass name.&quot;</span>);
    <span class="i">variable</span>(<span class="k">false</span>);
    <span class="i">namedVariable</span>(<span class="i">className</span>, <span class="k">false</span>);
    <span class="i">emitByte</span>(<span class="a">OP_INHERIT</span>);
  }

</pre><pre class="insert-after">  namedVariable(className, false);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>After we compile the class name, if the next token is a <code>&lt;</code>, then we found a
superclass clause. We consume the superclass&rsquo;s identifier token, then call
<code>variable()</code>. That function takes the previously consumed token, treats it as a
variable reference, and emits code to load the variable&rsquo;s value. In other words,
it looks up the superclass by name and pushes it onto the stack.</p>
<p>After that, we call <code>namedVariable()</code> to load the subclass doing the inheriting
onto the stack, followed by an <code>OP_INHERIT</code> instruction. That instruction
wires up the superclass to the new subclass. In the last chapter, we defined an
<code>OP_METHOD</code> instruction to mutate an existing class object by adding a method to
its method table. This is similar<span class="em">&mdash;</span>the <code>OP_INHERIT</code> instruction takes an
existing class and applies the effect of inheritance to it.</p>
<p>In the previous example, when the compiler works through this bit of syntax:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Cruller</span> &lt; <span class="t">Doughnut</span> {
</pre></div>
<p>The result is this bytecode:</p><img src="image/superclasses/inherit-stack.png" alt="The series of bytecode instructions for a Cruller class inheriting from Doughnut." />
<p>Before we implement the new <code>OP_INHERIT</code> instruction, we have an edge case to
detect.</p>
<div class="codehilite"><pre class="insert-before">    variable(false);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">

    <span class="k">if</span> (<span class="i">identifiersEqual</span>(&amp;<span class="i">className</span>, &amp;<span class="i">parser</span>.<span class="i">previous</span>)) {
      <span class="i">error</span>(<span class="s">&quot;A class can&#39;t inherit from itself.&quot;</span>);
    }

</pre><pre class="insert-after">    namedVariable(className, false);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p><span name="cycle">A</span> class cannot be its own superclass. Unless you have
access to a deranged nuclear physicist and a very heavily modified DeLorean, you
cannot inherit from yourself.</p>
<aside name="cycle">
<p>Interestingly, with the way we implement method inheritance, I don&rsquo;t think
allowing cycles would actually cause any problems in clox. It wouldn&rsquo;t do
anything <em>useful</em>, but I don&rsquo;t think it would cause a crash or infinite loop.</p>
</aside>
<h3><a href="#executing-inheritance" id="executing-inheritance"><small>29&#8202;.&#8202;1&#8202;.&#8202;1</small>Executing inheritance</a></h3>
<p>Now onto the new instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_CLASS,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_INHERIT</span>,
</pre><pre class="insert-after">  OP_METHOD
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>There are no operands to worry about. The two values we need<span class="em">&mdash;</span>superclass and
subclass<span class="em">&mdash;</span>are both found on the stack. That means disassembling is easy.</p>
<div class="codehilite"><pre class="insert-before">      return constantInstruction(&quot;OP_CLASS&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_INHERIT</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_INHERIT&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_METHOD:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>The interpreter is where the action happens.</p>
<div class="codehilite"><pre class="insert-before">        break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_INHERIT</span>: {
        <span class="t">Value</span> <span class="i">superclass</span> = <span class="i">peek</span>(<span class="n">1</span>);
        <span class="t">ObjClass</span>* <span class="i">subclass</span> = <span class="a">AS_CLASS</span>(<span class="i">peek</span>(<span class="n">0</span>));
        <span class="i">tableAddAll</span>(&amp;<span class="a">AS_CLASS</span>(<span class="i">superclass</span>)-&gt;<span class="i">methods</span>,
                    &amp;<span class="i">subclass</span>-&gt;<span class="i">methods</span>);
        <span class="i">pop</span>(); <span class="c">// Subclass.</span>
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_METHOD:
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>From the top of the stack down, we have the subclass then the superclass. We
grab both of those and then do the inherit-y bit. This is where clox takes a
different path than jlox. In our first interpreter, each subclass stored a
reference to its superclass. On method access, if we didn&rsquo;t find the method in
the subclass&rsquo;s method table, we recursed through the inheritance chain looking
at each ancestor&rsquo;s method table until we found it.</p>
<p>For example, calling <code>cook()</code> on an instance of Cruller sends jlox on this
journey:</p><img src="image/superclasses/jlox-resolve.png" alt="Resolving a call to cook() in an instance of Cruller means walking the superclass chain." />
<p>That&rsquo;s a lot of work to perform during method <em>invocation</em> time. It&rsquo;s slow, and
worse, the farther an inherited method is up the ancestor chain, the slower it
gets. Not a great performance story.</p>
<p>The new approach is much faster. When the subclass is declared, we copy all of
the inherited class&rsquo;s methods down into the subclass&rsquo;s own method table. Later,
when <em>calling</em> a method, any method inherited from a superclass will be found
right in the subclass&rsquo;s own method table. There is no extra runtime work needed
for inheritance at all. By the time the class is declared, the work is done.
This means inherited method calls are exactly as fast as normal method calls<span class="em">&mdash;</span>a <span name="two">single</span> hash table lookup.</p><img src="image/superclasses/clox-resolve.png" alt="Resolving a call to cook() in an instance of Cruller which has the method in its own method table." />
<aside name="two">
<p>Well, two hash table lookups, I guess. Because first we have to make sure a
field on the instance doesn&rsquo;t shadow the method.</p>
</aside>
<p>I&rsquo;ve sometimes heard this technique called &ldquo;copy-down inheritance&rdquo;. It&rsquo;s simple
and fast, but, like most optimizations, you get to use it only under certain
constraints. It works in Lox because Lox classes are <em>closed</em>. Once a class
declaration is finished executing, the set of methods for that class can never
change.</p>
<p>In languages like Ruby, Python, and JavaScript, it&rsquo;s possible to <span
name="monkey">crack</span> open an existing class and jam some new methods into
it or even remove them. That would break our optimization because if those
modifications happened to a superclass <em>after</em> the subclass declaration
executed, the subclass would not pick up those changes. That breaks a user&rsquo;s
expectation that inheritance always reflects the current state of the
superclass.</p>
<aside name="monkey">
<p>As you can imagine, changing the set of methods a class defines imperatively at
runtime can make it hard to reason about a program. It is a very powerful tool,
but also a dangerous tool.</p>
<p>Those who find this tool maybe a little <em>too</em> dangerous gave it the unbecoming
name &ldquo;monkey patching&rdquo;, or the even less decorous &ldquo;duck punching&rdquo;.</p><img src="image/superclasses/monkey.png" alt="A monkey with an eyepatch, naturally." />
</aside>
<p>Fortunately for us (but not for users who like the feature, I guess), Lox
doesn&rsquo;t let you patch monkeys or punch ducks, so we can safely apply this
optimization.</p>
<p>What about method overrides? Won&rsquo;t copying the superclass&rsquo;s methods into the
subclass&rsquo;s method table clash with the subclass&rsquo;s own methods? Fortunately, no.
We emit the <code>OP_INHERIT</code> after the <code>OP_CLASS</code> instruction that creates the
subclass but before any method declarations and <code>OP_METHOD</code> instructions have
been compiled. At the point that we copy the superclass&rsquo;s methods down, the
subclass&rsquo;s method table is empty. Any methods the subclass overrides will
overwrite those inherited entries in the table.</p>
<h3><a href="#invalid-superclasses" id="invalid-superclasses"><small>29&#8202;.&#8202;1&#8202;.&#8202;2</small>Invalid superclasses</a></h3>
<p>Our implementation is simple and fast, which is just the way I like my VM code.
But it&rsquo;s not robust. Nothing prevents a user from inheriting from an object that
isn&rsquo;t a class at all:</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="t">NotClass</span> = <span class="s">&quot;So not a class&quot;</span>;
<span class="k">class</span> <span class="t">OhNo</span> &lt; <span class="t">NotClass</span> {}
</pre></div>
<p>Obviously, no self-respecting programmer would write that, but we have to guard
against potential Lox users who have no self-respect. A simple runtime check
fixes that.</p>
<div class="codehilite"><pre class="insert-before">        Value superclass = peek(1);
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">        <span class="k">if</span> (!<span class="a">IS_CLASS</span>(<span class="i">superclass</span>)) {
          <span class="i">runtimeError</span>(<span class="s">&quot;Superclass must be a class.&quot;</span>);
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }

</pre><pre class="insert-after">        ObjClass* subclass = AS_CLASS(peek(0));
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>If the value we loaded from the identifier in the superclass clause isn&rsquo;t an
ObjClass, we report a runtime error to let the user know what we think of them
and their code.</p>
<h2><a href="#storing-superclasses" id="storing-superclasses"><small>29&#8202;.&#8202;2</small>Storing Superclasses</a></h2>
<p>Did you notice that when we added method inheritance, we didn&rsquo;t actually add any
reference from a subclass to its superclass? After we copy the inherited methods
over, we forget the superclass entirely. We don&rsquo;t need to keep a handle on the
superclass, so we don&rsquo;t.</p>
<p>That won&rsquo;t be sufficient to support super calls. Since a subclass <span
name="may">may</span> override the superclass method, we need to be able to get
our hands on superclass method tables. Before we get to that mechanism, I want 
to refresh your memory on how super calls are statically resolved.</p>
<aside name="may">
<p>&ldquo;May&rdquo; might not be a strong enough word. Presumably the method <em>has</em> been
overridden. Otherwise, why are you bothering to use <code>super</code> instead of just
calling it directly?</p>
</aside>
<p>Back in the halcyon days of jlox, I showed you <a href="inheritance.html#semantics">this tricky example</a> to
explain the way super calls are dispatched:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">print</span> <span class="s">&quot;A method&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">B</span> &lt; <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">print</span> <span class="s">&quot;B method&quot;</span>;
  }

  <span class="i">test</span>() {
    <span class="k">super</span>.<span class="i">method</span>();
  }
}

<span class="k">class</span> <span class="t">C</span> &lt; <span class="t">B</span> {}

<span class="t">C</span>().<span class="i">test</span>();
</pre></div>
<p>Inside the body of the <code>test()</code> method, <code>this</code> is an instance of C. If super
calls were resolved relative to the superclass of the <em>receiver</em>, then we would
look in C&rsquo;s superclass, B. But super calls are resolved relative to the
superclass of the <em>surrounding class where the super call occurs</em>. In this case,
we are in B&rsquo;s <code>test()</code> method, so the superclass is A, and the program should
print &ldquo;A method&rdquo;.</p>
<p>This means that super calls are not resolved dynamically based on the runtime
instance. The superclass used to look up the method is a static<span class="em">&mdash;</span>practically
lexical<span class="em">&mdash;</span>property of where the call occurs. When we added inheritance to jlox,
we took advantage of that static aspect by storing the superclass in the same
Environment structure we used for all lexical scopes. Almost as if the
interpreter saw the above program like this:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">print</span> <span class="s">&quot;A method&quot;</span>;
  }
}

<span class="k">var</span> <span class="t">Bs_super</span> = <span class="t">A</span>;
<span class="k">class</span> <span class="t">B</span> &lt; <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">print</span> <span class="s">&quot;B method&quot;</span>;
  }

  <span class="i">test</span>() {
    <span class="i">runtimeSuperCall</span>(<span class="t">Bs_super</span>, <span class="s">&quot;method&quot;</span>);
  }
}

<span class="k">var</span> <span class="t">Cs_super</span> = <span class="t">B</span>;
<span class="k">class</span> <span class="t">C</span> &lt; <span class="t">B</span> {}

<span class="t">C</span>().<span class="i">test</span>();
</pre></div>
<p>Each subclass has a hidden variable storing a reference to its superclass.
Whenever we need to perform a super call, we access the superclass from that
variable and tell the runtime to start looking for methods there.</p>
<p>We&rsquo;ll take the same path with clox. The difference is that instead of jlox&rsquo;s
heap-allocated Environment class, we have the bytecode VM&rsquo;s value stack and
upvalue system. The machinery is a little different, but the overall effect is
the same.</p>
<h3><a href="#a-superclass-local-variable" id="a-superclass-local-variable"><small>29&#8202;.&#8202;2&#8202;.&#8202;1</small>A superclass local variable</a></h3>
<p>Our compiler already emits code to load the superclass onto the stack. Instead
of leaving that slot as a temporary, we create a new scope and make it a local
variable.</p>
<div class="codehilite"><pre class="insert-before">    }

</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">    <span class="i">beginScope</span>();
    <span class="i">addLocal</span>(<span class="i">syntheticToken</span>(<span class="s">&quot;super&quot;</span>));
    <span class="i">defineVariable</span>(<span class="n">0</span>);

</pre><pre class="insert-after">    namedVariable(className, false);
    emitByte(OP_INHERIT);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>Creating a new lexical scope ensures that if we declare two classes in the same
scope, each has a different local slot to store its superclass. Since we always
name this variable &ldquo;super&rdquo;, if we didn&rsquo;t make a scope for each subclass, the
variables would collide.</p>
<p>We name the variable &ldquo;super&rdquo; for the same reason we use &ldquo;this&rdquo; as the name of
the hidden local variable that <code>this</code> expressions resolve to: &ldquo;super&rdquo; is a
reserved word, which guarantees the compiler&rsquo;s hidden variable won&rsquo;t collide
with a user-defined one.</p>
<p>The difference is that when compiling <code>this</code> expressions, we conveniently have a
token sitting around whose lexeme is &ldquo;this&rdquo;. We aren&rsquo;t so lucky here. Instead,
we add a little helper function to create a synthetic token for the given <span
name="constant">constant</span> string.</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>variable</em>()</div>
<pre><span class="k">static</span> <span class="t">Token</span> <span class="i">syntheticToken</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">text</span>) {
  <span class="t">Token</span> <span class="i">token</span>;
  <span class="i">token</span>.<span class="i">start</span> = <span class="i">text</span>;
  <span class="i">token</span>.<span class="i">length</span> = (<span class="t">int</span>)<span class="i">strlen</span>(<span class="i">text</span>);
  <span class="k">return</span> <span class="i">token</span>;
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>variable</em>()</div>

<aside name="constant" class="bottom">
<p>I say &ldquo;constant string&rdquo; because tokens don&rsquo;t do any memory management of their
lexeme. If we tried to use a heap-allocated string for this, we&rsquo;d end up leaking
memory because it never gets freed. But the memory for C string literals lives
in the executable&rsquo;s constant data section and never needs to be freed, so we&rsquo;re
fine.</p>
</aside>
<p>Since we opened a local scope for the superclass variable, we need to close it.</p>
<div class="codehilite"><pre class="insert-before">  emitByte(OP_POP);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">

  <span class="k">if</span> (<span class="i">classCompiler</span>.<span class="i">hasSuperclass</span>) {
    <span class="i">endScope</span>();
  }
</pre><pre class="insert-after">

  currentClass = currentClass-&gt;enclosing;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>We pop the scope and discard the &ldquo;super&rdquo; variable after compiling the class body
and its methods. That way, the variable is accessible in all of the methods of
the subclass. It&rsquo;s a somewhat pointless optimization, but we create the scope
only if there <em>is</em> a superclass clause. Thus we need to close the scope only if
there is one.</p>
<p>To track that, we could declare a little local variable in <code>classDeclaration()</code>.
But soon, other functions in the compiler will need to know whether the
surrounding class is a subclass or not. So we may as well give our future selves
a hand and store this fact as a field in the ClassCompiler now.</p>
<div class="codehilite"><pre class="insert-before">typedef struct ClassCompiler {
  struct ClassCompiler* enclosing;
</pre><div class="source-file"><em>compiler.c</em><br>
in struct <em>ClassCompiler</em></div>
<pre class="insert">  <span class="t">bool</span> <span class="i">hasSuperclass</span>;
</pre><pre class="insert-after">} ClassCompiler;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in struct <em>ClassCompiler</em></div>

<p>When we first initialize a ClassCompiler, we assume it is not a subclass.</p>
<div class="codehilite"><pre class="insert-before">  ClassCompiler classCompiler;
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">  <span class="i">classCompiler</span>.<span class="i">hasSuperclass</span> = <span class="k">false</span>;
</pre><pre class="insert-after">  classCompiler.enclosing = currentClass;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>Then, if we see a superclass clause, we know we are compiling a subclass.</p>
<div class="codehilite"><pre class="insert-before">    emitByte(OP_INHERIT);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>classDeclaration</em>()</div>
<pre class="insert">    <span class="i">classCompiler</span>.<span class="i">hasSuperclass</span> = <span class="k">true</span>;
</pre><pre class="insert-after">  }
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>classDeclaration</em>()</div>

<p>This machinery gives us a mechanism at runtime to access the superclass object
of the surrounding subclass from within any of the subclass&rsquo;s methods<span class="em">&mdash;</span>simply
emit code to load the variable named &ldquo;super&rdquo;. That variable is a local outside
of the method body, but our existing upvalue support enables the VM to capture
that local inside the body of the method or even in functions nested inside that
method.</p>
<h2><a href="#super-calls" id="super-calls"><small>29&#8202;.&#8202;3</small>Super Calls</a></h2>
<p>With that runtime support in place, we are ready to implement super calls. As
usual, we go front to back, starting with the new syntax. A super call <span
name="last">begins</span>, naturally enough, with the <code>super</code> keyword.</p>
<aside name="last">
<p>This is it, friend. The very last entry you&rsquo;ll add to the parsing table.</p>
</aside>
<div class="codehilite"><pre class="insert-before">  [TOKEN_RETURN]        = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_SUPER</span>]         = {<span class="i">super_</span>,   <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_THIS]          = {this_,    NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>When the expression parser lands on a <code>super</code> token, control jumps to a new
parsing function which starts off like so:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>syntheticToken</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">super_</span>(<span class="t">bool</span> <span class="i">canAssign</span>) {
  <span class="i">consume</span>(<span class="a">TOKEN_DOT</span>, <span class="s">&quot;Expect &#39;.&#39; after &#39;super&#39;.&quot;</span>);
  <span class="i">consume</span>(<span class="a">TOKEN_IDENTIFIER</span>, <span class="s">&quot;Expect superclass method name.&quot;</span>);
  <span class="t">uint8_t</span> <span class="i">name</span> = <span class="i">identifierConstant</span>(&amp;<span class="i">parser</span>.<span class="i">previous</span>);
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>syntheticToken</em>()</div>

<p>This is pretty different from how we compiled <code>this</code> expressions. Unlike <code>this</code>,
a <code>super</code> <span name="token">token</span> is not a standalone expression.
Instead, the dot and method name following it are inseparable parts of the
syntax. However, the parenthesized argument list is separate. As with normal
method access, Lox supports getting a reference to a superclass method as a
closure without invoking it:</p>
<aside name="token">
<p>Hypothetical question: If a bare <code>super</code> token <em>were</em> an expression, what kind of
object would it evaluate to?</p>
</aside>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">print</span> <span class="s">&quot;A&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">B</span> &lt; <span class="t">A</span> {
  <span class="i">method</span>() {
    <span class="k">var</span> <span class="i">closure</span> = <span class="k">super</span>.<span class="i">method</span>;
    <span class="i">closure</span>(); <span class="c">// Prints &quot;A&quot;.</span>
  }
}
</pre></div>
<p>In other words, Lox doesn&rsquo;t really have super <em>call</em> expressions; it has super
<em>access</em> expressions, which you can choose to immediately invoke if you want. So
when the compiler hits a <code>super</code> token, we consume the subsequent <code>.</code> token and
then look for a method name. Methods are looked up dynamically, so we use
<code>identifierConstant()</code> to take the lexeme of the method name token and store it
in the constant table just like we do for property access expressions.</p>
<p>Here is what the compiler does after consuming those tokens:</p>
<div class="codehilite"><pre class="insert-before">  uint8_t name = identifierConstant(&amp;parser.previous);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>super_</em>()</div>
<pre class="insert">

  <span class="i">namedVariable</span>(<span class="i">syntheticToken</span>(<span class="s">&quot;this&quot;</span>), <span class="k">false</span>);
  <span class="i">namedVariable</span>(<span class="i">syntheticToken</span>(<span class="s">&quot;super&quot;</span>), <span class="k">false</span>);
  <span class="i">emitBytes</span>(<span class="a">OP_GET_SUPER</span>, <span class="i">name</span>);
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>super_</em>()</div>

<p>In order to access a <em>superclass method</em> on <em>the current instance</em>, the runtime
needs both the receiver <em>and</em> the superclass of the surrounding method&rsquo;s class.
The first <code>namedVariable()</code> call generates code to look up the current receiver
stored in the hidden variable &ldquo;this&rdquo; and push it onto the stack. The second
<code>namedVariable()</code> call emits code to look up the superclass from its &ldquo;super&rdquo;
variable and push that on top.</p>
<p>Finally, we emit a new <code>OP_GET_SUPER</code> instruction with an operand for the
constant table index of the method name. That&rsquo;s a lot to hold in your head. To
make it tangible, consider this example program:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Dunk in the fryer.&quot;</span>;
    <span class="k">this</span>.<span class="i">finish</span>(<span class="s">&quot;sprinkles&quot;</span>);
  }

  <span class="i">finish</span>(<span class="i">ingredient</span>) {
    <span class="k">print</span> <span class="s">&quot;Finish with &quot;</span> + <span class="i">ingredient</span>;
  }
}

<span class="k">class</span> <span class="t">Cruller</span> &lt; <span class="t">Doughnut</span> {
  <span class="i">finish</span>(<span class="i">ingredient</span>) {
    <span class="c">// No sprinkles, always icing.</span>
    <span class="k">super</span>.<span class="i">finish</span>(<span class="s">&quot;icing&quot;</span>);
  }
}
</pre></div>
<p>The bytecode emitted for the <code>super.finish("icing")</code> expression looks and works
like this:</p><img src="image/superclasses/super-instructions.png" alt="The series of bytecode instructions for calling super.finish()." />
<p>The first three instructions give the runtime access to the three pieces of
information it needs to perform the super access:</p>
<ol>
<li>
<p>The first instruction loads <strong>the instance</strong> onto the stack.</p>
</li>
<li>
<p>The second instruction loads <strong>the superclass where the method is
resolved</strong>.</p>
</li>
<li>
<p>Then the new <code>OP_GET_SUPER</code> instruction encodes <strong>the name of the method to
access</strong> as an operand.</p>
</li>
</ol>
<p>The remaining instructions are the normal bytecode for evaluating an argument
list and calling a function.</p>
<p>We&rsquo;re almost ready to implement the new <code>OP_GET_SUPER</code> instruction in the
interpreter. But before we do, the compiler has some errors it is responsible
for reporting.</p>
<div class="codehilite"><pre class="insert-before">static void super_(bool canAssign) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>super_</em>()</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">currentClass</span> == <span class="a">NULL</span>) {
    <span class="i">error</span>(<span class="s">&quot;Can&#39;t use &#39;super&#39; outside of a class.&quot;</span>);
  } <span class="k">else</span> <span class="k">if</span> (!<span class="i">currentClass</span>-&gt;<span class="i">hasSuperclass</span>) {
    <span class="i">error</span>(<span class="s">&quot;Can&#39;t use &#39;super&#39; in a class with no superclass.&quot;</span>);
  }

</pre><pre class="insert-after">  consume(TOKEN_DOT, &quot;Expect '.' after 'super'.&quot;);
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>super_</em>()</div>

<p>A super call is meaningful only inside the body of a method (or in a function
nested inside a method), and only inside the method of a class that has a
superclass. We detect both of these cases using the value of <code>currentClass</code>. If
that&rsquo;s <code>NULL</code> or points to a class with no superclass, we report those errors.</p>
<h3><a href="#executing-super-accesses" id="executing-super-accesses"><small>29&#8202;.&#8202;3&#8202;.&#8202;1</small>Executing super accesses</a></h3>
<p>Assuming the user didn&rsquo;t put a <code>super</code> expression where it&rsquo;s not allowed, their
code passes from the compiler over to the runtime. We&rsquo;ve got ourselves a new
instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_SET_PROPERTY,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_GET_SUPER</span>,
</pre><pre class="insert-after">  OP_EQUAL,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>We disassemble it like other opcodes that take a constant table index operand.</p>
<div class="codehilite"><pre class="insert-before">      return constantInstruction(&quot;OP_SET_PROPERTY&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_GET_SUPER</span>:
      <span class="k">return</span> <span class="i">constantInstruction</span>(<span class="s">&quot;OP_GET_SUPER&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_EQUAL:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>You might anticipate something harder, but interpreting the new instruction is
similar to executing a normal property access.</p>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_GET_SUPER</span>: {
        <span class="t">ObjString</span>* <span class="i">name</span> = <span class="a">READ_STRING</span>();
        <span class="t">ObjClass</span>* <span class="i">superclass</span> = <span class="a">AS_CLASS</span>(<span class="i">pop</span>());

        <span class="k">if</span> (!<span class="i">bindMethod</span>(<span class="i">superclass</span>, <span class="i">name</span>)) {
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_EQUAL: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>As with properties, we read the method name from the
constant table. Then we pass that to <code>bindMethod()</code> which looks up the method in
the given class&rsquo;s method table and creates an ObjBoundMethod to bundle the
resulting closure to the current instance.</p>
<p>The key <span name="field">difference</span> is <em>which</em> class we pass to
<code>bindMethod()</code>. With a normal property access, we use the ObjInstance&rsquo;s own
class, which gives us the dynamic dispatch we want. For a super call, we don&rsquo;t
use the instance&rsquo;s class. Instead, we use the statically resolved superclass of
the containing class, which the compiler has conveniently ensured is sitting on
top of the stack waiting for us.</p>
<p>We pop that superclass and pass it to <code>bindMethod()</code>, which correctly skips over
any overriding methods in any of the subclasses between that superclass and the
instance&rsquo;s own class. It also correctly includes any methods inherited by the
superclass from any of <em>its</em> superclasses.</p>
<p>The rest of the behavior is the same. Popping the superclass leaves the instance
at the top of the stack. When <code>bindMethod()</code> succeeds, it pops the instance and
pushes the new bound method. Otherwise, it reports a runtime error and returns
<code>false</code>. In that case, we abort the interpreter.</p>
<aside name="field">
<p>Another difference compared to <code>OP_GET_PROPERTY</code> is that we don&rsquo;t try to look
for a shadowing field first. Fields are not inherited, so <code>super</code> expressions
always resolve to methods.</p>
<p>If Lox were a prototype-based language that used <em>delegation</em> instead of
<em>inheritance</em>, then instead of one <em>class</em> inheriting from another <em>class</em>,
instances would inherit from (&ldquo;delegate to&rdquo;) other instances. In that case,
fields <em>could</em> be inherited, and we would need to check for them here.</p>
</aside>
<h3><a href="#faster-super-calls" id="faster-super-calls"><small>29&#8202;.&#8202;3&#8202;.&#8202;2</small>Faster super calls</a></h3>
<p>We have superclass method accesses working now. And since the returned object is
an ObjBoundMethod that you can then invoke, we&rsquo;ve got super <em>calls</em> working too.
Just like last chapter, we&rsquo;ve reached a point where our VM has the complete,
correct semantics.</p>
<p>But, also like last chapter, it&rsquo;s pretty slow. Again, we&rsquo;re heap allocating an
ObjBoundMethod for each super call even though most of the time the very next
instruction is an <code>OP_CALL</code> that immediately unpacks that bound method, invokes
it, and then discards it. In fact, this is even more likely to be true for
super calls than for regular method calls. At least with method calls there is
a chance that the user is actually invoking a function stored in a field. With
super calls, you&rsquo;re <em>always</em> looking up a method. The only question is whether
you invoke it immediately or not.</p>
<p>The compiler can certainly answer that question for itself if it sees a left
parenthesis after the superclass method name, so we&rsquo;ll go ahead and perform the
same optimization we did for method calls. Take out the two lines of code that
load the superclass and emit <code>OP_GET_SUPER</code>, and replace them with this:</p>
<div class="codehilite"><pre class="insert-before">  namedVariable(syntheticToken(&quot;this&quot;), false);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>super_</em>()<br>
replace 2 lines</div>
<pre class="insert">  <span class="k">if</span> (<span class="i">match</span>(<span class="a">TOKEN_LEFT_PAREN</span>)) {
    <span class="t">uint8_t</span> <span class="i">argCount</span> = <span class="i">argumentList</span>();
    <span class="i">namedVariable</span>(<span class="i">syntheticToken</span>(<span class="s">&quot;super&quot;</span>), <span class="k">false</span>);
    <span class="i">emitBytes</span>(<span class="a">OP_SUPER_INVOKE</span>, <span class="i">name</span>);
    <span class="i">emitByte</span>(<span class="i">argCount</span>);
  } <span class="k">else</span> {
    <span class="i">namedVariable</span>(<span class="i">syntheticToken</span>(<span class="s">&quot;super&quot;</span>), <span class="k">false</span>);
    <span class="i">emitBytes</span>(<span class="a">OP_GET_SUPER</span>, <span class="i">name</span>);
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>super_</em>(), replace 2 lines</div>

<p>Now before we emit anything, we look for a parenthesized argument list. If we
find one, we compile that. Then we load the superclass. After that, we emit a
new <code>OP_SUPER_INVOKE</code> instruction. This <span
name="superinstruction">superinstruction</span> combines the behavior of
<code>OP_GET_SUPER</code> and <code>OP_CALL</code>, so it takes two operands: the constant table index
of the method name to look up and the number of arguments to pass to it.</p>
<aside name="superinstruction">
<p>This is a particularly <em>super</em> superinstruction, if you get what I&rsquo;m saying.
I<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>I&rsquo;m sorry for this terrible joke.</p>
</aside>
<p>Otherwise, if we don&rsquo;t find a <code>(</code>, we continue to compile the expression as a
super access like we did before and emit an <code>OP_GET_SUPER</code>.</p>
<p>Drifting down the compilation pipeline, our first stop is a new instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_INVOKE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_SUPER_INVOKE</span>,
</pre><pre class="insert-after">  OP_CLOSURE,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>And just past that, its disassembler support.</p>
<div class="codehilite"><pre class="insert-before">      return invokeInstruction(&quot;OP_INVOKE&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_SUPER_INVOKE</span>:
      <span class="k">return</span> <span class="i">invokeInstruction</span>(<span class="s">&quot;OP_SUPER_INVOKE&quot;</span>, <span class="i">chunk</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_CLOSURE: {
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>A super invocation instruction has the same set of operands as <code>OP_INVOKE</code>, so
we reuse the same helper to disassemble it. Finally, the pipeline dumps us into
the interpreter.</p>
<div class="codehilite"><pre class="insert-before">        break;
      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_SUPER_INVOKE</span>: {
        <span class="t">ObjString</span>* <span class="i">method</span> = <span class="a">READ_STRING</span>();
        <span class="t">int</span> <span class="i">argCount</span> = <span class="a">READ_BYTE</span>();
        <span class="t">ObjClass</span>* <span class="i">superclass</span> = <span class="a">AS_CLASS</span>(<span class="i">pop</span>());
        <span class="k">if</span> (!<span class="i">invokeFromClass</span>(<span class="i">superclass</span>, <span class="i">method</span>, <span class="i">argCount</span>)) {
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="i">frame</span> = &amp;<span class="i">vm</span>.<span class="i">frames</span>[<span class="i">vm</span>.<span class="i">frameCount</span> - <span class="n">1</span>];
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_CLOSURE: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>This handful of code is basically our implementation of <code>OP_INVOKE</code> mixed
together with a dash of <code>OP_GET_SUPER</code>. There are some differences in how the
stack is organized, though. With an unoptimized super call, the superclass is
popped and replaced by the ObjBoundMethod for the resolved function <em>before</em> the
arguments to the call are evaluated. This ensures that by the time the <code>OP_CALL</code>
is executed, the bound method is <em>under</em> the argument list, where the runtime
expects it to be for a closure call.</p>
<p>With our optimized instructions, things are shuffled a bit:</p><img src="image/superclasses/super-invoke.png" class="wide" alt="The series of bytecode instructions for calling super.finish() using OP_SUPER_INVOKE." />
<p>Now resolving the superclass method is part of the <em>invocation</em>, so the
arguments need to already be on the stack at the point that we look up the
method. This means the superclass object is on top of the arguments.</p>
<p>Aside from that, the behavior is roughly the same as an <code>OP_GET_SUPER</code> followed
by an <code>OP_CALL</code>. First, we pull out the method name and argument count operands.
Then we pop the superclass off the top of the stack so that we can look up the
method in its method table. This conveniently leaves the stack set up just right
for a method call.</p>
<p>We pass the superclass, method name, and argument count to our existing
<code>invokeFromClass()</code> function. That function looks up the given method on the
given class and attempts to create a call to it with the given arity. If a
method could not be found, it returns <code>false</code>, and we bail out of the
interpreter. Otherwise, <code>invokeFromClass()</code> pushes a new CallFrame onto the call
stack for the method&rsquo;s closure. That invalidates the interpreter&rsquo;s cached
CallFrame pointer, so we refresh <code>frame</code>.</p>
<h2><a href="#a-complete-virtual-machine" id="a-complete-virtual-machine"><small>29&#8202;.&#8202;4</small>A Complete Virtual Machine</a></h2>
<p>Take a look back at what we&rsquo;ve created. By my count, we wrote around 2,500 lines
of fairly clean, straightforward C. That little program contains a complete
implementation of the<span class="em">&mdash;</span>quite high-level!<span class="em">&mdash;</span>Lox language, with a whole
precedence table full of expression types and a suite of control flow
statements. We implemented variables, functions, closures, classes, fields,
methods, and inheritance.</p>
<p>Even more impressive, our implementation is portable to any platform with a C
compiler, and is fast enough for real-world production use. We have a
single-pass bytecode compiler, a tight virtual machine interpreter for our
internal instruction set, compact object representations, a stack for storing
variables without heap allocation, and a precise garbage collector.</p>
<p>If you go out and start poking around in the implementations of Lua, Python, or
Ruby, you will be surprised by how much of it now looks familiar to you. You
have seriously leveled up your knowledge of how programming languages work,
which in turn gives you a deeper understanding of programming itself. It&rsquo;s like
you used to be a race car driver, and now you can pop the hood and repair the
engine too.</p>
<p>You can stop here if you like. The two implementations of Lox you have are
complete and full featured. You built the car and can drive it wherever you want
now. But if you are looking to have more fun tuning and tweaking for even
greater performance out on the track, there is one more chapter. We don&rsquo;t add
any new capabilities, but we roll in a couple of classic optimizations to
squeeze even more perf out. If that sounds fun, <a href="optimization.html">keep reading</a><span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>A tenet of object-oriented programming is that a class should ensure new
objects are in a valid state. In Lox, that means defining an initializer
that populates the instance&rsquo;s fields. Inheritance complicates invariants
because the instance must be in a valid state according to all of the
classes in the object&rsquo;s inheritance chain.</p>
<p>The easy part is remembering to call <code>super.init()</code> in each subclass&rsquo;s
<code>init()</code> method. The harder part is fields. There is nothing preventing two
classes in the inheritance chain from accidentally claiming the same field
name. When this happens, they will step on each other&rsquo;s fields and possibly
leave you with an instance in a broken state.</p>
<p>If Lox were your language, how would you address this, if at all? If you
would change the language, implement your change.</p>
</li>
<li>
<p>Our copy-down inheritance optimization is valid only because Lox does not
permit you to modify a class&rsquo;s methods after its declaration. This means we
don&rsquo;t have to worry about the copied methods in the subclass getting out of
sync with later changes to the superclass.</p>
<p>Other languages, like Ruby, <em>do</em> allow classes to be modified after the
fact. How do implementations of languages like that support class
modification while keeping method resolution efficient?</p>
</li>
<li>
<p>In the <a href="inheritance.html">jlox chapter on inheritance</a>, we had a challenge to
implement the BETA language&rsquo;s approach to method overriding. Solve the
challenge again, but this time in clox. Here&rsquo;s the description of the
previous challenge:</p>
<p>In Lox, as in most other object-oriented languages, when looking up a
method, we start at the bottom of the class hierarchy and work our way up<span class="em">&mdash;</span>a subclass&rsquo;s method is preferred over a superclass&rsquo;s. In order to get to the
superclass method from within an overriding method, you use <code>super</code>.</p>
<p>The language <a href="https://beta.cs.au.dk/">BETA</a> takes the <a href="http://journal.stuffwithstuff.com/2012/12/19/the-impoliteness-of-overriding-methods/">opposite approach</a>. When you call a
method, it starts at the <em>top</em> of the class hierarchy and works <em>down</em>. A
superclass method wins over a subclass method. In order to get to the
subclass method, the superclass method can call <code>inner</code>, which is sort of
like the inverse of <code>super</code>. It chains to the next method down the
hierarchy.</p>
<p>The superclass method controls when and where the subclass is allowed to
refine its behavior. If the superclass method doesn&rsquo;t call <code>inner</code> at all,
then the subclass has no way of overriding or modifying the superclass&rsquo;s
behavior.</p>
<p>Take out Lox&rsquo;s current overriding and <code>super</code> behavior, and replace it with
BETA&rsquo;s semantics. In short:</p>
<ul>
<li>
<p>When calling a method on a class, the method <em>highest</em> on the
class&rsquo;s inheritance chain takes precedence.</p>
</li>
<li>
<p>Inside the body of a method, a call to <code>inner</code> looks for a method with
the same name in the nearest subclass along the inheritance chain
between the class containing the <code>inner</code> and the class of <code>this</code>. If
there is no matching method, the <code>inner</code> call does nothing.</p>
</li>
</ul>
<p>For example:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Fry until golden brown.&quot;</span>;
    <span class="i">inner</span>();
    <span class="k">print</span> <span class="s">&quot;Place in a nice box.&quot;</span>;
  }
}

<span class="k">class</span> <span class="t">BostonCream</span> &lt; <span class="t">Doughnut</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Pipe full of custard and coat with chocolate.&quot;</span>;
  }
}

<span class="t">BostonCream</span>().<span class="i">cook</span>();
</pre></div>
<p>This should print:</p>
<div class="codehilite"><pre>Fry until golden brown.
Pipe full of custard and coat with chocolate.
Place in a nice box.
</pre></div>
<p>Since clox is about not just implementing Lox, but doing so with good
performance, this time around try to solve the challenge with an eye towards
efficiency.</p>
</li>
</ol>
</div>

<footer>
<a href="optimization.html" class="next">
  Next Chapter: &ldquo;Optimization&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/the-lox-language.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>The Lox Language &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">The Lox Language<small>3</small></a></h3>

<ul>
    <li><a href="#hello-lox"><small>3.1</small> Hello, Lox</a></li>
    <li><a href="#a-high-level-language"><small>3.2</small> A High-Level Language</a></li>
    <li><a href="#data-types"><small>3.3</small> Data Types</a></li>
    <li><a href="#expressions"><small>3.4</small> Expressions</a></li>
    <li><a href="#statements"><small>3.5</small> Statements</a></li>
    <li><a href="#variables"><small>3.6</small> Variables</a></li>
    <li><a href="#control-flow"><small>3.7</small> Control Flow</a></li>
    <li><a href="#functions"><small>3.8</small> Functions</a></li>
    <li><a href="#classes"><small>3.9</small> Classes</a></li>
    <li><a href="#the-standard-library"><small>3.10</small> The Standard Library</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Expressions and Statements</a></li>
</ul>


<div class="prev-next">
    <a href="a-map-of-the-territory.html" title="A Map of the Territory" class="left">&larr;&nbsp;Previous</a>
    <a href="welcome.html" title="Welcome">&uarr;&nbsp;Up</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="a-map-of-the-territory.html" title="A Map of the Territory" class="prev">←</a>
<a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">The Lox Language<small>3</small></a></h3>

<ul>
    <li><a href="#hello-lox"><small>3.1</small> Hello, Lox</a></li>
    <li><a href="#a-high-level-language"><small>3.2</small> A High-Level Language</a></li>
    <li><a href="#data-types"><small>3.3</small> Data Types</a></li>
    <li><a href="#expressions"><small>3.4</small> Expressions</a></li>
    <li><a href="#statements"><small>3.5</small> Statements</a></li>
    <li><a href="#variables"><small>3.6</small> Variables</a></li>
    <li><a href="#control-flow"><small>3.7</small> Control Flow</a></li>
    <li><a href="#functions"><small>3.8</small> Functions</a></li>
    <li><a href="#classes"><small>3.9</small> Classes</a></li>
    <li><a href="#the-standard-library"><small>3.10</small> The Standard Library</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
    <li class="end-part"><a href="#design-note"><small>note</small>Expressions and Statements</a></li>
</ul>


<div class="prev-next">
    <a href="a-map-of-the-territory.html" title="A Map of the Territory" class="left">&larr;&nbsp;Previous</a>
    <a href="welcome.html" title="Welcome">&uarr;&nbsp;Up</a>
    <a href="a-tree-walk-interpreter.html" title="A Tree-Walk Interpreter" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">3</div>
  <h1>The Lox Language</h1>

<blockquote>
<p>What nicer thing can you do for somebody than make them breakfast?</p>
<p><cite>Anthony Bourdain</cite></p>
</blockquote>
<p>We&rsquo;ll spend the rest of this book illuminating every dark and sundry corner of
the Lox language, but it seems cruel to have you immediately start grinding out
code for the interpreter without at least a glimpse of what we&rsquo;re going to end
up with.</p>
<p>At the same time, I don&rsquo;t want to drag you through reams of language lawyering
and specification-ese before you get to touch your text <span
name="home">editor</span>. So this will be a gentle, friendly introduction to
Lox. It will leave out a lot of details and edge cases. We&rsquo;ve got plenty of time
for those later.</p>
<aside name="home">
<p>A tutorial isn&rsquo;t very fun if you can&rsquo;t try the code out yourself. Alas, you
don&rsquo;t have a Lox interpreter yet, since you haven&rsquo;t built one!</p>
<p>Fear not. You can use <a href="https://github.com/munificent/craftinginterpreters">mine</a>.</p>
</aside>
<h2><a href="#hello-lox" id="hello-lox"><small>3&#8202;.&#8202;1</small>Hello, Lox</a></h2>
<p>Here&rsquo;s your very first taste of <span name="salmon">Lox</span>:</p>
<aside name="salmon">
<p>Your first taste of Lox, the language, that is. I don&rsquo;t know if you&rsquo;ve ever had
the cured, cold-smoked salmon before. If not, give it a try too.</p>
</aside>
<div class="codehilite"><pre><span class="c">// Your first Lox program!</span>
<span class="k">print</span> <span class="s">&quot;Hello, world!&quot;</span>;
</pre></div>
<p>As that <code>//</code> line comment and the trailing semicolon imply, Lox&rsquo;s syntax is a
member of the C family. (There are no parentheses around the string because
<code>print</code> is a built-in statement, and not a library function.)</p>
<p>Now, I won&rsquo;t claim that <span name="c">C</span> has a <em>great</em> syntax. If we
wanted something elegant, we&rsquo;d probably mimic Pascal or Smalltalk. If we wanted
to go full Scandinavian-furniture-minimalism, we&rsquo;d do a Scheme. Those all have
their virtues.</p>
<aside name="c">
<p>I&rsquo;m surely biased, but I think Lox&rsquo;s syntax is pretty clean. C&rsquo;s most egregious
grammar problems are around types. Dennis Ritchie had this idea called
&ldquo;<a href="http://softwareengineering.stackexchange.com/questions/117024/why-was-the-c-syntax-for-arrays-pointers-and-functions-designed-this-way">declaration reflects use</a>&rdquo;, where variable declarations mirror the
operations you would have to perform on the variable to get to a value of the
base type. Clever idea, but I don&rsquo;t think it worked out great in practice.</p>
<p>Lox doesn&rsquo;t have static types, so we avoid that.</p>
</aside>
<p>What C-like syntax has instead is something you&rsquo;ll often find more valuable
in a language: <em>familiarity</em>. I know you are already comfortable with that style
because the two languages we&rsquo;ll be using to <em>implement</em> Lox<span class="em">&mdash;</span>Java and C<span class="em">&mdash;</span>also inherit it. Using a similar syntax for Lox gives you one less thing to
learn.</p>
<h2><a href="#a-high-level-language" id="a-high-level-language"><small>3&#8202;.&#8202;2</small>A High-Level Language</a></h2>
<p>While this book ended up bigger than I was hoping, it&rsquo;s still not big enough to
fit a huge language like Java in it. In order to fit two complete
implementations of Lox in these pages, Lox itself has to be pretty compact.</p>
<p>When I think of languages that are small but useful, what comes to mind are
high-level &ldquo;scripting&rdquo; languages like <span name="js">JavaScript</span>, Scheme,
and Lua. Of those three, Lox looks most like JavaScript, mainly because most
C-syntax languages do. As we&rsquo;ll learn later, Lox&rsquo;s approach to scoping hews
closely to Scheme. The C flavor of Lox we&rsquo;ll build in <a href="a-bytecode-virtual-machine.html">Part III</a> is heavily
indebted to Lua&rsquo;s clean, efficient implementation.</p>
<aside name="js">
<p>Now that JavaScript has taken over the world and is used to build ginormous
applications, it&rsquo;s hard to think of it as a &ldquo;little scripting language&rdquo;. But
Brendan Eich hacked the first JS interpreter into Netscape Navigator in <em>ten
days</em> to make buttons animate on web pages. JavaScript has grown up since then,
but it was once a cute little language.</p>
<p>Because Eich slapped JS together with roughly the same raw materials and time as
an episode of MacGyver, it has some weird semantic corners where the duct tape
and paper clips show through. Things like variable hoisting, dynamically bound
<code>this</code>, holes in arrays, and implicit conversions.</p>
<p>I had the luxury of taking my time on Lox, so it should be a little cleaner.</p>
</aside>
<p>Lox shares two other aspects with those three languages:</p>
<h3><a href="#dynamic-typing" id="dynamic-typing"><small>3&#8202;.&#8202;2&#8202;.&#8202;1</small>Dynamic typing</a></h3>
<p>Lox is dynamically typed. Variables can store values of any type, and a single
variable can even store values of different types at different times. If you try
to perform an operation on values of the wrong type<span class="em">&mdash;</span>say, dividing a number by
a string<span class="em">&mdash;</span>then the error is detected and reported at runtime.</p>
<p>There are plenty of reasons to like <span name="static">static</span> types, but
they don&rsquo;t outweigh the pragmatic reasons to pick dynamic types for Lox. A
static type system is a ton of work to learn and implement. Skipping it gives
you a simpler language and a shorter book. We&rsquo;ll get our interpreter up and
executing bits of code sooner if we defer our type checking to runtime.</p>
<aside name="static">
<p>After all, the two languages we&rsquo;ll be using to <em>implement</em> Lox are both
statically typed.</p>
</aside>
<h3><a href="#automatic-memory-management" id="automatic-memory-management"><small>3&#8202;.&#8202;2&#8202;.&#8202;2</small>Automatic memory management</a></h3>
<p>High-level languages exist to eliminate error-prone, low-level drudgery, and what
could be more tedious than manually managing the allocation and freeing of
storage? No one rises and greets the morning sun with, &ldquo;I can&rsquo;t wait to figure
out the correct place to call <code>free()</code> for every byte of memory I allocate
today!&rdquo;</p>
<p>There are two main <span name="gc">techniques</span> for managing memory:
<strong>reference counting</strong> and <strong>tracing garbage collection</strong> (usually just called
<strong>garbage collection</strong> or <strong>GC</strong>). Ref counters are much simpler to implement<span class="em">&mdash;</span>I think that&rsquo;s why Perl, PHP, and Python all started out using them. But, over
time, the limitations of ref counting become too troublesome. All of those
languages eventually ended up adding a full tracing GC, or at least enough of
one to clean up object cycles.</p>
<aside name="gc">
<p>In practice, ref counting and tracing are more ends of a continuum than
opposing sides. Most ref counting systems end up doing some tracing to handle
cycles, and the write barriers of a generational collector look a bit like
retain calls if you squint.</p>
<p>For lots more on this, see &ldquo;<a href="https://researcher.watson.ibm.com/researcher/files/us-bacon/Bacon04Unified.pdf">A Unified Theory of Garbage Collection</a>&rdquo; (PDF).</p>
</aside>
<p>Tracing garbage collection has a fearsome reputation. It <em>is</em> a little harrowing
working at the level of raw memory. Debugging a GC can sometimes leave you
seeing hex dumps in your dreams. But, remember, this book is about dispelling
magic and slaying those monsters, so we <em>are</em> going to write our own garbage
collector. I think you&rsquo;ll find the algorithm is quite simple and a lot of fun to
implement.</p>
<h2><a href="#data-types" id="data-types"><small>3&#8202;.&#8202;3</small>Data Types</a></h2>
<p>In Lox&rsquo;s little universe, the atoms that make up all matter are the built-in
data types. There are only a few:</p>
<ul>
<li>
<p><strong><span name="bool">Booleans</span>.</strong> You can&rsquo;t code without logic and you
can&rsquo;t logic without Boolean values. &ldquo;True&rdquo; and &ldquo;false&rdquo;, the yin and yang of
software. Unlike some ancient languages that repurpose an existing type to
represent truth and falsehood, Lox has a dedicated Boolean type. We may
be roughing it on this expedition, but we aren&rsquo;t <em>savages</em>.</p>
<aside name="bool">
<p>Boolean is the only data type in Lox named after a person, George
Boole, which is why &ldquo;Boolean&rdquo; is capitalized. He died in 1864, nearly a
century before digital computers turned his algebra into electricity. I
wonder what he&rsquo;d think to see his name all over billions of lines of Java
code.</p>
</aside>
<p>There are two Boolean values, obviously, and a literal for each one.</p>
<div class="codehilite"><pre><span class="k">true</span>;  <span class="c">// Not false.</span>
<span class="k">false</span>; <span class="c">// Not *not* false.</span>
</pre></div>
</li>
<li>
<p><strong>Numbers.</strong> Lox has only one kind of number: double-precision floating
point. Since floating-point numbers can also represent a wide range of
integers, that covers a lot of territory, while keeping things simple.</p>
<p>Full-featured languages have lots of syntax for numbers<span class="em">&mdash;</span>hexadecimal,
scientific notation, octal, all sorts of fun stuff. We&rsquo;ll settle for basic
integer and decimal literals.</p>
<div class="codehilite"><pre><span class="n">1234</span>;  <span class="c">// An integer.</span>
<span class="n">12.34</span>; <span class="c">// A decimal number.</span>
</pre></div>
</li>
<li>
<p><strong>Strings.</strong> We&rsquo;ve already seen one string literal in the first example.
Like most languages, they are enclosed in double quotes.</p>
<div class="codehilite"><pre><span class="s">&quot;I am a string&quot;</span>;
<span class="s">&quot;&quot;</span>;    <span class="c">// The empty string.</span>
<span class="s">&quot;123&quot;</span>; <span class="c">// This is a string, not a number.</span>
</pre></div>
<p>As we&rsquo;ll see when we get to implementing them, there is quite a lot of
complexity hiding in that innocuous sequence of <span
name="char">characters</span>.</p>
<aside name="char">
<p>Even that word &ldquo;character&rdquo; is a trickster. Is it ASCII? Unicode? A
code point or a &ldquo;grapheme cluster&rdquo;? How are characters encoded? Is each
character a fixed size, or can they vary?</p>
</aside></li>
<li>
<p><strong>Nil.</strong> There&rsquo;s one last built-in value who&rsquo;s never invited to the party
but always seems to show up. It represents &ldquo;no value&rdquo;. It&rsquo;s called &ldquo;null&rdquo; in
many other languages. In Lox we spell it <code>nil</code>. (When we get to implementing
it, that will help distinguish when we&rsquo;re talking about Lox&rsquo;s <code>nil</code> versus
Java or C&rsquo;s <code>null</code>.)</p>
<p>There are good arguments for not having a null value in a language since
null pointer errors are the scourge of our industry. If we were doing a
statically typed language, it would be worth trying to ban it. In a
dynamically typed one, though, eliminating it is often more annoying
than having it.</p>
</li>
</ul>
<h2><a href="#expressions" id="expressions"><small>3&#8202;.&#8202;4</small>Expressions</a></h2>
<p>If built-in data types and their literals are atoms, then <strong>expressions</strong> must
be the molecules. Most of these will be familiar.</p>
<h3><a href="#arithmetic" id="arithmetic"><small>3&#8202;.&#8202;4&#8202;.&#8202;1</small>Arithmetic</a></h3>
<p>Lox features the basic arithmetic operators you know and love from C and other
languages:</p>
<div class="codehilite"><pre><span class="i">add</span> + <span class="i">me</span>;
<span class="i">subtract</span> - <span class="i">me</span>;
<span class="i">multiply</span> * <span class="i">me</span>;
<span class="i">divide</span> / <span class="i">me</span>;
</pre></div>
<p>The subexpressions on either side of the operator are <strong>operands</strong>. Because
there are <em>two</em> of them, these are called <strong>binary</strong> operators. (It has nothing
to do with the ones-and-zeroes use of &ldquo;binary&rdquo;.) Because the operator is <span
name="fixity">fixed</span> <em>in</em> the middle of the operands, these are also
called <strong>infix</strong> operators (as opposed to <strong>prefix</strong> operators where the
operator comes before the operands, and <strong>postfix</strong> where it comes after).</p>
<aside name="fixity">
<p>There are some operators that have more than two operands and the operators are
interleaved between them. The only one in wide usage is the &ldquo;conditional&rdquo; or
&ldquo;ternary&rdquo; operator of C and friends:</p>
<div class="codehilite"><pre><span class="i">condition</span> ? <span class="i">thenArm</span> : <span class="i">elseArm</span>;
</pre></div>
<p>Some call these <strong>mixfix</strong> operators. A few languages let you define your own
operators and control how they are positioned<span class="em">&mdash;</span>their &ldquo;fixity&rdquo;.</p>
</aside>
<p>One arithmetic operator is actually <em>both</em> an infix and a prefix one. The <code>-</code>
operator can also be used to negate a number.</p>
<div class="codehilite"><pre>-<span class="i">negateMe</span>;
</pre></div>
<p>All of these operators work on numbers, and it&rsquo;s an error to pass any other
types to them. The exception is the <code>+</code> operator<span class="em">&mdash;</span>you can also pass it two
strings to concatenate them.</p>
<h3><a href="#comparison-and-equality" id="comparison-and-equality"><small>3&#8202;.&#8202;4&#8202;.&#8202;2</small>Comparison and equality</a></h3>
<p>Moving along, we have a few more operators that always return a Boolean result.
We can compare numbers (and only numbers), using Ye Olde Comparison Operators.</p>
<div class="codehilite"><pre><span class="i">less</span> &lt; <span class="i">than</span>;
<span class="i">lessThan</span> &lt;= <span class="i">orEqual</span>;
<span class="i">greater</span> &gt; <span class="i">than</span>;
<span class="i">greaterThan</span> &gt;= <span class="i">orEqual</span>;
</pre></div>
<p>We can test two values of any kind for equality or inequality.</p>
<div class="codehilite"><pre><span class="n">1</span> == <span class="n">2</span>;         <span class="c">// false.</span>
<span class="s">&quot;cat&quot;</span> != <span class="s">&quot;dog&quot;</span>; <span class="c">// true.</span>
</pre></div>
<p>Even different types.</p>
<div class="codehilite"><pre><span class="n">314</span> == <span class="s">&quot;pi&quot;</span>; <span class="c">// false.</span>
</pre></div>
<p>Values of different types are <em>never</em> equivalent.</p>
<div class="codehilite"><pre><span class="n">123</span> == <span class="s">&quot;123&quot;</span>; <span class="c">// false.</span>
</pre></div>
<p>I&rsquo;m generally against implicit conversions.</p>
<h3><a href="#logical-operators" id="logical-operators"><small>3&#8202;.&#8202;4&#8202;.&#8202;3</small>Logical operators</a></h3>
<p>The not operator, a prefix <code>!</code>, returns <code>false</code> if its operand is true, and vice
versa.</p>
<div class="codehilite"><pre>!<span class="k">true</span>;  <span class="c">// false.</span>
!<span class="k">false</span>; <span class="c">// true.</span>
</pre></div>
<p>The other two logical operators really are control flow constructs in the guise
of expressions. An <span name="and"><code>and</code></span> expression determines if two
values are <em>both</em> true. It returns the left operand if it&rsquo;s false, or the
right operand otherwise.</p>
<div class="codehilite"><pre><span class="k">true</span> <span class="k">and</span> <span class="k">false</span>; <span class="c">// false.</span>
<span class="k">true</span> <span class="k">and</span> <span class="k">true</span>;  <span class="c">// true.</span>
</pre></div>
<p>And an <code>or</code> expression determines if <em>either</em> of two values (or both) is true.
It returns the left operand if it is true and the right operand otherwise.</p>
<div class="codehilite"><pre><span class="k">false</span> <span class="k">or</span> <span class="k">false</span>; <span class="c">// false.</span>
<span class="k">true</span> <span class="k">or</span> <span class="k">false</span>;  <span class="c">// true.</span>
</pre></div>
<aside name="and">
<p>I used <code>and</code> and <code>or</code> for these instead of <code>&amp;&amp;</code> and <code>||</code> because Lox doesn&rsquo;t use
<code>&amp;</code> and <code>|</code> for bitwise operators. It felt weird to introduce the
double-character forms without the single-character ones.</p>
<p>I also kind of like using words for these since they are really control flow
structures and not simple operators.</p>
</aside>
<p>The reason <code>and</code> and <code>or</code> are like control flow structures is that they
<strong>short-circuit</strong>. Not only does <code>and</code> return the left operand if it is false,
it doesn&rsquo;t even <em>evaluate</em> the right one in that case. Conversely
(contrapositively?), if the left operand of an <code>or</code> is true, the right is
skipped.</p>
<h3><a href="#precedence-and-grouping" id="precedence-and-grouping"><small>3&#8202;.&#8202;4&#8202;.&#8202;4</small>Precedence and grouping</a></h3>
<p>All of these operators have the same precedence and associativity that you&rsquo;d
expect coming from C. (When we get to parsing, we&rsquo;ll get <em>way</em> more precise
about that.) In cases where the precedence isn&rsquo;t what you want, you can use <code>()</code>
to group stuff.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">average</span> = (<span class="i">min</span> + <span class="i">max</span>) / <span class="n">2</span>;
</pre></div>
<p>Since they aren&rsquo;t very technically interesting, I&rsquo;ve cut the remainder of the
typical operator menagerie out of our little language. No bitwise, shift,
modulo, or conditional operators. I&rsquo;m not grading you, but you will get bonus
points in my heart if you augment your own implementation of Lox with them.</p>
<p>Those are the expression forms (except for a couple related to specific features
that we&rsquo;ll get to later), so let&rsquo;s move up a level.</p>
<h2><a href="#statements" id="statements"><small>3&#8202;.&#8202;5</small>Statements</a></h2>
<p>Now we&rsquo;re at statements. Where an expression&rsquo;s main job is to produce a <em>value</em>,
a statement&rsquo;s job is to produce an <em>effect</em>. Since, by definition, statements
don&rsquo;t evaluate to a value, to be useful they have to otherwise change the world
in some way<span class="em">&mdash;</span>usually modifying some state, reading input, or producing output.</p>
<p>You&rsquo;ve seen a couple of kinds of statements already. The first one was:</p>
<div class="codehilite"><pre><span class="k">print</span> <span class="s">&quot;Hello, world!&quot;</span>;
</pre></div>
<p>A <span name="print"><code>print</code> statement</span> evaluates a single expression
and displays the result to the user. You&rsquo;ve also seen some statements like:</p>
<aside name="print">
<p>Baking <code>print</code> into the language instead of just making it a core library
function is a hack. But it&rsquo;s a <em>useful</em> hack for us: it means our in-progress
interpreter can start producing output before we&rsquo;ve implemented all of the
machinery required to define functions, look them up by name, and call them.</p>
</aside>
<div class="codehilite"><pre><span class="s">&quot;some expression&quot;</span>;
</pre></div>
<p>An expression followed by a semicolon (<code>;</code>) promotes the expression to
statement-hood. This is called (imaginatively enough) an <strong>expression
statement</strong>.</p>
<p>If you want to pack a series of statements where a single one is expected, you
can wrap them up in a <strong>block</strong>.</p>
<div class="codehilite"><pre>{
  <span class="k">print</span> <span class="s">&quot;One statement.&quot;</span>;
  <span class="k">print</span> <span class="s">&quot;Two statements.&quot;</span>;
}
</pre></div>
<p>Blocks also affect scoping, which leads us to the next section<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span></p>
<h2><a href="#variables" id="variables"><small>3&#8202;.&#8202;6</small>Variables</a></h2>
<p>You declare variables using <code>var</code> statements. If you <span
name="omit">omit</span> the initializer, the variable&rsquo;s value defaults to <code>nil</code>.</p>
<aside name="omit">
<p>This is one of those cases where not having <code>nil</code> and forcing every variable to
be initialized to some value would be more annoying than dealing with <code>nil</code>
itself.</p>
</aside>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">imAVariable</span> = <span class="s">&quot;here is my value&quot;</span>;
<span class="k">var</span> <span class="i">iAmNil</span>;
</pre></div>
<p>Once declared, you can, naturally, access and assign a variable using its name.</p>
<p><span name="breakfast"></span></p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">breakfast</span> = <span class="s">&quot;bagels&quot;</span>;
<span class="k">print</span> <span class="i">breakfast</span>; <span class="c">// &quot;bagels&quot;.</span>
<span class="i">breakfast</span> = <span class="s">&quot;beignets&quot;</span>;
<span class="k">print</span> <span class="i">breakfast</span>; <span class="c">// &quot;beignets&quot;.</span>
</pre></div>
<aside name="breakfast">
<p>Can you tell that I tend to work on this book in the morning before I&rsquo;ve had
anything to eat?</p>
</aside>
<p>I won&rsquo;t get into the rules for variable scope here, because we&rsquo;re going to spend
a surprising amount of time in later chapters mapping every square inch of the
rules. In most cases, it works like you would expect coming from C or Java.</p>
<h2><a href="#control-flow" id="control-flow"><small>3&#8202;.&#8202;7</small>Control Flow</a></h2>
<p>It&rsquo;s hard to write <span name="flow">useful</span> programs if you can&rsquo;t skip
some code or execute some more than once. That means control flow. In addition
to the logical operators we already covered, Lox lifts three statements straight
from C.</p>
<aside name="flow">
<p>We already have <code>and</code> and <code>or</code> for branching, and we <em>could</em> use recursion to
repeat code, so that&rsquo;s theoretically sufficient. It would be pretty awkward to
program that way in an imperative-styled language, though.</p>
<p>Scheme, on the other hand, has no built-in looping constructs. It <em>does</em> rely on
recursion for repetition. Smalltalk has no built-in branching constructs, and
relies on dynamic dispatch for selectively executing code.</p>
</aside>
<p>An <code>if</code> statement executes one of two statements based on some condition.</p>
<div class="codehilite"><pre><span class="k">if</span> (<span class="i">condition</span>) {
  <span class="k">print</span> <span class="s">&quot;yes&quot;</span>;
} <span class="k">else</span> {
  <span class="k">print</span> <span class="s">&quot;no&quot;</span>;
}
</pre></div>
<p>A <code>while</code> <span name="do">loop</span> executes the body repeatedly as long as
the condition expression evaluates to true.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>;
<span class="k">while</span> (<span class="i">a</span> &lt; <span class="n">10</span>) {
  <span class="k">print</span> <span class="i">a</span>;
  <span class="i">a</span> = <span class="i">a</span> + <span class="n">1</span>;
}
</pre></div>
<aside name="do">
<p>I left <code>do while</code> loops out of Lox because they aren&rsquo;t that common and wouldn&rsquo;t
teach you anything that you won&rsquo;t already learn from <code>while</code>. Go ahead and add
them to your implementation if it makes you happy. It&rsquo;s your party.</p>
</aside>
<p>Finally, we have <code>for</code> loops.</p>
<div class="codehilite"><pre><span class="k">for</span> (<span class="k">var</span> <span class="i">a</span> = <span class="n">1</span>; <span class="i">a</span> &lt; <span class="n">10</span>; <span class="i">a</span> = <span class="i">a</span> + <span class="n">1</span>) {
  <span class="k">print</span> <span class="i">a</span>;
}
</pre></div>
<p>This loop does the same thing as the previous <code>while</code> loop. Most modern
languages also have some sort of <span name="foreach"><code>for-in</code></span> or
<code>foreach</code> loop for explicitly iterating over various sequence types. In a real
language, that&rsquo;s nicer than the crude C-style <code>for</code> loop we got here. Lox keeps
it basic.</p>
<aside name="foreach">
<p>This is a concession I made because of how the implementation is split across
chapters. A <code>for-in</code> loop needs some sort of dynamic dispatch in the iterator
protocol to handle different kinds of sequences, but we don&rsquo;t get that until
after we&rsquo;re done with control flow. We could circle back and add <code>for-in</code> loops
later, but I didn&rsquo;t think doing so would teach you anything super interesting.</p>
</aside>
<h2><a href="#functions" id="functions"><small>3&#8202;.&#8202;8</small>Functions</a></h2>
<p>A function call expression looks the same as it does in C.</p>
<div class="codehilite"><pre><span class="i">makeBreakfast</span>(<span class="i">bacon</span>, <span class="i">eggs</span>, <span class="i">toast</span>);
</pre></div>
<p>You can also call a function without passing anything to it.</p>
<div class="codehilite"><pre><span class="i">makeBreakfast</span>();
</pre></div>
<p>Unlike in, say, Ruby, the parentheses are mandatory in this case. If you leave them
off, the name doesn&rsquo;t <em>call</em> the function, it just refers to it.</p>
<p>A language isn&rsquo;t very fun if you can&rsquo;t define your own functions. In Lox, you do
that with <span name="fun"><code>fun</code></span>.</p>
<aside name="fun">
<p>I&rsquo;ve seen languages that use <code>fn</code>, <code>fun</code>, <code>func</code>, and <code>function</code>. I&rsquo;m still
hoping to discover a <code>funct</code>, <code>functi</code>, or <code>functio</code> somewhere.</p>
</aside>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">printSum</span>(<span class="i">a</span>, <span class="i">b</span>) {
  <span class="k">print</span> <span class="i">a</span> + <span class="i">b</span>;
}
</pre></div>
<p>Now&rsquo;s a good time to clarify some <span name="define">terminology</span>. Some
people throw around &ldquo;parameter&rdquo; and &ldquo;argument&rdquo; like they are interchangeable
and, to many, they are. We&rsquo;re going to spend a lot of time splitting the finest
of downy hairs around semantics, so let&rsquo;s sharpen our words. From here on out:</p>
<ul>
<li>
<p>An <strong>argument</strong> is an actual value you pass to a function when you call it.
So a function <em>call</em> has an <em>argument</em> list. Sometimes you hear <strong>actual
parameter</strong> used for these.</p>
</li>
<li>
<p>A <strong>parameter</strong> is a variable that holds the value of the argument inside
the body of the function. Thus, a function <em>declaration</em> has a <em>parameter</em>
list. Others call these <strong>formal parameters</strong> or simply <strong>formals</strong>.</p>
</li>
</ul>
<aside name="define">
<p>Speaking of terminology, some statically typed languages like C make a
distinction between <em>declaring</em> a function and <em>defining</em> it. A declaration
binds the function&rsquo;s type to its name so that calls can be type-checked but does
not provide a body. A definition declares the function and also fills in the
body so that the function can be compiled.</p>
<p>Since Lox is dynamically typed, this distinction isn&rsquo;t meaningful. A function
declaration fully specifies the function including its body.</p>
</aside>
<p>The body of a function is always a block. Inside it, you can return a value
using a <code>return</code> statement.</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">returnSum</span>(<span class="i">a</span>, <span class="i">b</span>) {
  <span class="k">return</span> <span class="i">a</span> + <span class="i">b</span>;
}
</pre></div>
<p>If execution reaches the end of the block without hitting a <code>return</code>, it
<span name="sneaky">implicitly</span> returns <code>nil</code>.</p>
<aside name="sneaky">
<p>See, I told you <code>nil</code> would sneak in when we weren&rsquo;t looking.</p>
</aside>
<h3><a href="#closures" id="closures"><small>3&#8202;.&#8202;8&#8202;.&#8202;1</small>Closures</a></h3>
<p>Functions are <em>first class</em> in Lox, which just means they are real values that
you can get a reference to, store in variables, pass around, etc. This works:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">addPair</span>(<span class="i">a</span>, <span class="i">b</span>) {
  <span class="k">return</span> <span class="i">a</span> + <span class="i">b</span>;
}

<span class="k">fun</span> <span class="i">identity</span>(<span class="i">a</span>) {
  <span class="k">return</span> <span class="i">a</span>;
}

<span class="k">print</span> <span class="i">identity</span>(<span class="i">addPair</span>)(<span class="n">1</span>, <span class="n">2</span>); <span class="c">// Prints &quot;3&quot;.</span>
</pre></div>
<p>Since function declarations are statements, you can declare local functions
inside another function.</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">outerFunction</span>() {
  <span class="k">fun</span> <span class="i">localFunction</span>() {
    <span class="k">print</span> <span class="s">&quot;I&#39;m local!&quot;</span>;
  }

  <span class="i">localFunction</span>();
}
</pre></div>
<p>If you combine local functions, first-class functions, and block scope, you run
into this interesting situation:</p>
<div class="codehilite"><pre><span class="k">fun</span> <span class="i">returnFunction</span>() {
  <span class="k">var</span> <span class="i">outside</span> = <span class="s">&quot;outside&quot;</span>;

  <span class="k">fun</span> <span class="i">inner</span>() {
    <span class="k">print</span> <span class="i">outside</span>;
  }

  <span class="k">return</span> <span class="i">inner</span>;
}

<span class="k">var</span> <span class="i">fn</span> = <span class="i">returnFunction</span>();
<span class="i">fn</span>();
</pre></div>
<p>Here, <code>inner()</code> accesses a local variable declared outside of its body in the
surrounding function. Is this kosher? Now that lots of languages have borrowed
this feature from Lisp, you probably know the answer is yes.</p>
<p>For that to work, <code>inner()</code> has to &ldquo;hold on&rdquo; to references to any surrounding
variables that it uses so that they stay around even after the outer function
has returned. We call functions that do this <span
name="closure"><strong>closures</strong></span>. These days, the term is often used for <em>any</em>
first-class function, though it&rsquo;s sort of a misnomer if the function doesn&rsquo;t
happen to close over any variables.</p>
<aside name="closure">
<p>Peter J. Landin coined the term &ldquo;closure&rdquo;. Yes, he invented damn near half the
terms in programming languages. Most of them came out of one incredible paper,
&ldquo;<a href="https://homepages.inf.ed.ac.uk/wadler/papers/papers-we-love/landin-next-700.pdf">The Next 700 Programming Languages</a>&rdquo;.</p>
<p>In order to implement these kinds of functions, you need to create a data
structure that bundles together the function&rsquo;s code and the surrounding
variables it needs. He called this a &ldquo;closure&rdquo; because it <em>closes over</em> and
holds on to the variables it needs.</p>
</aside>
<p>As you can imagine, implementing these adds some complexity because we can no
longer assume variable scope works strictly like a stack where local variables
evaporate the moment the function returns. We&rsquo;re going to have a fun time
learning how to make these work correctly and efficiently.</p>
<h2><a href="#classes" id="classes"><small>3&#8202;.&#8202;9</small>Classes</a></h2>
<p>Since Lox has dynamic typing, lexical (roughly, &ldquo;block&rdquo;) scope, and closures,
it&rsquo;s about halfway to being a functional language. But as you&rsquo;ll see, it&rsquo;s
<em>also</em> about halfway to being an object-oriented language. Both paradigms have a
lot going for them, so I thought it was worth covering some of each.</p>
<p>Since classes have come under fire for not living up to their hype, let me first
explain why I put them into Lox and this book. There are really two questions:</p>
<h3><a href="#why-might-any-language-want-to-be-object-oriented" id="why-might-any-language-want-to-be-object-oriented"><small>3&#8202;.&#8202;9&#8202;.&#8202;1</small>Why might any language want to be object oriented?</a></h3>
<p>Now that object-oriented languages like Java have sold out and only play arena
shows, it&rsquo;s not cool to like them anymore. Why would anyone make a <em>new</em>
language with objects? Isn&rsquo;t that like releasing music on 8-track?</p>
<p>It is true that the &ldquo;all inheritance all the time&rdquo; binge of the &rsquo;90s produced
some monstrous class hierarchies, but <strong>object-oriented programming</strong> (<strong>OOP</strong>)
is still pretty rad. Billions of lines of successful code have been written in
OOP languages, shipping millions of apps to happy users. Likely a majority of
working programmers today are using an object-oriented language. They can&rsquo;t all
be <em>that</em> wrong.</p>
<p>In particular, for a dynamically typed language, objects are pretty handy. We
need <em>some</em> way of defining compound data types to bundle blobs of stuff
together.</p>
<p>If we can also hang methods off of those, then we avoid the need to prefix all
of our functions with the name of the data type they operate on to avoid
colliding with similar functions for different types. In, say, Racket, you end
up having to name your functions like <code>hash-copy</code> (to copy a hash table) and
<code>vector-copy</code> (to copy a vector) so that they don&rsquo;t step on each other. Methods
are scoped to the object, so that problem goes away.</p>
<h3><a href="#why-is-lox-object-oriented" id="why-is-lox-object-oriented"><small>3&#8202;.&#8202;9&#8202;.&#8202;2</small>Why is Lox object oriented?</a></h3>
<p>I could claim objects are groovy but still out of scope for the book. Most
programming language books, especially ones that try to implement a whole
language, leave objects out. To me, that means the topic isn&rsquo;t well covered.
With such a widespread paradigm, that omission makes me sad.</p>
<p>Given how many of us spend all day <em>using</em> OOP languages, it seems like the
world could use a little documentation on how to <em>make</em> one. As you&rsquo;ll see, it
turns out to be pretty interesting. Not as hard as you might fear, but not as
simple as you might presume, either.</p>
<h3><a href="#classes-or-prototypes" id="classes-or-prototypes"><small>3&#8202;.&#8202;9&#8202;.&#8202;3</small>Classes or prototypes</a></h3>
<p>When it comes to objects, there are actually two approaches to them, <a href="https://en.wikipedia.org/wiki/Class-based_programming">classes</a>
and <a href="https://en.wikipedia.org/wiki/Prototype-based_programming">prototypes</a>. Classes came first, and are more common thanks to C++, Java,
C#, and friends. Prototypes were a virtually forgotten offshoot until JavaScript
accidentally took over the world.</p>
<p>In class-based languages, there are two core concepts: instances and classes.
Instances store the state for each object and have a reference to the instance&rsquo;s
class. Classes contain the methods and inheritance chain. To call a method on an
instance, there is always a level of indirection. You <span
name="dispatch">look</span> up the instance&rsquo;s class and then you find the method
<em>there</em>:</p>
<aside name="dispatch">
<p>In a statically typed language like C++, method lookup typically happens at
compile time based on the <em>static</em> type of the instance, giving you <strong>static
dispatch</strong>. In contrast, <strong>dynamic dispatch</strong> looks up the class of the actual
instance object at runtime. This is how virtual methods in statically typed
languages and all methods in a dynamically typed language like Lox work.</p>
</aside><img src="image/the-lox-language/class-lookup.png" alt="How fields and methods are looked up on classes and instances" />
<p>Prototype-based languages <span name="blurry">merge</span> these two concepts.
There are only objects<span class="em">&mdash;</span>no classes<span class="em">&mdash;</span>and each individual object may contain
state and methods. Objects can directly inherit from each other (or &ldquo;delegate
to&rdquo; in prototypal lingo):</p>
<aside name="blurry">
<p>In practice the line between class-based and prototype-based languages blurs.
JavaScript&rsquo;s &ldquo;constructor function&rdquo; notion <a href="http://gameprogrammingpatterns.com/prototype.html#what-about-javascript">pushes you pretty hard</a>
towards defining class-like objects. Meanwhile, class-based Ruby is perfectly
happy to let you attach methods to individual instances.</p>
</aside><img src="image/the-lox-language/prototype-lookup.png" alt="How fields and methods are looked up in a prototypal system" />
<p>This means that in some ways prototypal languages are more fundamental than
classes. They are really neat to implement because they&rsquo;re <em>so</em> simple. Also,
they can express lots of unusual patterns that classes steer you away from.</p>
<p>But I&rsquo;ve looked at a <em>lot</em> of code written in prototypal languages<span class="em">&mdash;</span>including
<a href="http://finch.stuffwithstuff.com/">some of my own devising</a>. Do you know what people generally do with all
of the power and flexibility of prototypes? <span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>They use them to reinvent
classes.</p>
<p>I don&rsquo;t know <em>why</em> that is, but people naturally seem to prefer a class-based
(Classic? Classy?) style. Prototypes <em>are</em> simpler in the language, but they
seem to accomplish that only by <span name="waterbed">pushing</span> the
complexity onto the user. So, for Lox, we&rsquo;ll save our users the trouble and bake
classes right in.</p>
<aside name="waterbed">
<p>Larry Wall, Perl&rsquo;s inventor/prophet, calls this the &ldquo;<a href="http://wiki.c2.com/?WaterbedTheory">waterbed theory</a>&rdquo;. Some
complexity is essential and cannot be eliminated. If you push it down in one
place, it swells up in another.</p>
<p>Prototypal languages don&rsquo;t so much <em>eliminate</em> the complexity of classes as they
do make the <em>user</em> take that complexity by building their own class-like
metaprogramming libraries.</p>
</aside>
<h3><a href="#classes-in-lox" id="classes-in-lox"><small>3&#8202;.&#8202;9&#8202;.&#8202;4</small>Classes in Lox</a></h3>
<p>Enough rationale, let&rsquo;s see what we actually have. Classes encompass a
constellation of features in most languages. For Lox, I&rsquo;ve selected what I think
are the brightest stars. You declare a class and its methods like so:</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Breakfast</span> {
  <span class="i">cook</span>() {
    <span class="k">print</span> <span class="s">&quot;Eggs a-fryin&#39;!&quot;</span>;
  }

  <span class="i">serve</span>(<span class="i">who</span>) {
    <span class="k">print</span> <span class="s">&quot;Enjoy your breakfast, &quot;</span> + <span class="i">who</span> + <span class="s">&quot;.&quot;</span>;
  }
}
</pre></div>
<p>The body of a class contains its methods. They look like function declarations
but without the <code>fun</code> <span name="method">keyword</span>. When the class
declaration is executed, Lox creates a class object and stores that in a
variable named after the class. Just like functions, classes are first class in
Lox.</p>
<aside name="method">
<p>They are still just as fun, though.</p>
</aside>
<div class="codehilite"><pre><span class="c">// Store it in variables.</span>
<span class="k">var</span> <span class="i">someVariable</span> = <span class="t">Breakfast</span>;

<span class="c">// Pass it to functions.</span>
<span class="i">someFunction</span>(<span class="t">Breakfast</span>);
</pre></div>
<p>Next, we need a way to create instances. We could add some sort of <code>new</code>
keyword, but to keep things simple, in Lox the class itself is a factory
function for instances. Call a class like a function, and it produces a new
instance of itself.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">breakfast</span> = <span class="t">Breakfast</span>();
<span class="k">print</span> <span class="i">breakfast</span>; <span class="c">// &quot;Breakfast instance&quot;.</span>
</pre></div>
<h3><a href="#instantiation-and-initialization" id="instantiation-and-initialization"><small>3&#8202;.&#8202;9&#8202;.&#8202;5</small>Instantiation and initialization</a></h3>
<p>Classes that only have behavior aren&rsquo;t super useful. The idea behind
object-oriented programming is encapsulating behavior <em>and state</em> together. To
do that, you need fields. Lox, like other dynamically typed languages, lets you
freely add properties onto objects.</p>
<div class="codehilite"><pre><span class="i">breakfast</span>.<span class="i">meat</span> = <span class="s">&quot;sausage&quot;</span>;
<span class="i">breakfast</span>.<span class="i">bread</span> = <span class="s">&quot;sourdough&quot;</span>;
</pre></div>
<p>Assigning to a field creates it if it doesn&rsquo;t already exist.</p>
<p>If you want to access a field or method on the current object from within a
method, you use good old <code>this</code>.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Breakfast</span> {
  <span class="i">serve</span>(<span class="i">who</span>) {
    <span class="k">print</span> <span class="s">&quot;Enjoy your &quot;</span> + <span class="k">this</span>.<span class="i">meat</span> + <span class="s">&quot; and &quot;</span> +
        <span class="k">this</span>.<span class="i">bread</span> + <span class="s">&quot;, &quot;</span> + <span class="i">who</span> + <span class="s">&quot;.&quot;</span>;
  }

  <span class="c">// ...</span>
}
</pre></div>
<p>Part of encapsulating data within an object is ensuring the object is in a valid
state when it&rsquo;s created. To do that, you can define an initializer. If your
class has a method named <code>init()</code>, it is called automatically when the object is
constructed. Any arguments passed to the class are forwarded to its
initializer.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Breakfast</span> {
  <span class="i">init</span>(<span class="i">meat</span>, <span class="i">bread</span>) {
    <span class="k">this</span>.<span class="i">meat</span> = <span class="i">meat</span>;
    <span class="k">this</span>.<span class="i">bread</span> = <span class="i">bread</span>;
  }

  <span class="c">// ...</span>
}

<span class="k">var</span> <span class="i">baconAndToast</span> = <span class="t">Breakfast</span>(<span class="s">&quot;bacon&quot;</span>, <span class="s">&quot;toast&quot;</span>);
<span class="i">baconAndToast</span>.<span class="i">serve</span>(<span class="s">&quot;Dear Reader&quot;</span>);
<span class="c">// &quot;Enjoy your bacon and toast, Dear Reader.&quot;</span>
</pre></div>
<h3><a href="#inheritance" id="inheritance"><small>3&#8202;.&#8202;9&#8202;.&#8202;6</small>Inheritance</a></h3>
<p>Every object-oriented language lets you not only define methods, but reuse them
across multiple classes or objects. For that, Lox supports single inheritance.
When you declare a class, you can specify a class that it inherits from using a less-than
<span name="less">(<code>&lt;</code>)</span> operator.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Brunch</span> &lt; <span class="t">Breakfast</span> {
  <span class="i">drink</span>() {
    <span class="k">print</span> <span class="s">&quot;How about a Bloody Mary?&quot;</span>;
  }
}
</pre></div>
<aside name="less">
<p>Why the <code>&lt;</code> operator? I didn&rsquo;t feel like introducing a new keyword like
<code>extends</code>. Lox doesn&rsquo;t use <code>:</code> for anything else so I didn&rsquo;t want to reserve
that either. Instead, I took a page from Ruby and used <code>&lt;</code>.</p>
<p>If you know any type theory, you&rsquo;ll notice it&rsquo;s not a <em>totally</em> arbitrary
choice. Every instance of a subclass is an instance of its superclass too, but
there may be instances of the superclass that are not instances of the subclass.
That means, in the universe of objects, the set of subclass objects is smaller
than the superclass&rsquo;s set, though type nerds usually use <code>&lt;:</code> for that relation.</p>
</aside>
<p>Here, Brunch is the <strong>derived class</strong> or <strong>subclass</strong>, and Breakfast is the
<strong>base class</strong> or <strong>superclass</strong>.</p>
<p>Every method defined in the superclass is also available to its subclasses.</p>
<div class="codehilite"><pre><span class="k">var</span> <span class="i">benedict</span> = <span class="t">Brunch</span>(<span class="s">&quot;ham&quot;</span>, <span class="s">&quot;English muffin&quot;</span>);
<span class="i">benedict</span>.<span class="i">serve</span>(<span class="s">&quot;Noble Reader&quot;</span>);
</pre></div>
<p>Even the <code>init()</code> method gets <span name="init">inherited</span>. In practice,
the subclass usually wants to define its own <code>init()</code> method too. But the
original one also needs to be called so that the superclass can maintain its
state. We need some way to call a method on our own <em>instance</em> without hitting
our own <em>methods</em>.</p>
<aside name="init">
<p>Lox is different from C++, Java, and C#, which do not inherit constructors, but
similar to Smalltalk and Ruby, which do.</p>
</aside>
<p>As in Java, you use <code>super</code> for that.</p>
<div class="codehilite"><pre><span class="k">class</span> <span class="t">Brunch</span> &lt; <span class="t">Breakfast</span> {
  <span class="i">init</span>(<span class="i">meat</span>, <span class="i">bread</span>, <span class="i">drink</span>) {
    <span class="k">super</span>.<span class="i">init</span>(<span class="i">meat</span>, <span class="i">bread</span>);
    <span class="k">this</span>.<span class="i">drink</span> = <span class="i">drink</span>;
  }
}
</pre></div>
<p>That&rsquo;s about it for object orientation. I tried to keep the feature set minimal.
The structure of the book did force one compromise. Lox is not a <em>pure</em>
object-oriented language. In a true OOP language, every object is an instance of
a class, even primitive values like numbers and Booleans.</p>
<p>Because we don&rsquo;t implement classes until well after we start working with the
built-in types, that would have been hard. So values of primitive types aren&rsquo;t
real objects in the sense of being instances of classes. They don&rsquo;t have methods
or properties. If I were trying to make Lox a real language for real users, I
would fix that.</p>
<h2><a href="#the-standard-library" id="the-standard-library"><small>3&#8202;.&#8202;10</small>The Standard Library</a></h2>
<p>We&rsquo;re almost done. That&rsquo;s the whole language, so all that&rsquo;s left is the &ldquo;core&rdquo;
or &ldquo;standard&rdquo; library<span class="em">&mdash;</span>the set of functionality that is implemented directly
in the interpreter and that all user-defined behavior is built on top of.</p>
<p>This is the saddest part of Lox. Its standard library goes beyond minimalism and
veers close to outright nihilism. For the sample code in the book, we only need
to demonstrate that code is running and doing what it&rsquo;s supposed to do. For
that, we already have the built-in <code>print</code> statement.</p>
<p>Later, when we start optimizing, we&rsquo;ll write some benchmarks and see how long it
takes to execute code. That means we need to track time, so we&rsquo;ll define one
built-in function, <code>clock()</code>, that returns the number of seconds since the
program started.</p>
<p>And<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>that&rsquo;s it. I know, right? It&rsquo;s embarrassing.</p>
<p>If you wanted to turn Lox into an actual useful language, the very first thing
you should do is flesh this out. String manipulation, trigonometric functions,
file I/O, networking, heck, even <em>reading input from the user</em> would help. But we
don&rsquo;t need any of that for this book, and adding it wouldn&rsquo;t teach you anything
interesting, so I&rsquo;ve left it out.</p>
<p>Don&rsquo;t worry, we&rsquo;ll have plenty of exciting stuff in the language itself to keep
us busy.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>Write some sample Lox programs and run them (you can use the implementations
of Lox in <a href="https://github.com/munificent/craftinginterpreters">my repository</a>). Try to come up with edge case behavior I
didn&rsquo;t specify here. Does it do what you expect? Why or why not?</p>
</li>
<li>
<p>This informal introduction leaves a <em>lot</em> unspecified. List several open
questions you have about the language&rsquo;s syntax and semantics. What do you
think the answers should be?</p>
</li>
<li>
<p>Lox is a pretty tiny language. What features do you think it is missing that
would make it annoying to use for real programs? (Aside from the standard
library, of course.)</p>
</li>
</ol>
</div>
<div class="design-note">
<h2><a href="#design-note" id="design-note">Design Note: Expressions and Statements</a></h2>
<p>Lox has both expressions and statements. Some languages omit the latter.
Instead, they treat declarations and control flow constructs as expressions too.
These &ldquo;everything is an expression&rdquo; languages tend to have functional pedigrees
and include most Lisps, SML, Haskell, Ruby, and CoffeeScript.</p>
<p>To do that, for each &ldquo;statement-like&rdquo; construct in the language, you need to
decide what value it evaluates to. Some of those are easy:</p>
<ul>
<li>
<p>An <code>if</code> expression evaluates to the result of whichever branch is chosen.
Likewise, a <code>switch</code> or other multi-way branch evaluates to whichever case
is picked.</p>
</li>
<li>
<p>A variable declaration evaluates to the value of the variable.</p>
</li>
<li>
<p>A block evaluates to the result of the last expression in the sequence.</p>
</li>
</ul>
<p>Some get a little stranger. What should a loop evaluate to? A <code>while</code> loop in
CoffeeScript evaluates to an array containing each element that the body
evaluated to. That can be handy, or a waste of memory if you don&rsquo;t need the
array.</p>
<p>You also have to decide how these statement-like expressions compose with other
expressions<span class="em">&mdash;</span>you have to fit them into the grammar&rsquo;s precedence table. For
example, Ruby allows:</p>
<div class="codehilite"><pre><span class="i">puts</span> <span class="n">1</span> + <span class="k">if</span> <span class="k">true</span> <span class="k">then</span> <span class="n">2</span> <span class="k">else</span> <span class="n">3</span> <span class="k">end</span> + <span class="n">4</span>
</pre></div>
<p>Is this what you&rsquo;d expect? Is it what your <em>users</em> expect? How does this affect
how you design the syntax for your &ldquo;statements&rdquo;? Note that Ruby has an explicit
<code>end</code> to tell when the <code>if</code> expression is complete. Without it, the <code>+ 4</code> would
likely be parsed as part of the <code>else</code> clause.</p>
<p>Turning every statement into an expression forces you to answer a few hairy
questions like that. In return, you eliminate some redundancy. C has both blocks
for sequencing statements, and the comma operator for sequencing expressions. It
has both the <code>if</code> statement and the <code>?:</code> conditional operator. If everything were
an expression in C, you could unify each of those.</p>
<p>Languages that do away with statements usually also feature <strong>implicit returns</strong><span class="em">&mdash;</span>a function automatically returns whatever value its body evaluates to without
need for some explicit <code>return</code> syntax. For small functions and methods, this is
really handy. In fact, many languages that do have statements have added syntax
like <code>=&gt;</code> to be able to define functions whose body is the result of evaluating
a single expression.</p>
<p>But making <em>all</em> functions work that way can be a little strange. If you aren&rsquo;t
careful, your function will leak a return value even if you only intend it to
produce a side effect. In practice, though, users of these languages don&rsquo;t find
it to be a problem.</p>
<p>For Lox, I gave it statements for prosaic reasons. I picked a C-like syntax for
familiarity&rsquo;s sake, and trying to take the existing C statement syntax and
interpret it like expressions gets weird pretty fast.</p>
</div>

<footer>
<a href="a-tree-walk-interpreter.html" class="next">
  Next Part: &ldquo;A Tree-Walk Interpreter&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/types-of-values.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Types of Values &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h3><a href="#top">Types of Values<small>18</small></a></h3>

<ul>
    <li><a href="#tagged-unions"><small>18.1</small> Tagged Unions</a></li>
    <li><a href="#lox-values-and-c-values"><small>18.2</small> Lox Values and C Values</a></li>
    <li><a href="#dynamically-typed-numbers"><small>18.3</small> Dynamically Typed Numbers</a></li>
    <li><a href="#two-new-types"><small>18.4</small> Two New Types</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="compiling-expressions.html" title="Compiling Expressions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="strings.html" title="Strings" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="compiling-expressions.html" title="Compiling Expressions" class="prev">←</a>
<a href="strings.html" title="Strings" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h3><a href="#top">Types of Values<small>18</small></a></h3>

<ul>
    <li><a href="#tagged-unions"><small>18.1</small> Tagged Unions</a></li>
    <li><a href="#lox-values-and-c-values"><small>18.2</small> Lox Values and C Values</a></li>
    <li><a href="#dynamically-typed-numbers"><small>18.3</small> Dynamically Typed Numbers</a></li>
    <li><a href="#two-new-types"><small>18.4</small> Two New Types</a></li>
    <li class="divider"></li>
    <li class="end-part"><a href="#challenges">Challenges</a></li>
</ul>


<div class="prev-next">
    <a href="compiling-expressions.html" title="Compiling Expressions" class="left">&larr;&nbsp;Previous</a>
    <a href="a-bytecode-virtual-machine.html" title="A Bytecode Virtual Machine">&uarr;&nbsp;Up</a>
    <a href="strings.html" title="Strings" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">18</div>
  <h1>Types of Values</h1>

<blockquote>
<p>When you are a Bear of Very Little Brain, and you Think of Things, you find
sometimes that a Thing which seemed very Thingish inside you is quite
different when it gets out into the open and has other people looking at it.</p>
<p><cite>A. A. Milne, <em>Winnie-the-Pooh</em></cite></p>
</blockquote>
<p>The past few chapters were huge, packed full of complex techniques and pages of
code. In this chapter, there&rsquo;s only one new concept to learn and a scattering of
straightforward code. You&rsquo;ve earned a respite.</p>
<p>Lox is <span name="unityped">dynamically</span> typed. A single variable can
hold a Boolean, number, or string at different points in time. At least, that&rsquo;s
the idea. Right now, in clox, all values are numbers. By the end of the chapter,
it will also support Booleans and <code>nil</code>. While those aren&rsquo;t super interesting,
they force us to figure out how our value representation can dynamically handle
different types.</p>
<aside name="unityped">
<p>There is a third category next to statically typed and dynamically typed:
<strong>unityped</strong>. In that paradigm, all variables have a single type, usually a
machine register integer. Unityped languages aren&rsquo;t common today, but some
Forths and BCPL, the language that inspired C, worked like this.</p>
<p>As of this moment, clox is unityped.</p>
</aside>
<h2><a href="#tagged-unions" id="tagged-unions"><small>18&#8202;.&#8202;1</small>Tagged Unions</a></h2>
<p>The nice thing about working in C is that we can build our data structures from
the raw bits up. The bad thing is that we <em>have</em> to do that. C doesn&rsquo;t give you
much for free at compile time and even less at runtime. As far as C is
concerned, the universe is an undifferentiated array of bytes. It&rsquo;s up to us to
decide how many of those bytes to use and what they mean.</p>
<p>In order to choose a value representation, we need to answer two key questions:</p>
<ol>
<li>
<p><strong>How do we represent the type of a value?</strong> If you try to, say, multiply a
number by <code>true</code>, we need to detect that error at runtime and report it. In
order to do that, we need to be able to tell what a value&rsquo;s type is.</p>
</li>
<li>
<p><strong>How do we store the value itself?</strong> We need to not only be able to tell
that three is a number, but that it&rsquo;s different from the number four. I
know, seems obvious, right? But we&rsquo;re operating at a level where it&rsquo;s good
to spell these things out.</p>
</li>
</ol>
<p>Since we&rsquo;re not just designing this language but building it ourselves, when
answering these two questions we also have to keep in mind the implementer&rsquo;s
eternal quest: to do it <em>efficiently</em>.</p>
<p>Language hackers over the years have come up with a variety of clever ways to
pack the above information into as few bits as possible. For now, we&rsquo;ll start
with the simplest, classic solution: a <strong>tagged union</strong>. A value contains two
parts: a type &ldquo;tag&rdquo;, and a payload for the actual value. To store the value&rsquo;s
type, we define an enum for each kind of value the VM supports.</p>
<div class="codehilite"><pre class="insert-before">#include &quot;common.h&quot;

</pre><div class="source-file"><em>value.h</em></div>
<pre class="insert"><span class="k">typedef</span> <span class="k">enum</span> {
  <span class="a">VAL_BOOL</span>,
  <span class="a">VAL_NIL</span>,<span name="user-types"> </span>
  <span class="a">VAL_NUMBER</span>,
} <span class="t">ValueType</span>;

</pre><pre class="insert-after">typedef double Value;
</pre></div>
<div class="source-file-narrow"><em>value.h</em></div>

<aside name="user-types">
<p>The cases here cover each kind of value that has <em>built-in support in the VM</em>.
When we get to adding classes to the language, each class the user defines
doesn&rsquo;t need its own entry in this enum. As far as the VM is concerned, every
instance of a class is the same type: &ldquo;instance&rdquo;.</p>
<p>In other words, this is the VM&rsquo;s notion of &ldquo;type&rdquo;, not the user&rsquo;s.</p>
</aside>
<p>For now, we have only a couple of cases, but this will grow as we add strings,
functions, and classes to clox. In addition to the type, we also need to store
the data for the value<span class="em">&mdash;</span>the <code>double</code> for a number, <code>true</code> or <code>false</code> for a
Boolean. We could define a struct with fields for each possible type.</p><img src="image/types-of-values/struct.png" alt="A struct with two fields laid next to each other in memory." />
<p>But this is a waste of memory. A value can&rsquo;t simultaneously be both a number and
a Boolean. So at any point in time, only one of those fields will be used. C
lets you optimize this by defining a <span name="sum">union</span>. A union
looks like a struct except that all of its fields overlap in memory.</p>
<aside name="sum">
<p>If you&rsquo;re familiar with a language in the ML family, structs and unions in C
roughly mirror the difference between product and sum types, between tuples
and algebraic data types.</p>
</aside><img src="image/types-of-values/union.png" alt="A union with two fields overlapping in memory." />
<p>The size of a union is the size of its largest field. Since the fields all reuse
the same bits, you have to be very careful when working with them. If you store
data using one field and then access it using <span
name="reinterpret">another</span>, you will reinterpret what the underlying bits
mean.</p>
<aside name="reinterpret">
<p>Using a union to interpret bits as different types is the quintessence of C. It
opens up a number of clever optimizations and lets you slice and dice each byte
of memory in ways that memory-safe languages disallow. But it is also wildly
unsafe and will happily saw your fingers off if you don&rsquo;t watch out.</p>
</aside>
<p>As the name &ldquo;tagged union&rdquo; implies, our new value representation combines these
two parts into a single struct.</p>
<div class="codehilite"><pre class="insert-before">} ValueType;

</pre><div class="source-file"><em>value.h</em><br>
add after enum <em>ValueType</em><br>
replace 1 line</div>
<pre class="insert"><span class="k">typedef</span> <span class="k">struct</span> {
  <span class="t">ValueType</span> <span class="i">type</span>;
  <span class="k">union</span> {
    <span class="t">bool</span> <span class="i">boolean</span>;
    <span class="t">double</span> <span class="i">number</span>;
  } <span class="i">as</span>;<span name="as"> </span>
} <span class="t">Value</span>;
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after enum <em>ValueType</em>, replace 1 line</div>

<p>There&rsquo;s a field for the type tag, and then a second field containing the union
of all of the underlying values. On a 64-bit machine with a typical C compiler,
the layout looks like this:</p>
<aside name="as">
<p>A smart language hacker gave me the idea to use &ldquo;as&rdquo; for the name of the union
field because it reads nicely, almost like a cast, when you pull the various
values out.</p>
</aside><img src="image/types-of-values/value.png" alt="The full value struct, with the type and as fields next to each other in memory." />
<p>The four-byte type tag comes first, then the union. Most architectures prefer
values be aligned to their size. Since the union field contains an eight-byte
double, the compiler adds four bytes of <span name="pad">padding</span> after
the type field to keep that double on the nearest eight-byte boundary. That
means we&rsquo;re effectively spending eight bytes on the type tag, which only needs
to represent a number between zero and three. We could stuff the enum in a
smaller size, but all that would do is increase the padding.</p>
<aside name="pad">
<p>We could move the tag field <em>after</em> the union, but that doesn&rsquo;t help much
either. Whenever we create an array of Values<span class="em">&mdash;</span>which is where most of our
memory usage for Values will be<span class="em">&mdash;</span>the C compiler will insert that same padding
<em>between</em> each Value to keep the doubles aligned.</p>
</aside>
<p>So our Values are 16 bytes, which seems a little large. We&rsquo;ll improve it
<a href="optimization.html">later</a>. In the meantime, they&rsquo;re still small enough to store on
the C stack and pass around by value. Lox&rsquo;s semantics allow that because the
only types we support so far are <strong>immutable</strong>. If we pass a copy of a Value
containing the number three to some function, we don&rsquo;t need to worry about the
caller seeing modifications to the value. You can&rsquo;t &ldquo;modify&rdquo; three. It&rsquo;s three
forever.</p>
<h2><a href="#lox-values-and-c-values" id="lox-values-and-c-values"><small>18&#8202;.&#8202;2</small>Lox Values and C Values</a></h2>
<p>That&rsquo;s our new value representation, but we aren&rsquo;t done. Right now, the rest of
clox assumes Value is an alias for <code>double</code>. We have code that does a straight C
cast from one to the other. That code is all broken now. So sad.</p>
<p>With our new representation, a Value can <em>contain</em> a double, but it&rsquo;s not
<em>equivalent</em> to it. There is a mandatory conversion step to get from one to the
other. We need to go through the code and insert those conversions to get clox
working again.</p>
<p>We&rsquo;ll implement these conversions as a handful of macros, one for each type and
operation. First, to promote a native C value to a clox Value:</p>
<div class="codehilite"><pre class="insert-before">} Value;
</pre><div class="source-file"><em>value.h</em><br>
add after struct <em>Value</em></div>
<pre class="insert">

<span class="a">#define BOOL_VAL(value)   ((Value){VAL_BOOL, {.boolean = value}})</span>
<span class="a">#define NIL_VAL           ((Value){VAL_NIL, {.number = 0}})</span>
<span class="a">#define NUMBER_VAL(value) ((Value){VAL_NUMBER, {.number = value}})</span>
</pre><pre class="insert-after">

typedef struct {
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after struct <em>Value</em></div>

<p>Each one of these takes a C value of the appropriate type and produces a Value
that has the correct type tag and contains the underlying value. This hoists
statically typed values up into clox&rsquo;s dynamically typed universe. In order to
<em>do</em> anything with a Value, though, we need to unpack it and get the C value
back out.</p>
<div class="codehilite"><pre class="insert-before">} Value;
</pre><div class="source-file"><em>value.h</em><br>
add after struct <em>Value</em></div>
<pre class="insert">

<span class="a">#define AS_BOOL(value)    ((value).as.boolean)</span>
<span class="a">#define AS_NUMBER(value)  ((value).as.number)</span>
</pre><pre class="insert-after">

#define BOOL_VAL(value)   ((Value){VAL_BOOL, {.boolean = value}})
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after struct <em>Value</em></div>

<aside name="as-null">
<p>There&rsquo;s no <code>AS_NIL</code> macro because there is only one <code>nil</code> value, so a Value with
type <code>VAL_NIL</code> doesn&rsquo;t carry any extra data.</p>
</aside>
<p><span name="as-null">These</span> macros go in the opposite direction. Given a
Value of the right type, they unwrap it and return the corresponding raw C
value. The &ldquo;right type&rdquo; part is important! These macros directly access the
union fields. If we were to do something like:</p>
<div class="codehilite"><pre><span class="t">Value</span> <span class="i">value</span> = <span class="a">BOOL_VAL</span>(<span class="k">true</span>);
<span class="t">double</span> <span class="i">number</span> = <span class="a">AS_NUMBER</span>(<span class="i">value</span>);
</pre></div>
<p>Then we may open a smoldering portal to the Shadow Realm. It&rsquo;s not safe to use
any of the <code>AS_</code> macros unless we know the Value contains the appropriate type.
To that end, we define a last few macros to check a Value&rsquo;s type.</p>
<div class="codehilite"><pre class="insert-before">} Value;
</pre><div class="source-file"><em>value.h</em><br>
add after struct <em>Value</em></div>
<pre class="insert">

<span class="a">#define IS_BOOL(value)    ((value).type == VAL_BOOL)</span>
<span class="a">#define IS_NIL(value)     ((value).type == VAL_NIL)</span>
<span class="a">#define IS_NUMBER(value)  ((value).type == VAL_NUMBER)</span>
</pre><pre class="insert-after">

#define AS_BOOL(value)    ((value).as.boolean)
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after struct <em>Value</em></div>

<p><span name="universe">These</span> macros return <code>true</code> if the Value has that
type. Any time we call one of the <code>AS_</code> macros, we need to guard it behind a
call to one of these first. With these eight macros, we can now safely shuttle
data between Lox&rsquo;s dynamic world and C&rsquo;s static one.</p>
<aside name="universe"><img src="image/types-of-values/universe.png" alt="The earthly C firmament with the Lox heavens above." />
<p>The <code>_VAL</code> macros lift a C value into the heavens. The <code>AS_</code> macros bring it
back down.</p>
</aside>
<h2><a href="#dynamically-typed-numbers" id="dynamically-typed-numbers"><small>18&#8202;.&#8202;3</small>Dynamically Typed Numbers</a></h2>
<p>We&rsquo;ve got our value representation and the tools to convert to and from it. All
that&rsquo;s left to get clox running again is to grind through the code and fix every
place where data moves across that boundary. This is one of those sections of
the book that isn&rsquo;t exactly mind-blowing, but I promised I&rsquo;d show you every
single line of code, so here we are.</p>
<p>The first values we create are the constants generated when we compile number
literals. After we convert the lexeme to a C double, we simply wrap it in a
Value before storing it in the constant table.</p>
<div class="codehilite"><pre class="insert-before">  double value = strtod(parser.previous.start, NULL);
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>number</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="i">emitConstant</span>(<span class="a">NUMBER_VAL</span>(<span class="i">value</span>));
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>number</em>(), replace 1 line</div>

<p>Over in the runtime, we have a function to print values.</p>
<div class="codehilite"><pre class="insert-before">void printValue(Value value) {
</pre><div class="source-file"><em>value.c</em><br>
in <em>printValue</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="i">printf</span>(<span class="s">&quot;%g&quot;</span>, <span class="a">AS_NUMBER</span>(<span class="i">value</span>));
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>printValue</em>(), replace 1 line</div>

<p>Right before we send the Value to <code>printf()</code>, we unwrap it and extract the
double value. We&rsquo;ll revisit this function shortly to add the other types, but
let&rsquo;s get our existing code working first.</p>
<h3><a href="#unary-negation-and-runtime-errors" id="unary-negation-and-runtime-errors"><small>18&#8202;.&#8202;3&#8202;.&#8202;1</small>Unary negation and runtime errors</a></h3>
<p>The next simplest operation is unary negation. It pops a value off the stack,
negates it, and pushes the result. Now that we have other types of values, we
can&rsquo;t assume the operand is a number anymore. The user could just as well do:</p>
<div class="codehilite"><pre><span class="k">print</span> -<span class="k">false</span>; <span class="c">// Uh...</span>
</pre></div>
<p>We need to handle that gracefully, which means it&rsquo;s time for <em>runtime errors</em>.
Before performing an operation that requires a certain type, we need to make
sure the Value <em>is</em> that type.</p>
<p>For unary negation, the check looks like this:</p>
<div class="codehilite"><pre class="insert-before">      case OP_DIVIDE:   BINARY_OP(/); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 1 line</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_NEGATE</span>:
        <span class="k">if</span> (!<span class="a">IS_NUMBER</span>(<span class="i">peek</span>(<span class="n">0</span>))) {
          <span class="i">runtimeError</span>(<span class="s">&quot;Operand must be a number.&quot;</span>);
          <span class="k">return</span> <span class="a">INTERPRET_RUNTIME_ERROR</span>;
        }
        <span class="i">push</span>(<span class="a">NUMBER_VAL</span>(-<span class="a">AS_NUMBER</span>(<span class="i">pop</span>())));
        <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_RETURN: {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 1 line</div>

<p>First, we check to see if the Value on top of the stack is a number. If it&rsquo;s
not, we report the runtime error and <span name="halt">stop</span> the
interpreter. Otherwise, we keep going. Only after this validation do we unwrap
the operand, negate it, wrap the result, and push it.</p>
<aside name="halt">
<p>Lox&rsquo;s approach to error-handling is rather<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span><em>spare</em>. All errors are fatal and
immediately halt the interpreter. There&rsquo;s no way for user code to recover from
an error. If Lox were a real language, this is one of the first things I would
remedy.</p>
</aside>
<p>To access the Value, we use a new little function.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>pop</em>()</div>
<pre><span class="k">static</span> <span class="t">Value</span> <span class="i">peek</span>(<span class="t">int</span> <span class="i">distance</span>) {
  <span class="k">return</span> <span class="i">vm</span>.<span class="i">stackTop</span>[-<span class="n">1</span> - <span class="i">distance</span>];
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>pop</em>()</div>

<p>It returns a Value from the stack but doesn&rsquo;t <span name="peek">pop</span> it.
The <code>distance</code> argument is how far down from the top of the stack to look: zero
is the top, one is one slot down, etc.</p>
<aside name="peek">
<p>Why not just pop the operand and then validate it? We could do that. In later
chapters, it will be important to leave operands on the stack to ensure the
garbage collector can find them if a collection is triggered in the middle of
the operation. I do the same thing here mostly out of habit.</p>
</aside>
<p>We report the runtime error using a new function that we&rsquo;ll get a lot of mileage
out of over the remainder of the book.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>resetStack</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">runtimeError</span>(<span class="k">const</span> <span class="t">char</span>* <span class="i">format</span>, ...) {
  <span class="t">va_list</span> <span class="i">args</span>;
  <span class="i">va_start</span>(<span class="i">args</span>, <span class="i">format</span>);
  <span class="i">vfprintf</span>(<span class="i">stderr</span>, <span class="i">format</span>, <span class="i">args</span>);
  <span class="i">va_end</span>(<span class="i">args</span>);
  <span class="i">fputs</span>(<span class="s">&quot;</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">stderr</span>);

  <span class="t">size_t</span> <span class="i">instruction</span> = <span class="i">vm</span>.<span class="i">ip</span> - <span class="i">vm</span>.<span class="i">chunk</span>-&gt;<span class="i">code</span> - <span class="n">1</span>;
  <span class="t">int</span> <span class="i">line</span> = <span class="i">vm</span>.<span class="i">chunk</span>-&gt;<span class="i">lines</span>[<span class="i">instruction</span>];
  <span class="i">fprintf</span>(<span class="i">stderr</span>, <span class="s">&quot;[line %d] in script</span><span class="e">\n</span><span class="s">&quot;</span>, <span class="i">line</span>);
  <span class="i">resetStack</span>();
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>resetStack</em>()</div>

<p>You&rsquo;ve certainly <em>called</em> variadic functions<span class="em">&mdash;</span>ones that take a varying number
of arguments<span class="em">&mdash;</span>in C before: <code>printf()</code> is one. But you may not have <em>defined</em>
your own. This book isn&rsquo;t a C <span name="tutorial">tutorial</span>, so I&rsquo;ll
skim over it here, but basically the <code>...</code> and <code>va_list</code> stuff let us pass an
arbitrary number of arguments to <code>runtimeError()</code>. It forwards those on to
<code>vfprintf()</code>, which is the flavor of <code>printf()</code> that takes an explicit
<code>va_list</code>.</p>
<aside name="tutorial">
<p>If you are looking for a C tutorial, I love <em><a href="https://www.cs.princeton.edu/~bwk/cbook.html">The C Programming Language</a></em>,
usually called &ldquo;K&amp;R&rdquo; in honor of its authors. It&rsquo;s not entirely up to date, but
the quality of the writing more than makes up for it.</p>
</aside>
<p>Callers can pass a format string to <code>runtimeError()</code> followed by a number of
arguments, just like they can when calling <code>printf()</code> directly. <code>runtimeError()</code>
then formats and prints those arguments. We won&rsquo;t take advantage of that in this
chapter, but later chapters will produce formatted runtime error messages that
contain other data.</p>
<p>After we show the hopefully helpful error message, we tell the user which <span
name="stack">line</span> of their code was being executed when the error
occurred. Since we left the tokens behind in the compiler, we look up the line
in the debug information compiled into the chunk. If our compiler did its job
right, that corresponds to the line of source code that the bytecode was
compiled from.</p>
<p>We look into the chunk&rsquo;s debug line array using the current bytecode instruction
index <em>minus one</em>. That&rsquo;s because the interpreter advances past each instruction
before executing it. So, at the point that we call <code>runtimeError()</code>, the failed
instruction is the previous one.</p>
<aside name="stack">
<p>Just showing the immediate line where the error occurred doesn&rsquo;t provide much
context. Better would be a full stack trace. But we don&rsquo;t even have functions to
call yet, so there is no call stack to trace.</p>
</aside>
<p>In order to use <code>va_list</code> and the macros for working with it, we need to bring
in a standard header.</p>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add to top of file</div>
<pre class="insert"><span class="a">#include &lt;stdarg.h&gt;</span>
</pre><pre class="insert-after">#include &lt;stdio.h&gt;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add to top of file</div>

<p>With this, our VM can not only do the right thing when we negate numbers (like
it used to before we broke it), but it also gracefully handles erroneous
attempts to negate other types (which we don&rsquo;t have yet, but still).</p>
<h3><a href="#binary-arithmetic-operators" id="binary-arithmetic-operators"><small>18&#8202;.&#8202;3&#8202;.&#8202;2</small>Binary arithmetic operators</a></h3>
<p>We have our runtime error machinery in place now, so fixing the binary operators
is easier even though they&rsquo;re more complex. We support four binary operators
today: <code>+</code>, <code>-</code>, <code>*</code>, and <code>/</code>. The only difference between them is which
underlying C operator they use. To minimize redundant code between the four
operators, we wrapped up the commonality in a big preprocessor macro that takes
the operator token as a parameter.</p>
<p>That macro seemed like overkill a <a href="a-virtual-machine.html#binary-operators">few chapters ago</a>, but we get the benefit
from it today. It lets us add the necessary type checking and conversions in one
place.</p>
<div class="codehilite"><pre class="insert-before">#define READ_CONSTANT() (vm.chunk-&gt;constants.values[READ_BYTE()])
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 6 lines</div>
<pre class="insert"><span class="a">#define BINARY_OP(valueType, op) \</span>
<span class="a">    do { \</span>
<span class="a">      if (!IS_NUMBER(peek(0)) || !IS_NUMBER(peek(1))) { \</span>
<span class="a">        runtimeError(&quot;Operands must be numbers.&quot;); \</span>
<span class="a">        return INTERPRET_RUNTIME_ERROR; \</span>
<span class="a">      } \</span>
<span class="a">      double b = AS_NUMBER(pop()); \</span>
<span class="a">      double a = AS_NUMBER(pop()); \</span>
<span class="a">      push(valueType(a op b)); \</span>
<span class="a">    } while (false)</span>
</pre><pre class="insert-after">

  for (;;) {
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 6 lines</div>

<p>Yeah, I realize that&rsquo;s a monster of a macro. It&rsquo;s not what I&rsquo;d normally consider
good C practice, but let&rsquo;s roll with it. The changes are similar to what we did
for unary negate. First, we check that the two operands are both numbers. If
either isn&rsquo;t, we report a runtime error and yank the ejection seat lever.</p>
<p>If the operands are fine, we pop them both and unwrap them. Then we apply the
given operator, wrap the result, and push it back on the stack. Note that we
don&rsquo;t wrap the result by directly using <code>NUMBER_VAL()</code>. Instead, the wrapper to
use is passed in as a macro <span name="macro">parameter</span>. For our
existing arithmetic operators, the result is a number, so we pass in the
<code>NUMBER_VAL</code> macro.</p>
<aside name="macro">
<p>Did you know you can pass macros as parameters to macros? Now you do!</p>
</aside>
<div class="codehilite"><pre class="insert-before">      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()<br>
replace 4 lines</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_ADD</span>:      <span class="a">BINARY_OP</span>(<span class="a">NUMBER_VAL</span>, +); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_SUBTRACT</span>: <span class="a">BINARY_OP</span>(<span class="a">NUMBER_VAL</span>, -); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_MULTIPLY</span>: <span class="a">BINARY_OP</span>(<span class="a">NUMBER_VAL</span>, *); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_DIVIDE</span>:   <span class="a">BINARY_OP</span>(<span class="a">NUMBER_VAL</span>, /); <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_NEGATE:
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>(), replace 4 lines</div>

<p>Soon, I&rsquo;ll show you why we made the wrapping macro an argument.</p>
<h2><a href="#two-new-types" id="two-new-types"><small>18&#8202;.&#8202;4</small>Two New Types</a></h2>
<p>All of our existing clox code is back in working order. Finally, it&rsquo;s time to
add some new types. We&rsquo;ve got a running numeric calculator that now does a
number of pointless paranoid runtime type checks. We can represent other types
internally, but there&rsquo;s no way for a user&rsquo;s program to ever create a Value of
one of those types.</p>
<p>Not until now, that is. We&rsquo;ll start by adding compiler support for the three new
literals: <code>true</code>, <code>false</code>, and <code>nil</code>. They&rsquo;re all pretty simple, so we&rsquo;ll do all
three in a single batch.</p>
<p>With number literals, we had to deal with the fact that there are billions of
possible numeric values. We attended to that by storing the literal&rsquo;s value in
the chunk&rsquo;s constant table and emitting a bytecode instruction that simply
loaded that constant. We could do the same thing for the new types. We&rsquo;d store,
say, <code>true</code>, in the constant table, and use an <code>OP_CONSTANT</code> to read it out.</p>
<p>But given that there are literally (heh) only three possible values we need to
worry about with these new types, it&rsquo;s gratuitous<span class="em">&mdash;</span>and <span
name="small">slow!</span><span class="em">&mdash;</span>to waste a two-byte instruction and a constant
table entry on them. Instead, we&rsquo;ll define three dedicated instructions to push
each of these literals on the stack.</p>
<aside name="small" class="bottom">
<p>I&rsquo;m not kidding about dedicated operations for certain constant values being
faster. A bytecode VM spends much of its execution time reading and decoding
instructions. The fewer, simpler instructions you need for a given piece of
behavior, the faster it goes. Short instructions dedicated to common operations
are a classic optimization.</p>
<p>For example, the Java bytecode instruction set has dedicated instructions for
loading 0.0, 1.0, 2.0, and the integer values from -1 through 5. (This ends up
being a vestigial optimization given that most mature JVMs now JIT-compile the
bytecode to machine code before execution anyway.)</p>
</aside>
<div class="codehilite"><pre class="insert-before">  OP_CONSTANT,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_NIL</span>,
  <span class="a">OP_TRUE</span>,
  <span class="a">OP_FALSE</span>,
</pre><pre class="insert-after">  OP_ADD,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>Our scanner already treats <code>true</code>, <code>false</code>, and <code>nil</code> as keywords, so we can
skip right to the parser. With our table-based Pratt parser, we just need to
slot parser functions into the rows associated with those keyword token types.
We&rsquo;ll use the same function in all three slots. Here:</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_ELSE]          = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_FALSE</span>]         = {<span class="i">literal</span>,  <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_FOR]           = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>Here:</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_THIS]          = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_TRUE</span>]          = {<span class="i">literal</span>,  <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_VAR]           = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>And here:</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_IF]            = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_NIL</span>]           = {<span class="i">literal</span>,  <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_OR]            = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>When the parser encounters <code>false</code>, <code>nil</code>, or <code>true</code> in prefix position, it
calls this new parser function:</p>
<div class="codehilite"><div class="source-file"><em>compiler.c</em><br>
add after <em>binary</em>()</div>
<pre><span class="k">static</span> <span class="t">void</span> <span class="i">literal</span>() {
  <span class="k">switch</span> (<span class="i">parser</span>.<span class="i">previous</span>.<span class="i">type</span>) {
    <span class="k">case</span> <span class="a">TOKEN_FALSE</span>: <span class="i">emitByte</span>(<span class="a">OP_FALSE</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_NIL</span>: <span class="i">emitByte</span>(<span class="a">OP_NIL</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_TRUE</span>: <span class="i">emitByte</span>(<span class="a">OP_TRUE</span>); <span class="k">break</span>;
    <span class="k">default</span>: <span class="k">return</span>; <span class="c">// Unreachable.</span>
  }
}
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, add after <em>binary</em>()</div>

<p>Since <code>parsePrecedence()</code> has already consumed the keyword token, all we need to
do is output the proper instruction. We <span name="switch">figure</span> that
out based on the type of token we parsed. Our front end can now compile Boolean
and nil literals to bytecode. Moving down the execution pipeline, we reach the
interpreter.</p>
<aside name="switch">
<p>We could have used separate parser functions for each literal and saved
ourselves a switch but that felt needlessly verbose to me. I think it&rsquo;s mostly a
matter of taste.</p>
</aside>
<div class="codehilite"><pre class="insert-before">      case OP_CONSTANT: {
        Value constant = READ_CONSTANT();
        push(constant);
        break;
      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_NIL</span>: <span class="i">push</span>(<span class="a">NIL_VAL</span>); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_TRUE</span>: <span class="i">push</span>(<span class="a">BOOL_VAL</span>(<span class="k">true</span>)); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_FALSE</span>: <span class="i">push</span>(<span class="a">BOOL_VAL</span>(<span class="k">false</span>)); <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_ADD:      BINARY_OP(NUMBER_VAL, +); break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>This is pretty self-explanatory. Each instruction summons the appropriate value
and pushes it onto the stack. We shouldn&rsquo;t forget our disassembler either.</p>
<div class="codehilite"><pre class="insert-before">    case OP_CONSTANT:
      return constantInstruction(&quot;OP_CONSTANT&quot;, chunk, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_NIL</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_NIL&quot;</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_TRUE</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_TRUE&quot;</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_FALSE</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_FALSE&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_ADD:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>With this in place, we can run this Earth-shattering program:</p>
<div class="codehilite"><pre><span class="k">true</span>
</pre></div>
<p>Except that when the interpreter tries to print the result, it blows up. We need
to extend <code>printValue()</code> to handle the new types too:</p>
<div class="codehilite"><pre class="insert-before">void printValue(Value value) {
</pre><div class="source-file"><em>value.c</em><br>
in <em>printValue</em>()<br>
replace 1 line</div>
<pre class="insert">  <span class="k">switch</span> (<span class="i">value</span>.<span class="i">type</span>) {
    <span class="k">case</span> <span class="a">VAL_BOOL</span>:
      <span class="i">printf</span>(<span class="a">AS_BOOL</span>(<span class="i">value</span>) ? <span class="s">&quot;true&quot;</span> : <span class="s">&quot;false&quot;</span>);
      <span class="k">break</span>;
    <span class="k">case</span> <span class="a">VAL_NIL</span>: <span class="i">printf</span>(<span class="s">&quot;nil&quot;</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">VAL_NUMBER</span>: <span class="i">printf</span>(<span class="s">&quot;%g&quot;</span>, <span class="a">AS_NUMBER</span>(<span class="i">value</span>)); <span class="k">break</span>;
  }
</pre><pre class="insert-after">}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, in <em>printValue</em>(), replace 1 line</div>

<p>There we go! Now we have some new types. They just aren&rsquo;t very useful yet. Aside
from the literals, you can&rsquo;t really <em>do</em> anything with them. It will be a while
before <code>nil</code> comes into play, but we can start putting Booleans to work in the
logical operators.</p>
<h3><a href="#logical-not-and-falsiness" id="logical-not-and-falsiness"><small>18&#8202;.&#8202;4&#8202;.&#8202;1</small>Logical not and falsiness</a></h3>
<p>The simplest logical operator is our old exclamatory friend unary not.</p>
<div class="codehilite"><pre><span class="k">print</span> !<span class="k">true</span>; <span class="c">// &quot;false&quot;</span>
</pre></div>
<p>This new operation gets a new instruction.</p>
<div class="codehilite"><pre class="insert-before">  OP_DIVIDE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_NOT</span>,
</pre><pre class="insert-after">  OP_NEGATE,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>We can reuse the <code>unary()</code> parser function we wrote for unary negation to
compile a not expression. We just need to slot it into the parsing table.</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_STAR]          = {NULL,     binary, PREC_FACTOR},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_BANG</span>]          = {<span class="i">unary</span>,    <span class="a">NULL</span>,   <span class="a">PREC_NONE</span>},
</pre><pre class="insert-after">  [TOKEN_BANG_EQUAL]    = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>Because I knew we were going to do this, the <code>unary()</code> function already has a
switch on the token type to figure out which bytecode instruction to output. We
merely add another case.</p>
<div class="codehilite"><pre class="insert-before">  switch (operatorType) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>unary</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">TOKEN_BANG</span>: <span class="i">emitByte</span>(<span class="a">OP_NOT</span>); <span class="k">break</span>;
</pre><pre class="insert-after">    case TOKEN_MINUS: emitByte(OP_NEGATE); break;
    default: return; // Unreachable.
  }
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>unary</em>()</div>

<p>That&rsquo;s it for the front end. Let&rsquo;s head over to the VM and conjure this
instruction into life.</p>
<div class="codehilite"><pre class="insert-before">      case OP_DIVIDE:   BINARY_OP(NUMBER_VAL, /); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_NOT</span>:
        <span class="i">push</span>(<span class="a">BOOL_VAL</span>(<span class="i">isFalsey</span>(<span class="i">pop</span>())));
        <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_NEGATE:
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>Like our previous unary operator, it pops the one operand, performs the
operation, and pushes the result. And, as we did there, we have to worry about
dynamic typing. Taking the logical not of <code>true</code> is easy, but there&rsquo;s nothing
preventing an unruly programmer from writing something like this:</p>
<div class="codehilite"><pre><span class="k">print</span> !<span class="k">nil</span>;
</pre></div>
<p>For unary minus, we made it an error to negate anything that isn&rsquo;t a <span
name="negate">number</span>. But Lox, like most scripting languages, is more
permissive when it comes to <code>!</code> and other contexts where a Boolean is expected.
The rule for how other types are handled is called &ldquo;falsiness&rdquo;, and we implement
it here:</p>
<aside name="negate">
<p>Now I can&rsquo;t help but try to figure out what it would mean to negate other types
of values. <code>nil</code> is probably its own negation, sort of like a weird pseudo-zero.
Negating a string could, uh, reverse it?</p>
</aside>
<div class="codehilite"><div class="source-file"><em>vm.c</em><br>
add after <em>peek</em>()</div>
<pre><span class="k">static</span> <span class="t">bool</span> <span class="i">isFalsey</span>(<span class="t">Value</span> <span class="i">value</span>) {
  <span class="k">return</span> <span class="a">IS_NIL</span>(<span class="i">value</span>) || (<span class="a">IS_BOOL</span>(<span class="i">value</span>) &amp;&amp; !<span class="a">AS_BOOL</span>(<span class="i">value</span>));
}
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, add after <em>peek</em>()</div>

<p>Lox follows Ruby in that <code>nil</code> and <code>false</code> are falsey and every other value
behaves like <code>true</code>. We&rsquo;ve got a new instruction we can generate, so we also
need to be able to <em>un</em>generate it in the disassembler.</p>
<div class="codehilite"><pre class="insert-before">    case OP_DIVIDE:
      return simpleInstruction(&quot;OP_DIVIDE&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_NOT</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_NOT&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_NEGATE:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<h3><a href="#equality-and-comparison-operators" id="equality-and-comparison-operators"><small>18&#8202;.&#8202;4&#8202;.&#8202;2</small>Equality and comparison operators</a></h3>
<p>That wasn&rsquo;t too bad. Let&rsquo;s keep the momentum going and knock out the equality
and comparison operators too: <code>==</code>, <code>!=</code>, <code>&lt;</code>, <code>&gt;</code>, <code>&lt;=</code>, and <code>&gt;=</code>. That covers
all of the operators that return Boolean results except the logical operators
<code>and</code> and <code>or</code>. Since those need to short-circuit (basically do a little
control flow) we aren&rsquo;t ready for them yet.</p>
<p>Here are the new instructions for those operators:</p>
<div class="codehilite"><pre class="insert-before">  OP_FALSE,
</pre><div class="source-file"><em>chunk.h</em><br>
in enum <em>OpCode</em></div>
<pre class="insert">  <span class="a">OP_EQUAL</span>,
  <span class="a">OP_GREATER</span>,
  <span class="a">OP_LESS</span>,
</pre><pre class="insert-after">  OP_ADD,
</pre></div>
<div class="source-file-narrow"><em>chunk.h</em>, in enum <em>OpCode</em></div>

<p>Wait, only three? What about <code>!=</code>, <code>&lt;=</code>, and <code>&gt;=</code>? We could create instructions
for those too. Honestly, the VM would execute faster if we did, so we <em>should</em>
do that if the goal is performance.</p>
<p>But my main goal is to teach you about bytecode compilers. I want you to start
internalizing the idea that the bytecode instructions don&rsquo;t need to closely
follow the user&rsquo;s source code. The VM has total freedom to use whatever
instruction set and code sequences it wants as long as they have the right
user-visible behavior.</p>
<p>The expression <code>a != b</code> has the same semantics as <code>!(a == b)</code>, so the compiler
is free to compile the former as if it were the latter. Instead of a dedicated
<code>OP_NOT_EQUAL</code> instruction, it can output an <code>OP_EQUAL</code> followed by an <code>OP_NOT</code>.
Likewise, <code>a &lt;= b</code> is the <span name="same">same</span> as <code>!(a &gt; b)</code> and <code>a &gt;= b</code> is <code>!(a &lt; b)</code>. Thus, we only need three new instructions.</p>
<aside name="same" class="bottom">
<p><em>Is</em> <code>a &lt;= b</code> always the same as <code>!(a &gt; b)</code>? According to <a href="https://en.wikipedia.org/wiki/IEEE_754">IEEE 754</a>, all
comparison operators return false when an operand is NaN. That means <code>NaN &lt;= 1</code>
is false and <code>NaN &gt; 1</code> is also false. But our desugaring assumes the latter is
always the negation of the former.</p>
<p>For the book, we won&rsquo;t get hung up on this, but these kinds of details will
matter in your real language implementations.</p>
</aside>
<p>Over in the parser, though, we do have six new operators to slot into the parse
table. We use the same <code>binary()</code> parser function from before. Here&rsquo;s the row
for <code>!=</code>:</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_BANG]          = {unary,    NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 1 line</div>
<pre class="insert">  [<span class="a">TOKEN_BANG_EQUAL</span>]    = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_EQUALITY</span>},
</pre><pre class="insert-after">  [TOKEN_EQUAL]         = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 1 line</div>

<p>The remaining five operators are a little farther down in the table.</p>
<div class="codehilite"><pre class="insert-before">  [TOKEN_EQUAL]         = {NULL,     NULL,   PREC_NONE},
</pre><div class="source-file"><em>compiler.c</em><br>
replace 5 lines</div>
<pre class="insert">  [<span class="a">TOKEN_EQUAL_EQUAL</span>]   = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_EQUALITY</span>},
  [<span class="a">TOKEN_GREATER</span>]       = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_COMPARISON</span>},
  [<span class="a">TOKEN_GREATER_EQUAL</span>] = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_COMPARISON</span>},
  [<span class="a">TOKEN_LESS</span>]          = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_COMPARISON</span>},
  [<span class="a">TOKEN_LESS_EQUAL</span>]    = {<span class="a">NULL</span>,     <span class="i">binary</span>, <span class="a">PREC_COMPARISON</span>},
</pre><pre class="insert-after">  [TOKEN_IDENTIFIER]    = {NULL,     NULL,   PREC_NONE},
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, replace 5 lines</div>

<p>Inside <code>binary()</code> we already have a switch to generate the right bytecode for
each token type. We add cases for the six new operators.</p>
<div class="codehilite"><pre class="insert-before">  switch (operatorType) {
</pre><div class="source-file"><em>compiler.c</em><br>
in <em>binary</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">TOKEN_BANG_EQUAL</span>:    <span class="i">emitBytes</span>(<span class="a">OP_EQUAL</span>, <span class="a">OP_NOT</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_EQUAL_EQUAL</span>:   <span class="i">emitByte</span>(<span class="a">OP_EQUAL</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_GREATER</span>:       <span class="i">emitByte</span>(<span class="a">OP_GREATER</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_GREATER_EQUAL</span>: <span class="i">emitBytes</span>(<span class="a">OP_LESS</span>, <span class="a">OP_NOT</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_LESS</span>:          <span class="i">emitByte</span>(<span class="a">OP_LESS</span>); <span class="k">break</span>;
    <span class="k">case</span> <span class="a">TOKEN_LESS_EQUAL</span>:    <span class="i">emitBytes</span>(<span class="a">OP_GREATER</span>, <span class="a">OP_NOT</span>); <span class="k">break</span>;
</pre><pre class="insert-after">    case TOKEN_PLUS:          emitByte(OP_ADD); break;
</pre></div>
<div class="source-file-narrow"><em>compiler.c</em>, in <em>binary</em>()</div>

<p>The <code>==</code>, <code>&lt;</code>, and <code>&gt;</code> operators output a single instruction. The others output
a pair of instructions, one to evaluate the inverse operation, and then an
<code>OP_NOT</code> to flip the result. Six operators for the price of three instructions!</p>
<p>That means over in the VM, our job is simpler. Equality is the most general
operation.</p>
<div class="codehilite"><pre class="insert-before">      case OP_FALSE: push(BOOL_VAL(false)); break;
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_EQUAL</span>: {
        <span class="t">Value</span> <span class="i">b</span> = <span class="i">pop</span>();
        <span class="t">Value</span> <span class="i">a</span> = <span class="i">pop</span>();
        <span class="i">push</span>(<span class="a">BOOL_VAL</span>(<span class="i">valuesEqual</span>(<span class="i">a</span>, <span class="i">b</span>)));
        <span class="k">break</span>;
      }
</pre><pre class="insert-after">      case OP_ADD:      BINARY_OP(NUMBER_VAL, +); break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>You can evaluate <code>==</code> on any pair of objects, even objects of different types.
There&rsquo;s enough complexity that it makes sense to shunt that logic over to a
separate function. That function always returns a C <code>bool</code>, so we can safely
wrap the result in a <code>BOOL_VAL</code>. The function relates to Values, so it lives
over in the &ldquo;value&rdquo; module.</p>
<div class="codehilite"><pre class="insert-before">} ValueArray;

</pre><div class="source-file"><em>value.h</em><br>
add after struct <em>ValueArray</em></div>
<pre class="insert"><span class="t">bool</span> <span class="i">valuesEqual</span>(<span class="t">Value</span> <span class="i">a</span>, <span class="t">Value</span> <span class="i">b</span>);
</pre><pre class="insert-after">void initValueArray(ValueArray* array);
</pre></div>
<div class="source-file-narrow"><em>value.h</em>, add after struct <em>ValueArray</em></div>

<p>And here&rsquo;s the implementation:</p>
<div class="codehilite"><div class="source-file"><em>value.c</em><br>
add after <em>printValue</em>()</div>
<pre><span class="t">bool</span> <span class="i">valuesEqual</span>(<span class="t">Value</span> <span class="i">a</span>, <span class="t">Value</span> <span class="i">b</span>) {
  <span class="k">if</span> (<span class="i">a</span>.<span class="i">type</span> != <span class="i">b</span>.<span class="i">type</span>) <span class="k">return</span> <span class="k">false</span>;
  <span class="k">switch</span> (<span class="i">a</span>.<span class="i">type</span>) {
    <span class="k">case</span> <span class="a">VAL_BOOL</span>:   <span class="k">return</span> <span class="a">AS_BOOL</span>(<span class="i">a</span>) == <span class="a">AS_BOOL</span>(<span class="i">b</span>);
    <span class="k">case</span> <span class="a">VAL_NIL</span>:    <span class="k">return</span> <span class="k">true</span>;
    <span class="k">case</span> <span class="a">VAL_NUMBER</span>: <span class="k">return</span> <span class="a">AS_NUMBER</span>(<span class="i">a</span>) == <span class="a">AS_NUMBER</span>(<span class="i">b</span>);
    <span class="k">default</span>:         <span class="k">return</span> <span class="k">false</span>; <span class="c">// Unreachable.</span>
  }
}
</pre></div>
<div class="source-file-narrow"><em>value.c</em>, add after <em>printValue</em>()</div>

<p>First, we check the types. If the Values have <span
name="equal">different</span> types, they are definitely not equal. Otherwise,
we unwrap the two Values and compare them directly.</p>
<aside name="equal">
<p>Some languages have &ldquo;implicit conversions&rdquo; where values of different types may
be considered equal if one can be converted to the other&rsquo;s type. For example,
the number 0 is equivalent to the string &ldquo;0&rdquo; in JavaScript. This looseness was a
large enough source of pain that JS added a separate &ldquo;strict equality&rdquo; operator,
<code>===</code>.</p>
<p>PHP considers the strings &ldquo;1&rdquo; and &ldquo;01&rdquo; to be equivalent because both can be
converted to equivalent numbers, though the ultimate reason is because PHP was
designed by a Lovecraftian eldritch god to destroy the mind.</p>
<p>Most dynamically typed languages that have separate integer and floating-point
number types consider values of different number types equal if the numeric
values are the same (so, say, 1.0 is equal to 1), though even that seemingly
innocuous convenience can bite the unwary.</p>
</aside>
<p>For each value type, we have a separate case that handles comparing the value
itself. Given how similar the cases are, you might wonder why we can&rsquo;t simply
<code>memcmp()</code> the two Value structs and be done with it. The problem is that
because of padding and different-sized union fields, a Value contains unused
bits. C gives no guarantee about what is in those, so it&rsquo;s possible that two
equal Values actually differ in memory that isn&rsquo;t used.</p><img src="image/types-of-values/memcmp.png" alt="The memory representations of two equal values that differ in unused bytes." />
<p>(You wouldn&rsquo;t believe how much pain I went through before learning this fact.)</p>
<p>Anyway, as we add more types to clox, this function will grow new cases. For
now, these three are sufficient. The other comparison operators are easier since
they work only on numbers.</p>
<div class="codehilite"><pre class="insert-before">        push(BOOL_VAL(valuesEqual(a, b)));
        break;
      }
</pre><div class="source-file"><em>vm.c</em><br>
in <em>run</em>()</div>
<pre class="insert">      <span class="k">case</span> <span class="a">OP_GREATER</span>:  <span class="a">BINARY_OP</span>(<span class="a">BOOL_VAL</span>, &gt;); <span class="k">break</span>;
      <span class="k">case</span> <span class="a">OP_LESS</span>:     <span class="a">BINARY_OP</span>(<span class="a">BOOL_VAL</span>, &lt;); <span class="k">break</span>;
</pre><pre class="insert-after">      case OP_ADD:      BINARY_OP(NUMBER_VAL, +); break;
</pre></div>
<div class="source-file-narrow"><em>vm.c</em>, in <em>run</em>()</div>

<p>We already extended the <code>BINARY_OP</code> macro to handle operators that return
non-numeric types. Now we get to use that. We pass in <code>BOOL_VAL</code> since the
result value type is Boolean. Otherwise, it&rsquo;s no different from plus or minus.</p>
<p>As always, the coda to today&rsquo;s aria is disassembling the new instructions.</p>
<div class="codehilite"><pre class="insert-before">    case OP_FALSE:
      return simpleInstruction(&quot;OP_FALSE&quot;, offset);
</pre><div class="source-file"><em>debug.c</em><br>
in <em>disassembleInstruction</em>()</div>
<pre class="insert">    <span class="k">case</span> <span class="a">OP_EQUAL</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_EQUAL&quot;</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_GREATER</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_GREATER&quot;</span>, <span class="i">offset</span>);
    <span class="k">case</span> <span class="a">OP_LESS</span>:
      <span class="k">return</span> <span class="i">simpleInstruction</span>(<span class="s">&quot;OP_LESS&quot;</span>, <span class="i">offset</span>);
</pre><pre class="insert-after">    case OP_ADD:
</pre></div>
<div class="source-file-narrow"><em>debug.c</em>, in <em>disassembleInstruction</em>()</div>

<p>With that, our numeric calculator has become something closer to a general
expression evaluator. Fire up clox and type in:</p>
<div class="codehilite"><pre>!(<span class="n">5</span> - <span class="n">4</span> &gt; <span class="n">3</span> * <span class="n">2</span> == !<span class="k">nil</span>)
</pre></div>
<p>OK, I&rsquo;ll admit that&rsquo;s maybe not the most <em>useful</em> expression, but we&rsquo;re making
progress. We have one missing built-in type with its own literal form: strings.
Those are much more complex because strings can vary in size. That tiny
difference turns out to have implications so large that we give strings <a href="strings.html">their
very own chapter</a>.</p>
<div class="challenges">
<h2><a href="#challenges" id="challenges">Challenges</a></h2>
<ol>
<li>
<p>We could reduce our binary operators even further than we did here. Which
other instructions can you eliminate, and how would the compiler cope with
their absence?</p>
</li>
<li>
<p>Conversely, we can improve the speed of our bytecode VM by adding more
specific instructions that correspond to higher-level operations. What
instructions would you define to speed up the kind of user code we added
support for in this chapter?</p>
</li>
</ol>
</div>

<footer>
<a href="strings.html" class="next">
  Next Chapter: &ldquo;Strings&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: site/welcome.html
================================================
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
<title>Welcome &middot; Crafting Interpreters</title>

<!-- Tell mobile browsers we're optimized for them and they don't need to crop
     the viewport. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" type="text/css" href="style.css" />

<!-- Oh, God, Source Code Pro is so beautiful it makes me want to cry. -->
<link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400|Source+Sans+Pro:300,400,600' rel='stylesheet' type='text/css'>

<link rel="icon" type="image/png" href="image/favicon.png" />
<script src="jquery-3.4.1.min.js"></script>
<script src="script.js"></script>

<!-- Google analytics -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-42804721-2', 'auto');
  ga('send', 'pageview');
</script>

</head>
<body id="top">

<!-- <div class="scrim"></div> -->
<nav class="wide">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="contents">
<h2><small>I</small>Welcome</h2>

<ul>
    <li><a href="introduction.html"><small>1</small>Introduction</a></li>
    <li><a href="a-map-of-the-territory.html"><small>2</small>A Map of the Territory</a></li>
    <li><a href="the-lox-language.html"><small>3</small>The Lox Language</a></li>
</ul>


<div class="prev-next">
    <a href="contents.html" title="Table of Contents" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="introduction.html" title="Introduction" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
</nav>

<nav class="narrow">
<a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
<a href="contents.html" title="Table of Contents" class="prev">←</a>
<a href="introduction.html" title="Introduction" class="next">→</a>
</nav>

<div class="page">
<div class="nav-wrapper">
<nav class="floating">
  <a href="/"><img src="image/logotype.png" title="Crafting Interpreters"></a>
  <div class="expandable">
<h2><small>I</small>Welcome</h2>

<ul>
    <li><a href="introduction.html"><small>1</small>Introduction</a></li>
    <li><a href="a-map-of-the-territory.html"><small>2</small>A Map of the Territory</a></li>
    <li><a href="the-lox-language.html"><small>3</small>The Lox Language</a></li>
</ul>


<div class="prev-next">
    <a href="contents.html" title="Table of Contents" class="left">&larr;&nbsp;Previous</a>
    <a href="contents.html" title="Table of Contents">&uarr;&nbsp;Up</a>
    <a href="introduction.html" title="Introduction" class="right">Next&nbsp;&rarr;</a>
</div>  </div>
  <a id="expand-nav">≡</a>
</nav>
</div>

<article class="chapter">

  <div class="number">I</div>
  <h1 class="part">Welcome</h1>

<p>This may be the beginning of a grand adventure. Programming languages encompass
a huge space to explore and play in. Plenty of room for your own creations to
share with others or just enjoy yourself. Brilliant computer scientists and
software engineers have spent entire careers traversing this land without ever
reaching the end. If this book is your first entry into the country, welcome.</p>
<p>The pages of this book give you a guided tour through some of the world of
languages. But before we strap on our hiking boots and venture out, we should
familiarize ourselves with the territory. The chapters in this part introduce
you to the basic concepts used by programming languages and how those concepts
are organized.</p>
<p>We will also get acquainted with Lox, the language we&rsquo;ll spend the rest of the
book implementing (twice).</p>

<footer>
<a href="introduction.html" class="next">
  Next Chapter: &ldquo;Introduction&rdquo; &rarr;
</a>
Handcrafted by Robert Nystrom&ensp;&mdash;&ensp;<a href="https://github.com/munificent/craftinginterpreters/blob/master/LICENSE" target="_blank">&copy; 2015&hairsp;&ndash;&hairsp;2021</a>
</footer>
</article>

</div>
</body>
</html>


================================================
FILE: test/assignment/associativity.lox
================================================
// Three variables with distinct values, so the result of the chained
// assignment below is visible in each of them.
var a = "a";
var b = "b";
var c = "c";

// Assignment is right-associative.
a = b = c;
print a; // expect: c
print b; // expect: c
print c; // expect: c


================================================
FILE: test/assignment/global.lox
================================================
// Assigns to a variable declared at the top level and prints it after each
// change.
var a = "before";
print a; // expect: before

a = "after";
print a; // expect: after

// Here the assignment itself is the operand of print.
print a = "arg"; // expect: arg
print a; // expect: arg


================================================
FILE: test/assignment/grouping.lox
================================================
var a = "a";
// The left-hand side is a parenthesized variable; the trailing comment on the
// next line records the diagnostic this file is meant to produce.
(a) = "value"; // Error at '=': Invalid assignment target.


================================================
FILE: test/assignment/infix_operator.lox
================================================
var a = "a";
var b = "b";
// The left-hand side is the binary expression `a + b`; the trailing comment on
// the next line records the diagnostic this file is meant to produce.
a + b = "value"; // Error at '=': Invalid assignment target.


================================================
FILE: test/assignment/local.lox
================================================
// Same sequence as a plain declare/assign/print script, but everything happens
// inside a block, so `a` is declared within that block.
{
  var a = "before";
  print a; // expect: before

  a = "after";
  print a; // expect: after

  // Here the assignment itself is the operand of print.
  print a = "arg"; // expect: arg
  print a; // expect: arg
}


================================================
FILE: test/assignment/prefix_operator.lox
================================================
var a = "a";
// The left-hand side is the unary expression `!a`; the trailing comment on the
// next line records the diagnostic this file is meant to produce.
!a = "value"; // Error at '=': Invalid assignment target.


================================================
FILE: test/assignment/syntax.lox
================================================
// Assignment on RHS of variable.
var a = "before";
// The initializer of `c` is itself an assignment to `a`; both variables are
// printed afterwards.
var c = a = "var";
print a; // expect: var
print c; // expect: var


================================================
FILE: test/assignment/to_this.lox
================================================
class Foo {
  // A method named Foo whose body uses `this` as the target of an assignment.
  Foo() {
    this = "value"; // Error at '=': Invalid assignment target.
  }
}

Foo();


================================================
FILE: test/assignment/undefined.lox
================================================
// `unknown` is assigned without any preceding `var` declaration in this file.
unknown = "what"; // expect runtime error: Undefined variable 'unknown'.


================================================
FILE: test/benchmark/binary_trees.lox
================================================
// Binary tree node. Each node stores `item`, `depth`, and `left`/`right`
// children; the children are nil when the node was created with depth <= 0.
class Tree {
  // Stores `item` and `depth`, then (when depth > 0) recursively builds two
  // children one level shallower: the left child gets item `2 * item - 1` and
  // the right child gets `2 * item`.
  init(item, depth) {
    this.item = item;
    this.depth = depth;
    if (depth > 0) {
      var item2 = item + item;
      depth = depth - 1;
      this.left = Tree(item2 - 1, depth);
      this.right = Tree(item2, depth);
    } else {
      this.left = nil;
      this.right = nil;
    }
  }

  // Returns `item` for a node without a left child; otherwise returns
  // item + left.check() - right.check(), which visits the whole subtree.
  check() {
    if (this.left == nil) {
      return this.item;
    }

    return this.item + this.left.check() - this.right.check();
  }
}

// Depth range used below; the "stretch" tree is one level deeper than
// maxDepth.
var minDepth = 4;
var maxDepth = 14;
var stretchDepth = maxDepth + 1;

// Timestamp taken before any tree is built; used for the final "elapsed" line.
var start = clock();

// Build the stretch tree and print its check value. The tree is not stored in
// a variable.
print "stretch tree of depth:";
print stretchDepth;
print "check:";
print Tree(0, stretchDepth).check();

// This tree is kept in a variable until the end of the script and is checked
// last.
var longLivedTree = Tree(0, maxDepth);

// iterations = 2 ** maxDepth
var iterations = 1;
var d = 0;
while (d < maxDepth) {
  iterations = iterations * 2;
  d = d + 1;
}

// For each depth from minDepth upward in steps of 2 (while below
// stretchDepth): build and check two trees per iteration (items i and -i),
// sum their check values, report the totals, then divide the iteration count
// by 4 for the next depth.
var depth = minDepth;
while (depth < stretchDepth) {
  var check = 0;
  var i = 1;
  while (i <= iterations) {
    check = check + Tree(i, depth).check() + Tree(-i, depth).check();
    i = i + 1;
  }

  print "num trees:";
  print iterations * 2;
  print "depth:";
  print depth;
  print "check:";
  print check;

  iterations = iterations / 4;
  depth = depth + 2;
}

// Finally check the long-lived tree and print the difference between the
// current clock() value and `start`.
print "long lived tree of depth:";
print maxDepth;
print "check:";
print longLivedTree.check();
print "elapsed:";
print clock() - start;


================================================
FILE: test/benchmark/equality.lox
================================================
var i = 0;

// First pass: the same operands that the second loop compares, but written as
// bare expression statements with no `==`. Its duration (loopTime) is
// subtracted from the second pass at the end.
var loopStart = clock();

while (i < 10000000) {
  i = i + 1;

  1; 1; 1; 2; 1; nil; 1; "str"; 1; true;
  nil; nil; nil; 1; nil; "str"; nil; true;
  true; true; true; 1; true; false; true; "str"; true; nil;
  "str"; "str"; "str"; "stru"; "str"; 1; "str"; nil; "str"; true;
}

var loopTime = clock() - loopStart;

// Second pass: same iteration count, now with each operand pair joined by
// `==`. The pairs mix numbers, nil, booleans and strings.
var start = clock();

i = 0;
while (i < 10000000) {
  i = i + 1;

  1 == 1; 1 == 2; 1 == nil; 1 == "str"; 1 == true;
  nil == nil; nil == 1; nil == "str"; nil == true;
  true == true; true == 1; true == false; true == "str"; true == nil;
  "str" == "str"; "str" == "stru"; "str" == 1; "str" == nil; "str" == true;
}

// Report the first-pass time, the second-pass time, and their difference.
var elapsed = clock() - start;
print "loop";
print loopTime;
print "elapsed";
print elapsed;
print "equals";
print elapsed - loopTime;


================================================
FILE: test/benchmark/fib.lox
================================================
// Doubly recursive Fibonacci: returns n itself when n < 2, otherwise
// fib(n - 2) + fib(n - 1). No results are cached between calls.
fun fib(n) {
  if (n < 2) return n;
  return fib(n - 2) + fib(n - 1);
}

// Prints whether fib(35) equals 9227465, then the difference between the
// clock() values taken after and before the call.
var start = clock();
print fib(35) == 9227465;
print clock() - start;


================================================
FILE: test/benchmark/instantiation.lox
================================================
// This benchmark stresses instance creation and initializer calling.

// Class whose only member is an `init` method with no parameters and an empty
// body.
class Foo {
  init() {}
}

var start = clock();
var i = 0;
while (i < 500000) {
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  Foo();
  i = i + 1;
}

print clock() - start;


================================================
FILE: test/benchmark/invocation.lox
================================================
// This benchmark stresses just method invocation.

// Class with 30 methods, method0 through method29. Each takes no parameters
// and has an empty body.
class Foo {
  method0() {}
  method1() {}
  method2() {}
  method3() {}
  method4() {}
  method5() {}
  method6() {}
  method7() {}
  method8() {}
  method9() {}
  method10() {}
  method11() {}
  method12() {}
  method13() {}
  method14() {}
  method15() {}
  method16() {}
  method17() {}
  method18() {}
  method19() {}
  method20() {}
  method21() {}
  method22() {}
  method23() {}
  method24() {}
  method25() {}
  method26() {}
  method27() {}
  method28() {}
  method29() {}
}

// A single instance is created before `start` is taken, so its construction
// is not part of the measured interval.
var foo = Foo();
var start = clock();
var i = 0;
// 500000 iterations, each calling method0 through method29 once on `foo`.
while (i < 500000) {
  foo.method0();
  foo.method1();
  foo.method2();
  foo.method3();
  foo.method4();
  foo.method5();
  foo.method6();
  foo.method7();
  foo.method8();
  foo.method9();
  foo.method10();
  foo.method11();
  foo.method12();
  foo.method13();
  foo.method14();
  foo.method15();
  foo.method16();
  foo.method17();
  foo.method18();
  foo.method19();
  foo.method20();
  foo.method21();
  foo.method22();
  foo.method23();
  foo.method24();
  foo.method25();
  foo.method26();
  foo.method27();
  foo.method28();
  foo.method29();
  i = i + 1;
}

// Difference between the clock() values after and before the loop.
print clock() - start;


================================================
FILE: test/benchmark/method_call.lox
================================================
// Holds a single `state` field that `activate` negates.
class Toggle {
  // Stores the initial state.
  init(startState) {
    this.state = startState;
  }

  // Returns the current state.
  value() { return this.state; }

  // Replaces the state with its logical negation and returns `this`, so calls
  // can be chained (e.g. `toggle.activate().value()`).
  activate() {
    this.state = !this.state;
    return this;
  }
}

// Subclass of Toggle that only flips its state on every `maxCounter`-th call
// to `activate`.
class NthToggle < Toggle {
  // Delegates the initial state to Toggle's init, then stores the threshold
  // and starts the call counter at 0.
  init(startState, maxCounter) {
    super.init(startState);
    this.countMax = maxCounter;
    this.count = 0;
  }

  // Increments the counter; once it reaches countMax, invokes Toggle's
  // activate through `super` and resets the counter to 0. Always returns
  // `this`.
  activate() {
    this.count = this.count + 1;
    if (this.count >= this.countMax) {
      super.activate();
      this.count = 0;
    }

    return this;
  }
}

// `start` is taken before either object is created, so the printed interval
// covers both loops and both constructions.
var start = clock();
var n = 100000;
var val = true;
var toggle = Toggle(val);

// n iterations of 10 chained activate().value() calls on the plain Toggle.
for (var i = 0; i < n; i = i + 1) {
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
  val = toggle.activate().value();
}

print toggle.value();

// Same loop again on an NthToggle constructed with a threshold of 3.
val = true;
var ntoggle = NthToggle(val, 3);

for (var i = 0; i < n; i = i + 1) {
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
  val = ntoggle.activate().value();
}

print ntoggle.value();
print clock() - start;


================================================
FILE: test/benchmark/properties.lox
================================================
// This benchmark stresses both field and method lookup.

// Class with 30 fields (field0..field29) and 30 matching accessor methods
// (method0..method29).
class Foo {
  // Sets every field from field0 through field29 to 1.
  init() {
    this.field0 = 1;
    this.field1 = 1;
    this.field2 = 1;
    this.field3 = 1;
    this.field4 = 1;
    this.field5 = 1;
    this.field6 = 1;
    this.field7 = 1;
    this.field8 = 1;
    this.field9 = 1;
    this.field10 = 1;
    this.field11 = 1;
    this.field12 = 1;
    this.field13 = 1;
    this.field14 = 1;
    this.field15 = 1;
    this.field16 = 1;
    this.field17 = 1;
    this.field18 = 1;
    this.field19 = 1;
    this.field20 = 1;
    this.field21 = 1;
    this.field22 = 1;
    this.field23 = 1;
    this.field24 = 1;
    this.field25 = 1;
    this.field26 = 1;
    this.field27 = 1;
    this.field28 = 1;
    this.field29 = 1;
  }

  // methodN returns the value of fieldN.
  method0() { return this.field0; }
  method1() { return this.field1; }
  method2() { return this.field2; }
  method3() { return this.field3; }
  method4() { return this.field4; }
  method5() { return this.field5; }
  method6() { return this.field6; }
  method7() { return this.field7; }
  method8() { return this.field8; }
  method9() { return this.field9; }
  method10() { return this.field10; }
  method11() { return this.field11; }
  method12() { return this.field12; }
  method13() { return this.field13; }
  method14() { return this.field14; }
  method15() { return this.field15; }
  method16() { return this.field16; }
  method17() { return this.field17; }
  method18() { return this.field18; }
  method19() { return this.field19; }
  method20() { return this.field20; }
  method21() { return this.field21; }
  method22() { return this.field22; }
  method23() { return this.field23; }
  method24() { return this.field24; }
  method25() { return this.field25; }
  method26() { return this.field26; }
  method27() { return this.field27; }
  method28() { return this.field28; }
  method29() { return this.field29; }
}

// A single instance is created before `start` is taken, so its construction
// (including the 30 field assignments in init) is not part of the measured
// interval.
var foo = Foo();
var start = clock();
var i = 0;
// 500000 iterations, each calling method0 through method29 once on `foo`;
// the returned field values are discarded.
while (i < 500000) {
  foo.method0();
  foo.method1();
  foo.method2();
  foo.method3();
  foo.method4();
  foo.method5();
  foo.method6();
  foo.method7();
  foo.method8();
  foo.method9();
  foo.method10();
  foo.method11();
  foo.method12();
  foo.method13();
  foo.method14();
  foo.method15();
  foo.method16();
  foo.method17();
  foo.method18();
  foo.method19();
  foo.method20();
  foo.method21();
  foo.method22();
  foo.method23();
  foo.method24();
  foo.method25();
  foo.method26();
  foo.method27();
  foo.method28();
  foo.method29();
  i = i + 1;
}

// Difference between the clock() values after and before the loop.
print clock() - start;


================================================
FILE: test/benchmark/string_equality.lox
================================================
var a1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1";
var a2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa2";
var a3 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa3";
var a4 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4";
var a5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa5";
var a6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa6";
var a7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa7";
var a8 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa8";

var i = 0;

var loopStart = clock();

while (i < 100000) {
  i = i + 1;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;

  a1; a1; a1; a2; a1; a3; a1; a4; a1; a5; a1; a6; a1; a7; a1; a8;
  a2; a1; a2; a2; a2; a3; a2; a4; a2; a5; a2; a6; a2; a7; a2; a8;
  a3; a1; a3; a2; a3; a3; a3; a4; a3; a5; a3; a6; a3; a7; a3; a8;
  a4; a1; a4; a2; a4; a3; a4; a4; a4; a5; a4; a6; a4; a7; a4; a8;
  a5; a1; a5; a2; a5; a3; a5; a4; a5; a5; a5; a6; a5; a7; a5; a8;
  a6; a1; a6; a2; a6; a3; a6; a4; a6; a5; a6; a6; a6; a7; a6; a8;
  a7; a1; a7; a2; a7; a3; a7; a4; a7; a5; a7; a6; a7; a7; a7; a8;
  a8; a1; a8; a2; a8; a3; a8; a4; a8; a5; a8; a6; a8; a7; a8; a8;
}

var loopTime = clock() - loopStart;

var start = clock();

i = 0;
while (i < 100000) {
  i = i + 1;

  // 1 == 1; 1 == 2; 1 == nil; 1 == "str"; 1 == true;
  // nil == nil; nil == 1; nil == "str"; nil == true;
  // true == true; true == 1; true == false; true == "str"; true == nil;
  // "str" == "str"; "str" == "stru"; "str" == 1; "str" == nil; "str" == true;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

  a1 == a1; a1 == a2; a1 == a3; a1 == a4; a1 == a5; a1 == a6; a1 == a7; a1 == a8;
  a2 == a1; a2 == a2; a2 == a3; a2 == a4; a2 == a5; a2 == a6; a2 == a7; a2 == a8;
  a3 == a1; a3 == a2; a3 == a3; a3 == a4; a3 == a5; a3 == a6; a3 == a7; a3 == a8;
  a4 == a1; a4 == a2; a4 == a3; a4 == a4; a4 == a5; a4 == a6; a4 == a7; a4 == a8;
  a5 == a1; a5 == a2; a5 == a3; a5 == a4; a5 == a5; a5 == a6; a5 == a7; a5 == a8;
  a6 == a1; a6 == a2; a6 == a3; a6 == a4; a6 == a5; a6 == a6; a6 == a7; a6 == a8;
  a7 == a1; a7 == a2; a7 == a3; a7 == a4; a7 == a5; a7 == a6; a7 == a7; a7 == a8;
  a8 == a1; a8 == a2; a8 == a3; a8 == a4; a8 == a5; a8 == a6; a8 == a7; a8 == a8;

}

// Report the timings. NOTE(review): loopTime appears to be the cost of the
// earlier loop that only reads the operands without comparing them; confirm
// against the top of this file. Subtracting it from the elapsed time of the
// `==` loop above leaves an estimate of the time spent in the comparisons.
var elapsed = clock() - start;
print "loop";
print loopTime;
print "elapsed";
print elapsed;
print "equals";
print elapsed - loopTime;


================================================
FILE: test/benchmark/trees.lox
================================================
// Benchmark: builds a tree with five children per node once, then walks it
// repeatedly.
class Tree {
  // Stores the depth and, when depth > 0, creates five subtrees of depth - 1 in
  // fields a through e. Nodes of depth 0 get no child fields.
  init(depth) {
    this.depth = depth;
    if (depth > 0) {
      this.a = Tree(depth - 1);
      this.b = Tree(depth - 1);
      this.c = Tree(depth - 1);
      this.d = Tree(depth - 1);
      this.e = Tree(depth - 1);
    }
  }

  // Returns the sum of the depth values of this node and all of its
  // descendants. Depth-0 nodes return 0 before any child field is read.
  walk() {
    if (this.depth == 0) return 0;
    return this.depth 
        + this.a.walk()
        + this.b.walk()
        + this.c.walk()
        + this.d.walk()
        + this.e.walk();
  }
}

// The tree is built before the timer starts, so only the walks are timed.
var tree = Tree(8);
var start = clock();
for (var i = 0; i < 100; i = i + 1) {
  // 122068 is the depth sum of a five-way tree of depth 8; anything else means
  // the walk produced a wrong result.
  if (tree.walk() != 122068) print "Error";
}
// Print the time taken by the 100 walks, as measured with clock().
print clock() - start;


================================================
FILE: test/benchmark/zoo.lox
================================================
// Benchmark: repeated method calls that each read one field of the receiver.
class Zoo {
  // Gives every instance six fields, each holding 1.
  init() {
    this.aarvark  = 1;
    this.baboon   = 1;
    this.cat      = 1;
    this.donkey   = 1;
    this.elephant = 1;
    this.fox      = 1;
  }
  // Each method returns a different one of the six fields.
  ant()    { return this.aarvark; }
  banana() { return this.baboon; }
  tuna()   { return this.cat; }
  hay()    { return this.donkey; }
  grass()  { return this.elephant; }
  mouse()  { return this.fox; }
}

var zoo = Zoo();
var sum = 0;
var start = clock();
// Every pass makes six calls that each return 1, so sum grows by 6 per pass
// until it is no longer below 10000000.
while (sum < 10000000) {
  sum = sum + zoo.ant()
            + zoo.banana()
            + zoo.tuna()
            + zoo.hay()
            + zoo.grass()
            + zoo.mouse();
}

// Print the final sum, then the time the loop took as measured with clock().
print sum;
print clock() - start;


================================================
FILE: test/benchmark/zoo_batch.lox
================================================
// Benchmark: runs batches of method calls until a time limit has passed and
// reports how many batches were completed.
class Zoo {
  // Gives every instance six fields, each holding 1.
  init() {
    this.aarvark  = 1;
    this.baboon   = 1;
    this.cat      = 1;
    this.donkey   = 1;
    this.elephant = 1;
    this.fox      = 1;
  }
  // Each method returns a different one of the six fields.
  ant()    { return this.aarvark; }
  banana() { return this.baboon; }
  tuna()   { return this.cat; }
  hay()    { return this.donkey; }
  grass()  { return this.elephant; }
  mouse()  { return this.fox; }
}

var zoo = Zoo();
var sum = 0;
var start = clock();
var batch = 0;
// Keep starting batches until clock() has advanced by at least 10 since start
// (presumably seconds; confirm the unit of clock()). The limit is only checked
// between batches, so the final batch always runs to completion.
while (clock() - start < 10) {
  // One batch is 10000 passes of six method calls.
  for (var i = 0; i < 10000; i = i + 1) {
    sum = sum + zoo.ant()
              + zoo.banana()
              + zoo.tuna()
              + zoo.hay()
              + zoo.grass()
              + zoo.mouse();
  }
  batch = batch + 1;
}

// Print the accumulated sum, the number of completed batches and the total
// time measured with clock().
print sum;
print batch;
print clock() - start;


================================================
FILE: test/block/empty.lox
================================================
// An empty block is valid both as a standalone statement and as the body of an
// if or else branch.
{} // By itself.

// In a statement.
if (true) {}
if (false) {} else {}

// Reaching this line shows that none of the empty blocks caused a failure.
print "ok"; // expect: ok


================================================
FILE: test/block/scope.lox
================================================
// A variable declared inside a block shadows the outer variable of the same
// name only until the block ends.
var a = "outer";

{
  var a = "inner";
  print a; // expect: inner
}

// The outer variable is untouched by the inner declaration.
print a; // expect: outer


================================================
FILE: test/bool/equality.lox
================================================
// `==` on booleans: equal only when both sides are the same boolean value.
print true == true;    // expect: true
print true == false;   // expect: false
print false == true;   // expect: false
print false == false;  // expect: true

// Not equal to other types.
print true == 1;        // expect: false
print false == 0;       // expect: false
print true == "true";   // expect: false
print false == "false"; // expect: false
print false == "";      // expect: false

// `!=` on booleans gives the opposite answer for every pair above.
print true != true;    // expect: false
print true != false;   // expect: true
print false != true;   // expect: true
print false != false;  // expect: false

// Not equal to other types.
print true != 1;        // expect: true
print false != 0;       // expect: true
print true != "true";   // expect: true
print false != "false"; // expect: true
print false != "";      // expect: true


================================================
FILE: test/bool/not.lox
================================================
// Logical not on boolean literals, including a double negation.
print !true;    // expect: false
print !false;   // expect: true
print !!true;   // expect: true


================================================
FILE: test/call/bool.lox
================================================
// Calling a boolean value is a runtime error.
true(); // expect runtime error: Can only call functions and classes.


================================================
FILE: test/call/nil.lox
================================================
// Calling nil is a runtime error.
nil(); // expect runtime error: Can only call functions and classes.


================================================
FILE: test/call/num.lox
================================================
// Calling a number is a runtime error.
123(); // expect runtime error: Can only call functions and classes.


================================================
FILE: test/call/object.lox
================================================
// A class can be called to create an instance, but the instance itself is not
// callable.
class Foo {}

var foo = Foo();
foo(); // expect runtime error: Can only call functions and classes.


================================================
FILE: test/call/string.lox
================================================
// Calling a string is a runtime error.
"str"(); // expect runtime error: Can only call functions and classes.


================================================
FILE: test/class/empty.lox
================================================
// A class with an empty body can be declared; printing the class shows its
// name.
class Foo {}

print Foo; // expect: Foo


================================================
FILE: test/class/inherit_self.lox
================================================
// A class may not name itself as its own superclass.
class Foo < Foo {} // Error at 'Foo': A class can't inherit from itself.


================================================
FILE: test/class/inherited_method.lox
================================================
// Methods are inherited through more than one level: Baz extends Bar, which
// extends Foo, and a Baz instance can call methods from all three.
class Foo {
  inFoo() {
    print "in foo";
  }
}

class Bar < Foo {
  inBar() {
    print "in bar";
  }
}

class Baz < Bar {
  inBaz() {
    print "in baz";
  }
}

var baz = Baz();
// Defined two levels up, one level up, and on Baz itself, respectively.
baz.inFoo(); // expect: in foo
baz.inBar(); // expect: in bar
baz.inBaz(); // expect: in baz


================================================
FILE: test/class/local_inherit_other.lox
================================================
// A class declared inside a function can inherit from a class declared at the
// top level, and can be returned from that function.
class A {}

fun f() {
  class B < A {}
  return B;
}

print f(); // expect: B


================================================
FILE: test/class/local_inherit_self.lox
================================================
{
  class Foo < Foo {} // Error at 'Foo': A class can't inherit from itself.
}
// [c line 5] Error at end: Expect '}' after block.


================================================
FILE: test/class/local_reference_self.lox
================================================
// A method of a class declared inside a block can refer to its own class by
// name.
{
  class Foo {
    returnSelf() {
      return Foo;
    }
  }

  print Foo().returnSelf(); // expect: Foo
}


================================================
FILE: test/class/reference_self.lox
================================================
// A method of a top-level class can refer to its own class by name.
class Foo {
  returnSelf() {
    return Foo;
  }
}

print Foo().returnSelf(); // expect: Foo


================================================
FILE: test/closure/assign_to_closure.lox
================================================
// Two closures that capture the same local share that variable: an assignment
// made through one is visible through the other, even after the block that
// declared the local has ended.
var f;
var g;

{
  var local = "local";
  fun f_() {
    print local;
    local = "after f";
    print local;
  }
  f = f_;

  fun g_() {
    print local;
    local = "after g";
    print local;
  }
  g = g_;
}

f();
// expect: local
// expect: after f

// g starts from the value that f left behind.
g();
// expect: after f
// expect: after g


================================================
FILE: test/closure/assign_to_shadowed_later.lox
================================================
// assign() is declared before the block's own `a` exists, so its assignment
// targets the global `a` and leaves the local declared afterwards untouched.
var a = "global";

{
  fun assign() {
    a = "assigned";
  }

  var a = "inner";
  assign();
  print a; // expect: inner
}

print a; // expect: assigned


================================================
FILE: test/closure/close_over_function_parameter.lox
================================================
// A closure can capture a parameter of its enclosing function and still read
// it after that function has returned.
var f;

fun foo(param) {
  fun f_() {
    print param;
  }
  f = f_;
}
foo("param");

f(); // expect: param


================================================
FILE: test/closure/close_over_later_variable.lox
================================================
// This is a regression test. There was a bug where if an upvalue for an
// earlier local (here "a") was captured *after* a later one ("b"), then it
// would crash because it walked to the end of the upvalue list (correct), but
// then didn't handle not finding the variable.

fun f() {
  var a = "a";
  var b = "b";
  fun g() {
    // The later local (b) is deliberately referenced before the earlier one
    // (a) to reproduce the capture order described above.
    print b; // expect: b
    print a; // expect: a
  }
  g();
}
f();


================================================
FILE: test/closure/close_over_method_parameter.lox
================================================
// A closure created inside a method can capture the method's parameter and
// still read it after the method has returned.
var f;

class Foo {
  method(param) {
    fun f_() {
      print param;
    }
    f = f_;
  }
}

Foo().method("param");
f(); // expect: param


================================================
FILE: test/closure/closed_closure_in_function.lox
================================================
// A closure keeps access to a captured local after the block that declared the
// local has ended.
var f;

{
  var local = "local";
  fun f_() {
    print local;
  }
  f = f_;
}

// `local` is out of scope here; only the closure can still reach it.
f(); // expect: local


================================================
FILE: test/closure/nested_closure.lox
================================================
// The innermost function captures one local from each of three enclosing
// functions and can still read all of them after every enclosing call has
// returned.
var f;

fun f1() {
  var a = "a";
  fun f2() {
    var b = "b";
    fun f3() {
      var c = "c";
      fun f4() {
        print a;
        print b;
        print c;
      }
      f = f4;
    }
    f3();
  }
  f2();
}
f1();

// f1, f2 and f3 have all returned by the time f4 runs through f.
f();
// expect: a
// expect: b
// expect: c


================================================
FILE: test/closure/open_closure_in_function.lox
================================================
// A closure can read a captured local while that local is still in scope.
{
  var local = "local";
  fun f() {
    print local; // expect: local
  }
  f();
}


================================================
FILE: test/closure/reference_closure_multiple_times.lox
================================================
// Reading the same captured variable more than once inside one closure works.
var f;

{
  var a = "a";
  fun f_() {
    print a;
    print a;
  }
  f = f_;
}

f();
// expect: a
// expect: a


================================================
FILE: test/closure/reuse_closure_slot.lox
================================================
// A captured variable must stay distinct from a later local that takes over
// the slot it used to occupy.
{
  var f;

  {
    var a = "a";
    fun f_() { print a; }
    f = f_;
  }

  {
    // Since a is out of scope, the local slot will be reused by b. Make sure
    // that f still closes over a.
    var b = "b";
    f(); // expect: a
  }
}


================================================
FILE: test/closure/shadow_closure_with_local.lox
================================================
// Inside f, `foo` refers to the captured outer variable until a local of the
// same name is declared in the nested block, and again once that block ends.
{
  var foo = "closure";
  fun f() {
    {
      print foo; // expect: closure
      var foo = "shadow";
      print foo; // expect: shadow
    }
    print foo; // expect: closure
  }
  f();
}


================================================
FILE: test/closure/unused_closure.lox
================================================
// This is a regression test. There was a bug where the VM would try to close
// an upvalue even if the upvalue was never created because the codepath for
// the closure was not executed.

{
  var a = "a";
  if (false) {
    // This branch never runs, so the function that refers to a is never
    // created at runtime.
    fun foo() { a; }
  }
}

// If we get here, we didn't segfault when a went out of scope.
print "ok"; // expect: ok


================================================
FILE: test/closure/unused_later_closure.lox
================================================
// This is a regression test. When closing upvalues for discarded locals, it
// wouldn't make sure it discarded the upvalue for the correct stack slot.
//
// Here we create two locals that can be closed over, but only the first one
// actually is. When "b" goes out of scope, we need to make sure we don't
// prematurely close "a".
var closure;

{
  var a = "a";

  {
    var b = "b";
    fun returnA() {
      return a;
    }

    closure = returnA;

    // This branch never runs, so returnB is never created and b is never
    // actually captured.
    if (false) {
      fun returnB() {
        return b;
      }
    }
  }

  // b has gone out of scope here, while a is still live.
  print closure(); // expect: a
}


================================================
FILE: test/comments/line_at_eof.lox
================================================
print "ok"; // expect: ok
// comment

================================================
FILE: test/comments/only_line_comment.lox
================================================
// comment

================================================
FILE: test/comments/only_line_comment_and_line.lox
================================================
// comment


================================================
FILE: test/comments/unicode.lox
================================================
// Unicode characters are allowed in comments.
//
// Latin 1 Supplement: £§¶ÜÞ
// Latin Extended-A: ĐĦŋœ
// Latin Extended-B: ƂƢƩǁ
// Other stuff: ឃᢆ᯽₪ℜ↩⊗┺░
// Emoji: ☃☺♣

print "ok"; // expect: ok


================================================
FILE: test/constructor/arguments.lox
================================================
// Arguments passed when calling the class are handed to init().
class Foo {
  init(a, b) {
    print "init"; // expect: init
    this.a = a;
    this.b = b;
  }
}

var foo = Foo(1, 2);
print foo.a; // expect: 1
print foo.b; // expect: 2


================================================
FILE: test/constructor/call_init_early_return.lox
================================================
// Calling init() directly on an existing instance runs the initializer again,
// and a bare `return;` inside it still yields the instance.
class Foo {
  init() {
    print "init";
    return;
    print "nope";
  }
}

var foo = Foo(); // expect: init
// The explicit call prints "init" again; its result is then printed as the
// instance.
print foo.init(); // expect: init
// expect: Foo instance


================================================
FILE: test/constructor/call_init_explicitly.lox
================================================
// init() can be invoked like a normal method on an existing instance: it runs
// its body against that same instance and evaluates to an instance.
class Foo {
  init(arg) {
    print "Foo.init(" + arg + ")";
    this.field = "init";
  }
}

var foo = Foo("one"); // expect: Foo.init(one)
// Overwrite the field so a later reset by init() is observable.
foo.field = "field";

var foo2 = foo.init("two"); // expect: Foo.init(two)
print foo2; // expect: Foo instance

// Make sure init() doesn't create a fresh instance.
print foo.field; // expect: init


================================================
FILE: test/constructor/default.lox
================================================
// A class that declares no init() can still be instantiated with no arguments.
class Foo {}

var foo = Foo();
print foo; // expect: Foo instance


================================================
FILE: test/constructor/default_arguments.lox
================================================
// A class that declares no init() accepts zero arguments; passing any is a
// runtime error.
class Foo {}

var foo = Foo(1, 2, 3); // expect runtime error: Expected 0 arguments but got 3.


================================================
FILE: test/constructor/early_return.lox
================================================
// A bare `return;` in init() ends the initializer early, and construction
// still produces the instance.
class Foo {
  init() {
    print "init";
    return;
    print "nope";
  }
}

var foo = Foo(); // expect: init
print foo; // expect: Foo instance


================================================
FILE: test/constructor/extra_arguments.lox
================================================
// Passing more arguments than init() declares is a runtime error.
class Foo {
  init(a, b) {
    this.a = a;
    this.b = b;
  }
}

var foo = Foo(1, 2, 3, 4); // expect runtime error: Expected 2 arguments but got 4.

================================================
FILE: test/constructor/init_not_method.lox
================================================
// A top-level function named init is an ordinary function: calling it runs the
// function, not the initializer of the class declared alongside it.
class Foo {
  init(arg) {
    print "Foo.init(" + arg + ")";
    this.field = "init";
  }
}

fun init() {
  print "not initializer";
}

init(); // expect: not initializer


================================================
FILE: test/constructor/missing_arguments.lox
================================================
// Passing fewer arguments than init() declares is a runtime error.
class Foo {
  init(a, b) {}
}

var foo = Foo(1); // expect runtime error: Expected 2 arguments but got 1.


================================================
FILE: test/constructor/return_in_nested_function.lox
================================================
// A function declared inside init() may return a value, even when that
// function is itself named init.
class Foo {
  init() {
    fun init() {
      return "bar";
    }
    print init(); // expect: bar
  }
}

print Foo(); // expect: Foo instance


================================================
FILE: test/constructor/return_value.lox
================================================
// Returning a value from init() is not allowed.
class Foo {
  init() {
    return "result"; // Error at 'return': Can't return a value from an initializer.
  }
}


================================================
FILE: test/empty_file.lox
================================================


================================================
FILE: test/expressions/evaluate.lox
================================================
// Note: This test is only for the expression-evaluating chapter, whose
// interpreter evaluates a single bare expression directly. That is why there
// is no print statement or trailing semicolon here.
(5 - (3 - 1)) + -1
// expect: 2


================================================
FILE: test/expressions/parse.lox
================================================
// Note: This test is only for the expression-parsing chapter, whose
// interpreter prints the AST of a single bare expression.
(5 - (3 - 1)) + -1
// expect: (+ (group (- 5.0 (group (- 3.0 1.0)))) (- 1.0))


================================================
FILE: test/field/call_function_field.lox
================================================
// A function stored in a field can be called through the property access and
// receives its arguments as usual.
class Foo {}

fun bar(a, b) {
  print "bar";
  print a;
  print b;
}

var foo = Foo();
foo.bar = bar;

foo.bar(1, 2);
// expect: bar
// expect: 1
// expect: 2


================================================
FILE: test/field/call_nonfunction_field.lox
================================================
// Calling a field that holds a non-callable value (here a string) is a runtime
// error.
class Foo {}

var foo = Foo();
foo.bar = "not fn";

foo.bar(); // expect runtime error: Can only call functions and classes.


================================================
FILE: test/field/get_and_set_method.lox
================================================
// A field assigned on an instance shadows a method of the same name, while a
// reference to the method taken beforehand keeps calling the original method.
class Foo {
  method(a) {
    print "method";
    print a;
  }
  other(a) {
    print "other";
    print a;
  }
}

var foo = Foo();
// Grab the method before it gets shadowed below.
var method = foo.method;

// Setting a property shadows the instance method.
foo.method = foo.other;
foo.method(1);
// expect: other
// expect: 1

// The old method handle still points to the original method.
method(2);
// expect: method
// expect: 2


================================================
FILE: test/field/get_on_bool.lox
================================================
// Reading a property on a boolean is a runtime error.
true.foo; // expect runtime error: Only instances have properties.


================================================
FILE: test/field/get_on_class.lox
================================================
// Reading a property on a class itself, rather than on an instance, is a
// runtime error.
class Foo {}
Foo.bar; // expect runtime error: Only instances have properties.


================================================
FILE: test/field/get_on_function.lox
================================================
// Reading a property on a function is a runtime error.
fun foo() {}

foo.bar; // expect runtime error: Only instances have properties.


================================================
FILE: test/field/get_on_nil.lox
================================================
// Reading a property on nil is a runtime error.
nil.foo; // expect runtime error: Only instances have properties.


================================================
FILE: test/field/get_on_num.lox
================================================
// Reading a property on a number is a runtime error.
123.foo; // expect runtime error: Only instances have properties.


================================================
FILE: test/field/get_on_string.lox
================================================
// Reading a property on a string is a runtime error.
"str".foo; // expect runtime error: Only instances have properties.


================================================
FILE: test/field/many.lox
================================================
// Stores a large number of distinct fields on a single instance. Each field's
// value is a string equal to the field's own name, so a later read can tell
// whether it got the right field.
class Foo {}

var foo = Foo();
// Assigns every field on the global `foo`. The names are listed in no
// particular order.
fun setFields() {
  foo.bilberry = "bilberry";
  foo.lime = "lime";
  foo.elderberry = "elderberry";
  foo.raspberry = "raspberry";
  foo.gooseberry = "gooseberry";
  foo.longan = "longan";
  foo.mandarine = "mandarine";
  foo.kiwifruit = "kiwifruit";
  foo.orange = "orange";
  foo.pomegranate = "pomegranate";
  foo.tomato = "tomato";
  foo.banana = "banana";
  foo.juniper = "juniper";
  foo.damson = "damson";
  foo.blackcurrant = "blackcurrant";
  foo.peach = "peach";
  foo.grape = "grape";
  foo.mango = "mango";
  foo.redcurrant = "redcurrant";
  foo.watermelon = "watermelon";
  foo.plumcot = "plumcot";
  foo.papaya = "papaya";
  foo.cloudberry = "cloudberry";
  foo.rambutan = "rambutan";
  foo.salak = "salak";
  foo.physalis = "physalis";
  foo.huckleberry = "huckleberry";
  foo.coconut = "coconut";
  foo.date = "date";
  foo.tamarind = "tamarind";
  foo.lychee = "lychee";
  foo.raisin = "raisin";
  foo.apple = "apple";
  foo.avocado = "avocado";
  foo.nectarine = "nectarine";
  foo.pomelo = "pomelo";
  foo.melon = "melon";
  foo.currant = "currant";
  foo.plum = "plum";
  foo.persimmon = "persimmon";
  foo.olive = "olive";
  foo.cranberry = "cranberry";
  foo.boysenberry = "boysenberry";
  foo.blackberry = "blackberry";
  foo.passionfruit = "passionfruit";
  foo.mulberry = "mulberry";
  foo.marionberry = "marionberry";
  foo.plantain = "plantain";
  foo.lemon = "lemon";
  foo.yuzu = "yuzu";
  foo.loquat = "loquat";
  foo.kumquat = "kumquat";
  foo.salmonberry = "salmonberry";
  foo.tangerine = "tangerine";
  foo.durian = "durian";
  foo.pear = "pear";
  foo.cantaloupe = "cantaloupe";
  foo.quince = "quince";
  foo.guava = "guava";
  foo.strawberry = "strawberry";
  foo.nance = "nance";
  foo.apricot = "apricot";
  foo.jambul = "jambul";
  foo.grapefruit = "grapefruit";
  foo.clementine = "clementine";
  foo.jujube = "jujube";
  foo.cherry = "cherry";
  foo.feijoa = "feijoa";
  foo.jackfruit = "jackfruit";
  foo.fig = "fig";
  foo.cherimoya = "cherimoya";
  foo.pineapple = "pineapple";
  foo.blueberry = "blueberry";
  foo.jabuticaba = "jabuticaba";
  foo.miracle = "miracle";
  foo.dragonfruit = "dragonfruit";
  foo.satsuma = "satsuma";
  foo.tamarillo = "tamarillo";
  foo.honeydew = "honeydew";
}

// Populate the fields first, then read them all back.
setFields();

// Prints every field of the global `foo` in alphabetical order of field name.
// Each field is expected to print a string equal to its own name.
fun printFields() {
  print foo.apple; // expect: apple
  print foo.apricot; // expect: apricot
  print foo.avocado; // expect: avocado
  print foo.banana; // expect: banana
  print foo.bilberry; // expect: bilberry
  print foo.blackberry; // expect: blackberry
  print foo.blackcurrant; // expect: blackcurrant
  print foo.blueberry; // expect: blueberry
  print foo.boysenberry; // expect: boysenberry
  print foo.cantaloupe; // expect: cantaloupe
  print foo.cherimoya; // expect: cherimoya
  print foo.cherry; // expect: cherry
  print foo.clementine; // expect: clementine
  print foo.cloudberry; // expect: cloudberry
  print foo.coconut; // expect: coconut
  print foo.cranberry; // expect: cranberry
  print foo.currant; // expect: currant
  print foo.damson; // expect: damson
  print foo.date; // expect: date
  print foo.dragonfruit; // expect: dragonfruit
  print foo.durian; // expect: durian
  print foo.elderberry; // expect: elderberry
  print foo.feijoa; // expect: feijoa
  print foo.fig; // expect: fig
  print foo.gooseberry; // expect: gooseberry
  print foo.grape; // expect: grape
  print foo.grapefruit; // expect: grapefruit
  print foo.guava; // expect: guava
  print foo.honeydew; // expect: honeydew
  print foo.huckleberry; // expect: huckleberry
  print foo.jabuticaba; // expect: jabuticaba
  print foo.jackfruit; // expect: jackfruit
  print foo.jambul; // expect: jambul
  print foo.jujube; // expect: jujube
  print foo.juniper; // expect: juniper
  print foo.kiwifruit; // expect: kiwifruit
  print foo.kumquat; // expect: kumquat
  print foo.lemon; // expect: lemon
  print foo.lime; // expect: lime
  print foo.longan; // expect: longan
  print foo.loquat; // expect: loquat
  print foo.lychee; // expect: lychee
  print foo.mandarine; // expect: mandarine
  print foo.mango; // expect: mango
  print foo.marionberry; // expect: marionberry
  print foo.melon; // expect: melon
  print foo.miracle; // expect: miracle
  print foo.mulberry; // expect: mulberry
  print foo.nance; // expect: nance
  print foo.nectarine; // expect: nectarine
  print foo.olive; // expect: olive
  print foo.orange; // expect: orange
  print foo.papaya; // expect: papaya
  print foo.passionfruit; // expect: passionfruit
  print foo.peach; // expect: peach
  print foo.pear; // expect: pear
  print foo.persimmon; // expect: persimmon
  print foo.physalis; // expect: physalis
  print foo.pineapple; // expect: pineapple
  print foo.plantain; // expect: plantain
  print foo.plum; // expect: plum
  print foo.plumcot; // expect: plumcot
  print foo.pomegranate; // expect: pomegranate
  print foo.pomelo; // expect: pomelo
  print foo.quince; // expect: quince
  print foo.raisin; // expect: raisin
  print foo.rambutan; // expect: rambutan
  print foo.raspberry; // expect: raspberry
  print foo.redcurrant; // expect: redcurrant
  print foo.salak; // expect: salak
  print foo.salmonberry; // expect: salmonberry
  print foo.satsuma; // expect: satsuma
  print foo.strawberry; // expect: strawberry
  print foo.tamarillo; // expect: tamarillo
  print foo.tamarind; // expect: tamarind
  print foo.tangerine; // expect: tangerine
  print foo.tomato; // expect: tomato
  print foo.watermelon; // expect: watermelon
  print foo.yuzu; // expect: yuzu
}

printFields();


================================================
FILE: test/field/method.lox
================================================
// A method can be read off an instance and called later; reading it does not
// invoke it.
class Foo {
  bar(arg) {
    print arg;
  }
}

var bar = Foo().bar;
// Nothing has been printed by bar yet at this point.
print "got method"; // expect: got method
bar("arg");          // expect: arg


================================================
FILE: test/field/method_binds_this.lox
================================================
// A method read from an instance stays tied to that instance, even when the
// reference is stored on, and called through, a different instance.
class Foo {
  sayName(a) {
    print this.name;
    print a;
  }
}

var foo1 = Foo();
foo1.name = "foo1";

var foo2 = Foo();
foo2.name = "foo2";

// Store the method reference on another object.
foo2.fn = foo1.sayName;
// Still retains original receiver.
foo2.fn(1);
// expect: foo1
// expect: 1


================================================
FILE: test/field/on_instance.lox
================================================
// Fields are created on an instance by assigning to them. The assignment
// itself produces the assigned value, and the field can be read back later.
class Foo {}

var foo = Foo();

print foo.bar = "bar value"; // expect: bar value
print foo.baz = "baz value"; // expect: baz value

print foo.bar; // expect: bar value
print foo.baz; // expect: baz value


================================================
FILE: test/field/set_evaluation_order.lox
================================================
// Both names are undefined. The failure reported is for the object on the
// left of the field assignment, not for the value on the right.
undefined1.bar // expect runtime error: Undefined variable 'undefined1'.
  = undefined2;

================================================
FILE: test/field/set_on_bool.lox
================================================
// Assigning a field on a boolean fails at runtime.
true.foo = "value"; // expect runtime error: Only instances have fields.


================================================
FILE: test/field/set_on_class.lox
================================================
// Assigning a field on the class itself (rather than on an instance of it)
// fails at runtime.
class Foo {}
Foo.bar = "value"; // expect runtime error: Only instances have fields.


================================================
FILE: test/field/set_on_function.lox
================================================
// Assigning a field on a function value fails at runtime.
fun foo() {}

foo.bar = "value"; // expect runtime error: Only instances have fields.


================================================
FILE: test/field/set_on_nil.lox
================================================
// Assigning a field on nil fails at runtime.
nil.foo = "value"; // expect runtime error: Only instances have fields.


================================================
FILE: test/field/set_on_num.lox
================================================
// Assigning a field on a number fails at runtime.
123.foo = "value"; // expect runtime error: Only instances have fields.


================================================
FILE: test/field/set_on_string.lox
================================================
// Assigning a field on a string fails at runtime.
"str".foo = "value"; // expect runtime error: Only instances have fields.


================================================
FILE: test/field/undefined.lox
================================================
// Reading a property that was never assigned on the instance, and that the
// class does not define as a method, fails at runtime.
class Foo {}
var foo = Foo();

foo.bar; // expect runtime error: Undefined property 'bar'.


================================================
FILE: test/for/class_in_body.lox
================================================
// [line 2] Error at 'class': Expect expression.
for (;;) class Foo {} // A class declaration is used directly as the loop body.


================================================
FILE: test/for/closure_in_body.lox
================================================
// Each iteration creates a closure that captures the loop variable `i` and a
// body-local copy `j`. When the closures run after the loop, every one sees
// the final value of `i` (4) but its own iteration's `j`.
var f1;
var f2;
var f3;

for (var i = 1; i < 4; i = i + 1) {
  var j = i;
  fun f() {
    print i;
    print j;
  }

  // Keep the closure from each iteration in its own global.
  if (j == 1) f1 = f;
  else if (j == 2) f2 = f;
  else f3 = f;
}

f1(); // expect: 4
      // expect: 1
f2(); // expect: 4
      // expect: 2
f3(); // expect: 4
      // expect: 3


================================================
FILE: test/for/fun_in_body.lox
================================================
// [line 2] Error at 'fun': Expect expression.
for (;;) fun foo() {} // A function declaration is used directly as the loop body.


================================================
FILE: test/for/return_closure.lox
================================================
// A closure created inside a for loop body and returned from the enclosing
// function can still read the loop-body local it captured.
fun f() {
  for (;;) {
    var i = "i";
    fun g() { print i; }
    return g;
  }
}

var h = f();
h(); // expect: i


================================================
FILE: test/for/return_inside.lox
================================================
// A return inside an otherwise endless for loop exits the enclosing function
// and hands back the value of a loop-body local.
fun f() {
  for (;;) {
    var i = "i";
    return i;
  }
}

print f();
// expect: i


================================================
FILE: test/for/scope.lox
================================================
// Scoping of variables declared in a for loop's initializer and body.
{
  var i = "before";

  // The loop's own `i` coexists with the `i` declared in the enclosing block.
  for (var i = 0; i < 1; i = i + 1) {
    print i; // expect: 0

    // The loop body is a further nested scope, so `i` can be declared again.
    var i = -1;
    print i; // expect: -1
  }
}

{
  // This loop declares its own `i`; the condition is false from the start, so
  // the body never runs.
  for (var i = 0; i > 0; i = i + 1) {}

  // The loop's `i` is gone after the loop, so the block can declare its own.
  var i = "after";
  print i; // expect: after

  // Can reuse an existing variable.
  for (i = 0; i < 1; i = i + 1) {
    print i; // expect: 0
  }
}


================================================
FILE: test/for/statement_condition.lox
================================================
// [line 3] Error at '{': Expect expression.
// [line 3] Error at ')': Expect ';' after expression.
for (var a = 1; {}; a = a + 1) {} // A block is used where the condition clause goes.


================================================
FILE: test/for/statement_increment.lox
================================================
// [line 2] Error at '{': Expect expression.
for (var a = 1; a < 2; {}) {} // A block is used where the increment clause goes.


================================================
FILE: test/for/statement_initializer.lox
================================================
// [line 3] Error at '{': Expect expression.
// [line 3] Error at ')': Expect ';' after expression.
for ({}; a < 2; a = a + 1) {} // A block is used where the initializer clause goes.


================================================
FILE: test/for/syntax.lox
================================================
// Exercises the accepted shapes of a for loop: each clause may be omitted, and
// the body may be a single statement or a block.

// Single-expression body.
for (var c = 0; c < 3;) print c = c + 1;
// expect: 1
// expect: 2
// expect: 3

// Block body.
for (var a = 0; a < 3; a = a + 1) {
  print a;
}
// expect: 0
// expect: 1
// expect: 2

// No clauses.
fun foo() {
  for (;;) return "done";
}
print foo(); // expect: done

// No variable.
var i = 0;
for (; i < 2; i = i + 1) print i;
// expect: 0
// expect: 1

// No condition.
fun bar() {
  for (var i = 0;; i = i + 1) {
    print i;
    if (i >= 2) return;
  }
}
bar();
// expect: 0
// expect: 1
// expect: 2

// No increment.
for (var i = 0; i < 2;) {
  print i;
  i = i + 1;
}
// expect: 0
// expect: 1

// Statement bodies. The conditions are false, so these only need to parse.
for (; false;) if (true) 1; else 2;
for (; false;) while (true) 1;
for (; false;) for (;;) 1;


================================================
FILE: test/for/var_in_body.lox
================================================
// [line 2] Error at 'var': Expect expression.
for (;;) var foo; // A variable declaration is used directly as the loop body.


================================================
FILE: test/function/body_must_be_block.lox
================================================
// [line 3] Error at '123': Expect '{' before function body.
// [c line 4] Error at end: Expect '}' after block.
fun f() 123; // The body is a bare expression statement instead of a block.


================================================
FILE: test/function/empty_body.lox
================================================
// Calling a function whose body is empty yields nil.
fun f() {}
print f(); // expect: nil


================================================
FILE: test/function/extra_arguments.lox
================================================
// Calling a two-parameter function with four arguments fails at runtime; the
// surplus arguments are not silently dropped.
fun f(a, b) {
  print a;
  print b;
}

f(1, 2, 3, 4); // expect runtime error: Expected 2 arguments but got 4.


================================================
FILE: test/function/local_mutual_recursion.lox
================================================
// Inside a block, isEven calls isOdd, which is declared later in the same
// block. The call fails at runtime because the name is reported as undefined.
// NOTE(review): presumably the name is not yet in scope when isEven's body is
// resolved, so it is not looked up as a local; confirm against the
// interpreter.
{
  fun isEven(n) {
    if (n == 0) return true;
    return isOdd(n - 1); // expect runtime error: Undefined variable 'isOdd'.
  }

  fun isOdd(n) {
    if (n == 0) return false;
    return isEven(n - 1);
  }

  isEven(4);
}

================================================
FILE: test/function/local_recursion.lox
================================================
// A function declared inside a block can call itself recursively.
{
  fun fib(n) {
    if (n < 2) return n;
    return fib(n - 1) + fib(n - 2);
  }

  print fib(8); // expect: 21
}


================================================
FILE: test/function/missing_arguments.lox
================================================
// Calling a two-parameter function with a single argument fails at runtime.
fun f(a, b) {}

f(1); // expect runtime error: Expected 2 arguments but got 1.


================================================
FILE: test/function/missing_comma_in_parameters.lox
================================================
// [line 3] Error at 'c': Expect ')' after parameters.
// [c line 4] Error at end: Expect '}' after block.
fun foo(a, b c, d, e, f) {} // The comma between 'b' and 'c' is deliberately missing.


================================================
FILE: test/function/mutual_recursion.lox
================================================
// Two top-level functions can call each other, even though isEven refers to
// isOdd before isOdd has been declared.
fun isEven(n) {
  if (n == 0) return true;
  return isOdd(n - 1);
}

fun isOdd(n) {
  if (n == 0) return false;
  return isEven(n - 1);
}

print isEven(4); // expect: true
print isOdd(3); // expect: true


================================================
FILE: test/function/nested_call_with_arguments.lox
================================================
// The result of one call can be called immediately with its own argument
// list: returnArg(func) hands back func, which is then invoked with arg.
fun returnArg(arg) {
  return arg;
}

fun returnFunCallWithArg(func, arg) {
  return returnArg(func)(arg);
}

fun printArg(arg) {
  print arg;
}

returnFunCallWithArg(printArg, "hello world"); // expect: hello world


================================================
FILE: test/function/parameters.lox
================================================
// Functions declared with zero through eight parameters. f0 returns 0; every
// other function returns the sum of its arguments.
fun f0() { return 0; }
print f0(); // expect: 0

fun f1(a) { return a; }
print f1(1); // expect: 1

fun f2(a, b) { return a + b; }
print f2(1, 2); // expect: 3

fun f3(a, b, c) { return a + b + c; }
print f3(1, 2, 3); // expect: 6

fun f4(a, b, c, d) { return a + b + c + d; }
print f4(1, 2, 3, 4); // expect: 10

fun f5(a, b, c, d, e) { return a + b + c + d + e; }
print f5(1, 2, 3, 4, 5); // expect: 15

fun f6(a, b, c, d, e, f) { return a + b + c + d + e + f; }
print f6(1, 2, 3, 4, 5, 6); // expect: 21

fun f7(a, b, c, d, e, f, g) { return a + b + c + d + e + f + g; }
print f7(1, 2, 3, 4, 5, 6, 7); // expect: 28

fun f8(a, b, c, d, e, f, g, h) { return a + b + c + d + e + f + g + h; }
print f8(1, 2, 3, 4, 5, 6, 7, 8); // expect: 36


================================================
FILE: test/function/print.lox
================================================
// Printing a user-declared function shows its name; printing `clock` shows a
// generic native-function marker instead.
fun foo() {}
print foo; // expect: <fn foo>

print clock; // expect: <native fn>


================================================
FILE: test/function/recursion.lox
================================================
// A top-level function can call itself recursively.
fun fib(n) {
  if (n < 2) return n;
  return fib(n - 1) + fib(n - 2);
}

print fib(8); // expect: 21


================================================
FILE: test/function/too_many_arguments.lox
================================================
// Calls foo with 256 arguments, one more than the 255 allowed. The report is
// attached to the 256th argument, on the last line of the call.
fun foo() {}
{
  var a = 1;
  foo(
     a, // 1
     a, // 2
     a, // 3
     a, // 4
     a, // 5
     a, // 6
     a, // 7
     a, // 8
     a, // 9
     a, // 10
     a, // 11
     a, // 12
     a, // 13
     a, // 14
     a, // 15
     a, // 16
     a, // 17
     a, // 18
     a, // 19
     a, // 20
     a, // 21
     a, // 22
     a, // 23
     a, // 24
     a, // 25
     a, // 26
     a, // 27
     a, // 28
     a, // 29
     a, // 30
     a, // 31
     a, // 32
     a, // 33
     a, // 34
     a, // 35
     a, // 36
     a, // 37
     a, // 38
     a, // 39
     a, // 40
     a, // 41
     a, // 42
     a, // 43
     a, // 44
     a, // 45
     a, // 46
     a, // 47
     a, // 48
     a, // 49
     a, // 50
     a, // 51
     a, // 52
     a, // 53
     a, // 54
     a, // 55
     a, // 56
     a, // 57
     a, // 58
     a, // 59
     a, // 60
     a, // 61
     a, // 62
     a, // 63
     a, // 64
     a, // 65
     a, // 66
     a, // 67
     a, // 68
     a, // 69
     a, // 70
     a, // 71
     a, // 72
     a, // 73
     a, // 74
     a, // 75
     a, // 76
     a, // 77
     a, // 78
     a, // 79
     a, // 80
     a, // 81
     a, // 82
     a, // 83
     a, // 84
     a, // 85
     a, // 86
     a, // 87
     a, // 88
     a, // 89
     a, // 90
     a, // 91
     a, // 92
     a, // 93
     a, // 94
     a, // 95
     a, // 96
     a, // 97
     a, // 98
     a, // 99
     a, // 100
     a, // 101
     a, // 102
     a, // 103
     a, // 104
     a, // 105
     a, // 106
     a, // 107
     a, // 108
     a, // 109
     a, // 110
     a, // 111
     a, // 112
     a, // 113
     a, // 114
     a, // 115
     a, // 116
     a, // 117
     a, // 118
     a, // 119
     a, // 120
     a, // 121
     a, // 122
     a, // 123
     a, // 124
     a, // 125
     a, // 126
     a, // 127
     a, // 128
     a, // 129
     a, // 130
     a, // 131
     a, // 132
     a, // 133
     a, // 134
     a, // 135
     a, // 136
     a, // 137
     a, // 138
     a, // 139
     a, // 140
     a, // 141
     a, // 142
     a, // 143
     a, // 144
     a, // 145
     a, // 146
     a, // 147
     a, // 148
     a, // 149
     a, // 150
     a, // 151
     a, // 152
     a, // 153
     a, // 154
     a, // 155
     a, // 156
     a, // 157
     a, // 158
     a, // 159
     a, // 160
     a, // 161
     a, // 162
     a, // 163
     a, // 164
     a, // 165
     a, // 166
     a, // 167
     a, // 168
     a, // 169
     a, // 170
     a, // 171
     a, // 172
     a, // 173
     a, // 174
     a, // 175
     a, // 176
     a, // 177
     a, // 178
     a, // 179
     a, // 180
     a, // 181
     a, // 182
     a, // 183
     a, // 184
     a, // 185
     a, // 186
     a, // 187
     a, // 188
     a, // 189
     a, // 190
     a, // 191
     a, // 192
     a, // 193
     a, // 194
     a, // 195
     a, // 196
     a, // 197
     a, // 198
     a, // 199
     a, // 200
     a, // 201
     a, // 202
     a, // 203
     a, // 204
     a, // 205
     a, // 206
     a, // 207
     a, // 208
     a, // 209
     a, // 210
     a, // 211
     a, // 212
     a, // 213
     a, // 214
     a, // 215
     a, // 216
     a, // 217
     a, // 218
     a, // 219
     a, // 220
     a, // 221
     a, // 222
     a, // 223
     a, // 224
     a, // 225
     a, // 226
     a, // 227
     a, // 228
     a, // 229
     a, // 230
     a, // 231
     a, // 232
     a, // 233
     a, // 234
     a, // 235
     a, // 236
     a, // 237
     a, // 238
     a, // 239
     a, // 240
     a, // 241
     a, // 242
     a, // 243
     a, // 244
     a, // 245
     a, // 246
     a, // 247
     a, // 248
     a, // 249
     a, // 250
     a, // 251
     a, // 252
     a, // 253
     a, // 254
     a, // 255
     a); // Error at 'a': Can't have more than 255 arguments.
}


================================================
FILE: test/function/too_many_parameters.lox
================================================
// Declares 256 parameters (a1 through a255, then a), one more than the limit.
fun f(
    a1,
    a2,
    a3,
    a4,
    a5,
    a6,
    a7,
    a8,
    a9,
    a10,
    a11,
    a12,
    a13,
    a14,
    a15,
    a16,
    a17,
    a18,
    a19,
    a20,
    a21,
    a22,
    a23,
    a24,
    a25,
    a26,
    a27,
    a28,
    a29,
    a30,
    a31,
    a32,
    a33,
    a34,
    a35,
    a36,
    a37,
    a38,
    a39,
    a40,
    a41,
    a42,
    a43,
    a44,
    a45,
    a46,
    a47,
    a48,
    a49,
    a50,
    a51,
    a52,
    a53,
    a54,
    a55,
    a56,
    a57,
    a58,
    a59,
    a60,
    a61,
    a62,
    a63,
    a64,
    a65,
    a66,
    a67,
    a68,
    a69,
    a70,
    a71,
    a72,
    a73,
    a74,
    a75,
    a76,
    a77,
    a78,
    a79,
    a80,
    a81,
    a82,
    a83,
    a84,
    a85,
    a86,
    a87,
    a88,
    a89,
    a90,
    a91,
    a92,
    a93,
    a94,
    a95,
    a96,
    a97,
    a98,
    a99,
    a100,
    a101,
    a102,
    a103,
    a104,
    a105,
    a106,
    a107,
    a108,
    a109,
    a110,
    a111,
    a112,
    a113,
    a114,
    a115,
    a116,
    a117,
    a118,
    a119,
    a120,
    a121,
    a122,
    a123,
    a124,
    a125,
    a126,
    a127,
    a128,
    a129,
    a130,
    a131,
    a132,
    a133,
    a134,
    a135,
    a136,
    a137,
    a138,
    a139,
    a140,
    a141,
    a142,
    a143,
    a144,
    a145,
    a146,
    a147,
    a148,
    a149,
    a150,
    a151,
    a152,
    a153,
    a154,
    a155,
    a156,
    a157,
    a158,
    a159,
    a160,
    a161,
    a162,
    a163,
    a164,
    a165,
    a166,
    a167,
    a168,
    a169,
    a170,
    a171,
    a172,
    a173,
    a174,
    a175,
    a176,
    a177,
    a178,
    a179,
    a180,
    a181,
    a182,
    a183,
    a184,
    a185,
    a186,
    a187,
    a188,
    a189,
    a190,
    a191,
    a192,
    a193,
    a194,
    a195,
    a196,
    a197,
    a198,
    a199,
    a200,
    a201,
    a202,
    a203,
    a204,
    a205,
    a206,
    a207,
    a208,
    a209,
    a210,
    a211,
    a212,
    a213,
    a214,
    a215,
    a216,
    a217,
    a218,
    a219,
    a220,
    a221,
    a222,
    a223,
    a224,
    a225,
    a226,
    a227,
    a228,
    a229,
    a230,
    a231,
    a232,
    a233,
    a234,
    a235,
    a236,
    a237,
    a238,
    a239,
    a240,
    a241,
    a242,
    a243,
    a244,
    a245,
    a246,
    a247,
    a248,
    a249,
    a250,
    a251,
    a252,
    a253,
    a254,
    a255, a) {} // Error at 'a': Can't have more than 255 parameters.


================================================
FILE: test/if/class_in_else.lox
================================================
// [line 2] Error at 'class': Expect expression.
if (true) "ok"; else class Foo {} // A class declaration is used directly as the else branch.


================================================
FILE: test/if/class_in_then.lox
================================================
// [line 2] Error at 'class': Expect expression.
if (true) class Foo {} // A class declaration is used directly as the then branch.


================================================
FILE: test/if/dangling_else.lox
================================================
// A dangling else binds to the right-most (nearest) if.
if (true) if (false) print "bad"; else print "good"; // expect: good
// The outer condition is false, so the inner if/else never runs and nothing
// is printed. If the else belonged to the outer if, "bad" would be printed.
if (false) if (true) print "bad"; else print "bad";


================================================
FILE: test/if/else.lox
================================================
// The 'else' branch runs only when the condition is false.
if (true) print "good"; else print "bad"; // expect: good
if (false) print "bad"; else print "good"; // expect: good

// Allow block body.
if (false) nil; else { print "block"; } // expect: block


================================================
FILE: test/if/fun_in_else.lox
================================================
// [line 2] Error at 'fun': Expect expression.
if (true) "ok"; else fun foo() {} // A function declaration is used directly as the else branch.


================================================
FILE: test/if/fun_in_then.lox
================================================
// [line 2] Error at 'fun': Expect expression.
if (true) fun foo() {} // A function declaration is used directly as the then branch.


================================================
FILE: test/if/if.lox
================================================
// The 'then' branch runs only when the condition is true.
if (true) print "good"; // expect: good
if (false) print "bad";

// Allow block body.
if (true) { print "block"; } // expect: block

// Assignment in if condition: the assigned value is what gets tested.
var a = false;
if (a = true) print a; // expect: true


================================================
FILE: test/if/truth.lox
================================================
// Which values count as true or false when used as an if condition.

// False and nil are false.
if (false) print "bad"; else print "false"; // expect: false
if (nil) print "bad"; else print "nil"; // expect: nil

// Everything else is true, including zero and the empty string.
if (true) print true; // expect: true
if (0) print 0; // expect: 0
if ("") print "empty"; // expect: empty


================================================
FILE: test/if/var_in_else.lox
================================================
// [line 2] Error at 'var': Expect expression.
if (true) "ok"; else var foo; // A variable declaration is used directly as the else branch.


================================================
FILE: test/if/var_in_then.lox
================================================
// [line 2] Error at 'var': Expect expression.
if (true) var foo; // A variable declaration is used directly as the then branch.


================================================
FILE: test/inheritance/constructor.lox
================================================
// B declares no init of its own, so constructing a B runs the init inherited
// from A, and the constructor argument ends up in the field A's init sets.
class A {
  init(param) {
    this.field = param;
  }

  test() {
    print this.field;
  }
}

class B < A {}

var b = B("value");
b.test(); // expect: value


================================================
FILE: test/inheritance/inherit_from_function.lox
================================================
// Naming a function as the superclass fails at runtime.
fun foo() {}

class Subclass < foo {} // expect runtime error: Superclass must be a class.


================================================
FILE: test/inheritance/inherit_from_nil.lox
================================================
// Naming a variable that holds nil as the superclass fails at runtime.
var Nil = nil;
class Foo < Nil {} // expect runtime error: Superclass must be a class.


================================================
FILE: test/inheritance/inherit_from_number.lox
================================================
// Naming a variable that holds a number as the superclass fails at runtime.
var Number = 123;
class Foo < Number {} // expect runtime error: Superclass must be a class.


================================================
FILE: test/inheritance/inherit_methods.lox
================================================
// A subclass instance can call methods declared on its superclass as well as
// its own. When both declare the same method, the subclass version is used.
class Foo {
  methodOnFoo() { print "foo"; }
  override() { print "foo"; }
}

class Bar < Foo {
  methodOnBar() { print "bar"; }
  override() { print "bar"; }
}

var bar = Bar();
bar.methodOnFoo(); // expect: foo
bar.methodOnBar(); // expect: bar
bar.override(); // expect: bar


================================================
FILE: test/inheritance/parenthesized_superclass.lox
================================================
class Foo {}

// [line 4] Error at '(': Expect superclass name.
class Bar < (Foo) {} // The superclass is written as a parenthesized expression instead of a bare name.


================================================
FILE: test/inheritance/set_fields_from_base_class.lox
================================================
// Methods inherited from the base class and methods declared on the subclass
// read and write the same fields on a single instance.
class Foo {
  foo(a, b) {
    this.field1 = a;
    this.field2 = b;
  }

  fooPrint() {
    print this.field1;
    print this.field2;
  }
}

class Bar < Foo {
  bar(a, b) {
    this.field1 = a;
    this.field2 = b;
  }

  barPrint() {
    print this.field1;
    print this.field2;
  }
}

var bar = Bar();
bar.foo("foo 1", "foo 2");
bar.fooPrint();
// expect: foo 1
// expect: foo 2

bar.bar("bar 1", "bar 2");
bar.barPrint();
// expect: bar 1
// expect: bar 2

// The base-class printer now sees the values written by the subclass method.
bar.fooPrint();
// expect: bar 1
// expect: bar 2


================================================
FILE: test/limit/loop_too_large.lox
================================================
var a = 0;
while (false) {
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;


  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;

  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
  nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil; nil;
} // Error at '}': Loop body too large.


================================================
FILE: test/limit/no_reuse_constants.lox
================================================
// Limit test: a repeated literal must still consume a new constant slot.
//
// The body first evaluates the 256 distinct number literals 0 through 255 as
// expression statements; none of them carries an expectation, so they are
// meant to compile cleanly. The final statement is the literal 1, which
// already appeared above. The expectation on that line says it must still be
// rejected, i.e. the compiler is not expected to reuse the earlier constant.
// NOTE(review): presumably the per-chunk constant limit is 256 -- confirm
// against the compiler.
fun f() {
  0; 1; 2; 3; 4; 5; 6; 7;
  8; 9; 10; 11; 12; 13; 14; 15;
  16; 17; 18; 19; 20; 21; 22; 23;
  24; 25; 26; 27; 28; 29; 30; 31;
  32; 33; 34; 35; 36; 37; 38; 39;
  40; 41; 42; 43; 44; 45; 46; 47;
  48; 49; 50; 51; 52; 53; 54; 55;
  56; 57; 58; 59; 60; 61; 62; 63;
  64; 65; 66; 67; 68; 69; 70; 71;
  72; 73; 74; 75; 76; 77; 78; 79;
  80; 81; 82; 83; 84; 85; 86; 87;
  88; 89; 90; 91; 92; 93; 94; 95;
  96; 97; 98; 99; 100; 101; 102; 103;
  104; 105; 106; 107; 108; 109; 110; 111;
  112; 113; 114; 115; 116; 117; 118; 119;
  120; 121; 122; 123; 124; 125; 126; 127;
  128; 129; 130; 131; 132; 133; 134; 135;
  136; 137; 138; 139; 140; 141; 142; 143;
  144; 145; 146; 147; 148; 149; 150; 151;
  152; 153; 154; 155; 156; 157; 158; 159;
  160; 161; 162; 163; 164; 165; 166; 167;
  168; 169; 170; 171; 172; 173; 174; 175;
  176; 177; 178; 179; 180; 181; 182; 183;
  184; 185; 186; 187; 188; 189; 190; 191;
  192; 193; 194; 195; 196; 197; 198; 199;
  200; 201; 202; 203; 204; 205; 206; 207;
  208; 209; 210; 211; 212; 213; 214; 215;
  216; 217; 218; 219; 220; 221; 222; 223;
  224; 225; 226; 227; 228; 229; 230; 231;
  232; 233; 234; 235; 236; 237; 238; 239;
  240; 241; 242; 243; 244; 245; 246; 247;
  248; 249; 250; 251; 252; 253; 254; 255;

  1; // Error at '1': Too many constants in one chunk.
}


================================================
FILE: test/limit/stack_overflow.lox
================================================
// Limit test: unbounded recursion must end in a "Stack overflow." runtime
// failure rather than a crash.
//
// foo declares sixteen uninitialized locals (a1 through a16) and then calls
// itself with no base case; the top-level call at the bottom starts the
// recursion. The expectation is attached to the recursive call site.
// NOTE(review): the sixteen locals presumably make each call occupy more
// stack slots so the limit is reached sooner -- confirm against the VM.
fun foo() {
  var a1;
  var a2;
  var a3;
  var a4;
  var a5;
  var a6;
  var a7;
  var a8;
  var a9;
  var a10;
  var a11;
  var a12;
  var a13;
  var a14;
  var a15;
  var a16;
  foo(); // expect runtime error: Stack overflow.
}

foo();


================================================
FILE: test/limit/too_many_constants.lox
================================================
// Limit test: one constant too many in a single function body.
//
// The body first evaluates the 256 distinct number literals 0 through 255 as
// expression statements; none of them carries an expectation, so they are
// meant to compile cleanly. The final statement adds one more constant, the
// string literal "oops", and the expectation on that line says the compiler
// must reject it there.
// NOTE(review): presumably the per-chunk constant limit is 256 -- confirm
// against the compiler.
fun f() {
  0; 1; 2; 3; 4; 5; 6; 7;
  8; 9; 10; 11; 12; 13; 14; 15;
  16; 17; 18; 19; 20; 21; 22; 23;
  24; 25; 26; 27; 28; 29; 30; 31;
  32; 33; 34; 35; 36; 37; 38; 39;
  40; 41; 42; 43; 44; 45; 46; 47;
  48; 49; 50; 51; 52; 53; 54; 55;
  56; 57; 58; 59; 60; 61; 62; 63;
  64; 65; 66; 67; 68; 69; 70; 71;
  72; 73; 74; 75; 76; 77; 78; 79;
  80; 81; 82; 83; 84; 85; 86; 87;
  88; 89; 90; 91; 92; 93; 94; 95;
  96; 97; 98; 99; 100; 101; 102; 103;
  104; 105; 106; 107; 108; 109; 110; 111;
  112; 113; 114; 115; 116; 117; 118; 119;
  120; 121; 122; 123; 124; 125; 126; 127;
  128; 129; 130; 131; 132; 133; 134; 135;
  136; 137; 138; 139; 140; 141; 142; 143;
  144; 145; 146; 147; 148; 149; 150; 151;
  152; 153; 154; 155; 156; 157; 158; 159;
  160; 161; 162; 163; 164; 165; 166; 167;
  168; 169; 170; 171; 172; 173; 174; 175;
  176; 177; 178; 179; 180; 181; 182; 183;
  184; 185; 186; 187; 188; 189; 190; 191;
  192; 193; 194; 195; 196; 197; 198; 199;
  200; 201; 202; 203; 204; 205; 206; 207;
  208; 209; 210; 211; 212; 213; 214; 215;
  216; 217; 218; 219; 220; 221; 222; 223;
  224; 225; 226; 227; 228; 229; 230; 231;
  232; 233; 234; 235; 236; 237; 238; 239;
  240; 241; 242; 243; 244; 245; 246; 247;
  248; 249; 250; 251; 252; 253; 254; 255;

  "oops"; // Error at '"oops"': Too many constants in one chunk.
}


================================================
FILE: test/limit/too_many_locals.lox
================================================
fun f() {
  // No "var v00;": the first slot is already taken, leaving 255 for v01-vff.

  var v01; var v02; var v03; var v04; var v05; var v06; var v07;
  var v08; var v09; var v0a; var v0b; var v0c; var v0d; var v0e; var v0f;

  var v10; var v11; var v12; var v13; var v14; var v15; var v16; var v17;
  var v18; var v19; var v1a; var v1b; var v1c; var v1d; var v1e; var v1f;

  var v20; var v21; var v22; var v23; var v24; var v25; var v26; var v27;
  var v28; var v29; var v2a; var v2b; var v2c; var v2d; var v2e; var v2f;

  var v30; var v31; var v32; var v33; var v34; var v35; var v36; var v37;
  var v38; var v39; var v3a; var v3b; var v3c; var v3d; var v3e; var v3f;

  var v40; var v41; var v42; var v43; var v44; var v45; var v46; var v47;
  var v48; var v49; var v4a; var v4b; var v4c; var v4d; var v4e; var v4f;

  var v50; var v51; var v52; var v53; var v54; var v55; var v56; var v57;
  var v58; var v59; var v5a; var v5b; var v5c; var v5d; var v5e; var v5f;

  var v60; var v61; var v62; var v63; var v64; var v65; var v66; var v67;
  var v68; var v69; var v6a; var v6b; var v6c; var v6d; var v6e; var v6f;

  var v70; var v71; var v72; var v73; var v74; var v75; var v76; var v77;
  var v78; var v79; var v7a; var v7b; var v7c; var v7d; var v7e; var v7f;

  var v80; var v81; var v82; var v83; var v84; var v85; var v86; var v87;
  var v88; var v89; var v8a; var v8b; var v8c; var v8d; var v8e; var v8f;

  var v90; var v91; var v92; var v93; var v94; var v95; var v96; var v97;
  var v98; var v99; var v9a; var v9b; var v9c; var v9d; var v9e; var v9f;

  var va0; var va1; var va2; var va3; var va4; var va5; var va6; var va7;
  var va8; var va9; var vaa; var vab; var vac; var vad; var vae; var vaf;

  var vb0; var vb1; var vb2; var vb3; var vb4; var vb5; var vb6; var vb7;
  var vb8; var vb9; var vba; var vbb; var vbc; var vbd; var vbe; var vbf;

  var vc0; var vc1; var vc2; var vc3; var vc4; var vc5; var vc6; var vc7;
  var vc8; var vc9; var vca; var vcb; var vcc; var vcd; var vce; var vcf;

  var vd0; var vd1; var vd2; var vd3; var vd4; var vd5; var vd6; var vd7;
  var vd8; var vd9; var vda; var vdb; var vdc; var vdd; var vde; var vdf;

  var ve0; var ve1; var ve2; var ve3; var ve4; var ve5; var ve6; var ve7;
  var ve8; var ve9; var vea; var veb; var vec; var ved; var vee; var vef;

  var vf0; var vf1; var vf2; var vf3; var vf4; var vf5; var vf6; var vf7;
  var vf8; var vf9; var vfa; var vfb; var vfc; var vfd; var vfe; var vff;

  var oops; // Error at 'oops': Too many local variables in function.
}


================================================
FILE: test/limit/too_many_upvalues.lox
================================================
fun f() {
  var v00; var v01; var v02; var v03; var v04; var v05; var v06; var v07;
  var v08; var v09; var v0a; var v0b; var v0c; var v0d; var v0e; var v0f;

  var v10; var v11; var v12; var v13; var v14; var v15; var v16; var v17;
  var v18; var v19; var v1a; var v1b; var v1c; var v1d; var v1e; var v1f;

  var v20; var v21; var v22; var v23; var v24; var v25; var v26; var v27;
  var v28; var v29; var v2a; var v2b; var v2c; var v2d; var v2e; var v2f;

  var v30; var v31; var v32; var v33; var v34; var v35; var v36; var v37;
  var v38; var v39; var v3a; var v3b; var v3c; var v3d; var v3e; var v3f;

  var v40; var v41; var v42; var v43; var v44; var v45; var v46; var v47;
  var v48; var v49; var v4a; var v4b; var v4c; var v4d; var v4e; var v4f;

  var v50; var v51; var v52; var v53; var v54; var v55; var v56; var v57;
  var v58; var v59; var v5a; var v5b; var v5c; var v5d; var v5e; var v5f;

  var v60; var v61; var v62; var v63; var v64; var v65; var v66; var v67;
  var v68; var v69; var v6a; var v6b; var v6c; var v6d; var v6e; var v6f;

  var v70; var v71; var v72; var v73; var v74; var v75; var v76; var v77;
  var v78; var v79; var v7a; var v7b; var v7c; var v7d; var v7e; var v7f;

  fun g() {
    var v80; var v81; var v82; var v83; var v84; var v85; var v86; var v87;
    var v88; var v89; var v8a; var v8b; var v8c; var v8d; var v8e; var v8f;

    var v90; var v91; var v92; var v93; var v94; var v95; var v96; var v97;
    var v98; var v99; var v9a; var v9b; var v9c; var v9d; var v9e; var v9f;

    var va0; var va1; var va2; var va3; var va4; var va5; var va6; var va7;
    var va8; var va9; var vaa; var vab; var vac; var vad; var vae; var vaf;

    var vb0; var vb1; var vb2; var vb3; var vb4; var vb5; var vb6; var vb7;
    var vb8; var vb9; var vba; var vbb; var vbc; var vbd; var vbe; var vbf;

    var vc0; var vc1; var vc2; var vc3; var vc4; var vc5; var vc6; var vc7;
    var vc8; var vc9; var vca; var vcb; var vcc; var vcd; var vce; var vcf;

    var vd0; var vd1; var vd2; var vd3; var vd4; var vd5; var vd6; var vd7;
    var vd8; var vd9; var vda; var vdb; var vdc; var vdd; var vde; var vdf;

    var ve0; var ve1; var ve2; var ve3; var ve4; var ve5; var ve6; var ve7;
    var ve8; var ve9; var vea; var veb; var vec; var ved; var vee; var vef;

    var vf0; var vf1; var vf2; var vf3; var vf4; var vf5; var vf6; var vf7;
    var vf8; var vf9; var vfa; var vfb; var vfc; var vfd; var vfe; var vff;

    var oops;

    fun h() {
      v00; v01; v02; v03; v04; v05; v06; v07;
      v08; v09; v0a; v0b; v0c; v0d; v0e; v0f;

      v10; v11; v12; v13; v14; v15; v16; v17;
      v18; v19; v1a; v1b; v1c; v1d; v1e; v1f;

      v20; v21; v22; v23; v24; v25; v26; v27;
      v28; v29; v2a; v2b; v2c; v2d; v2e; v2f;

      v30; v31; v32; v33; v34; v35; v36; v37;
      v38; v39; v3a; v3b; v3c; v3d; v3e; v3f;

      v40; v41; v42; v43; v44; v45; v46; v47;
      v48; v49; v4a; v4b; v4c; v4d; v4e; v4f;

      v50; v51; v52; v53; v54; v55; v56; v57;
      v58; v59; v5a; v5b; v5c; v5d; v5e; v5f;

      v60; v61; v62; v63; v64; v65; v66; v67;
      v68; v69; v6a; v6b; v6c; v6d; v6e; v6f;

      v70; v71; v72; v73; v74; v75; v76; v77;
      v78; v79; v7a; v7b; v7c; v7d; v7e; v7f;

      v80; v81; v82; v83; v84; v85; v86; v87;
      v88; v89; v8a; v8b; v8c; v8d; v8e; v8f;

      v90; v91; v92; v93; v94; v95; v96; v97;
      v98; v99; v9a; v9b; v9c; v9d; v9e; v9f;

      va0; va1; va2; va3; va4; va5; va6; va7;
      va8; va9; vaa; vab; vac; vad; vae; vaf;

      vb0; vb1; vb2; vb3; vb4; vb5; vb6; vb7;
      vb8; vb9; vba; vbb; vbc; vbd; vbe; vbf;

      vc0; vc1; vc2; vc3; vc4; vc5; vc6; vc7;
      vc8; vc9; vca; vcb; vcc; vcd; vce; vcf;

      vd0; vd1; vd2; vd3; vd4; vd5; vd6; vd7;
      vd8; vd9; vda; vdb; vdc; vdd; vde; vdf;

      ve0; ve1; ve2; ve3; ve4; ve5; ve6; ve7;
      ve8; ve9; vea; veb; vec; ved; vee; vef;

      vf0; vf1; vf2; vf3; vf4; vf5; vf6; vf7;
      vf8; vf9; vfa; vfb; vfc; vfd; vfe; vff;

      oops; // Error at 'oops': Too many closure variables in function.
    }
  }
}


================================================
FILE: test/logical_operator/and.lox
================================================
// Note: These tests implicitly depend on ints being truthy.

// Return the first falsy argument.
print false and 1; // expect: false
print true and 1; // expect: 1
print 1 and 2 and false; // expect: false

// Return the last argument if all are truthy.
print 1 and true; // expect: true
print 1 and 2 and 3; // expect: 3

// Short-circuit at the first falsy argument: (b = false) ends the chain, so
// (a = "bad") is never evaluated and a keeps the value true.
var a = "before";
var b = "before";
(a = true) and
    (b = false) and
    (a = "bad");
print a; // expect: true
print b; // expect: false


================================================
FILE: test/logical_operator/and_truth.lox
================================================
// false and nil are falsy, so `and` yields them instead of the right operand.
print false and "bad"; // expect: false
print nil and "bad"; // expect: nil

// Everything else is truthy, including 0 and the empty string, so `and`
// yields the right operand.
print true and "ok"; // expect: ok
print 0 and "ok"; // expect: ok
print "" and "ok"; // expect: ok


================================================
FILE: test/logical_operator/or.lox
================================================
// Note: These tests implicitly depend on ints being truthy.

// Return the first truthy argument.
print 1 or true; // expect: 1
print false or 1; // expect: 1
print false or false or true; // expect: true

// Return the last argument if all are falsy.
print false or false; // expect: false
print false or false or false; // expect: false

// Short-circuit at the first truthy argument: (b = true) ends the chain, so
// (a = "bad") is never evaluated and a keeps the value false.
var a = "before";
var b = "before";
(a = false) or
    (b = true) or
    (a = "bad");
print a; // expect: false
print b; // expect: true


================================================
FILE: test/logical_operator/or_truth.lox
================================================
// false and nil are falsy, so `or` yields the right operand.
print false or "ok"; // expect: ok
print nil or "ok"; // expect: ok

// Everything else is truthy, including 0 and strings, so `or` yields the
// left operand.
print true or "ok"; // expect: true
print 0 or "ok"; // expect: 0
print "s" or "ok"; // expect: s


================================================
FILE: test/method/arity.lox
================================================
// Methods declaring zero through eight parameters. method0 returns a fixed
// string, method1 returns its argument, and the rest return the sum of their
// arguments.
class Foo {
  method0() { return "no args"; }
  method1(a) { return a; }
  method2(a, b) { return a + b; }
  method3(a, b, c) { return a + b + c; }
  method4(a, b, c, d) { return a + b + c + d; }
  method5(a, b, c, d, e) { return a + b + c + d + e; }
  method6(a, b, c, d, e, f) { return a + b + c + d + e + f; }
  method7(a, b, c, d, e, f, g) { return a + b + c + d + e + f + g; }
  method8(a, b, c, d, e, f, g, h) { return a + b + c + d + e + f + g + h; }
}

// Call each method with exactly as many arguments as it declares.
var foo = Foo();
print foo.method0(); // expect: no args
print foo.method1(1); // expect: 1
print foo.method2(1, 2); // expect: 3
print foo.method3(1, 2, 3); // expect: 6
print foo.method4(1, 2, 3, 4); // expect: 10
print foo.method5(1, 2, 3, 4, 5); // expect: 15
print foo.method6(1, 2, 3, 4, 5, 6); // expect: 21
print foo.method7(1, 2, 3, 4, 5, 6, 7); // expect: 28
print foo.method8(1, 2, 3, 4, 5, 6, 7, 8); // expect: 36


================================================
FILE: test/method/empty_block.lox
================================================
// A method whose body is an empty block returns nil.
class Foo {
  bar() {}
}

print Foo().bar(); // expect: nil


================================================
FILE: test/method/extra_arguments.lox
================================================
// Calling a method with more arguments than it declares is a runtime error;
// the method body is not expected to run.
class Foo {
  method(a, b) {
    print a;
    print b;
  }
}

Foo().method(1, 2, 3, 4); // expect runtime error: Expected 2 arguments but got 4.


================================================
FILE: test/method/missing_arguments.lox
================================================
// Calling a method with fewer arguments than it declares is a runtime error.
class Foo {
  method(a, b) {}
}

Foo().method(1); // expect runtime error: Expected 2 arguments but got 1.


================================================
FILE: test/method/not_found.lox
================================================
// Calling a method the class does not define is a runtime error.
class Foo {}

Foo().unknown(); // expect runtime error: Undefined property 'unknown'.


================================================
FILE: test/method/print_bound_method.lox
================================================
// A method accessed on an instance without being called prints like a
// function, using the method's name.
class Foo {
  method() { }
}
var foo = Foo();
print foo.method; // expect: <fn method>


================================================
FILE: test/method/refer_to_name.lox
================================================
// A method's name is not a variable in scope inside its own body.
class Foo {
  method() {
    print method; // expect runtime error: Undefined variable 'method'.
  }
}

Foo().method();


================================================
FILE: test/method/too_many_arguments.lox
================================================
{
  var a = 1;
  true.method(
     a, // 1
     a, // 2
     a, // 3
     a, // 4
     a, // 5
     a, // 6
     a, // 7
     a, // 8
     a, // 9
     a, // 10
     a, // 11
     a, // 12
     a, // 13
     a, // 14
     a, // 15
     a, // 16
     a, // 17
     a, // 18
     a, // 19
     a, // 20
     a, // 21
     a, // 22
     a, // 23
     a, // 24
     a, // 25
     a, // 26
     a, // 27
     a, // 28
     a, // 29
     a, // 30
     a, // 31
     a, // 32
     a, // 33
     a, // 34
     a, // 35
     a, // 36
     a, // 37
     a, // 38
     a, // 39
     a, // 40
     a, // 41
     a, // 42
     a, // 43
     a, // 44
     a, // 45
     a, // 46
     a, // 47
     a, // 48
     a, // 49
     a, // 50
     a, // 51
     a, // 52
     a, // 53
     a, // 54
     a, // 55
     a, // 56
     a, // 57
     a, // 58
     a, // 59
     a, // 60
     a, // 61
     a, // 62
     a, // 63
     a, // 64
     a, // 65
     a, // 66
     a, // 67
     a, // 68
     a, // 69
     a, // 70
     a, // 71
     a, // 72
     a, // 73
     a, // 74
     a, // 75
     a, // 76
     a, // 77
     a, // 78
     a, // 79
     a, // 80
     a, // 81
     a, // 82
     a, // 83
     a, // 84
     a, // 85
     a, // 86
     a, // 87
     a, // 88
     a, // 89
     a, // 90
     a, // 91
     a, // 92
     a, // 93
     a, // 94
     a, // 95
     a, // 96
     a, // 97
     a, // 98
     a, // 99
     a, // 100
     a, // 101
     a, // 102
     a, // 103
     a, // 104
     a, // 105
     a, // 106
     a, // 107
     a, // 108
     a, // 109
     a, // 110
     a, // 111
     a, // 112
     a, // 113
     a, // 114
     a, // 115
     a, // 116
     a, // 117
     a, // 118
     a, // 119
     a, // 120
     a, // 121
     a, // 122
     a, // 123
     a, // 124
     a, // 125
     a, // 126
     a, // 127
     a, // 128
     a, // 129
     a, // 130
     a, // 131
     a, // 132
     a, // 133
     a, // 134
     a, // 135
     a, // 136
     a, // 137
     a, // 138
     a, // 139
     a, // 140
     a, // 141
     a, // 142
     a, // 143
     a, // 144
     a, // 145
     a, // 146
     a, // 147
     a, // 148
     a, // 149
     a, // 150
     a, // 151
     a, // 152
     a, // 153
     a, // 154
     a, // 155
     a, // 156
     a, // 157
     a, // 158
     a, // 159
     a, // 160
     a, // 161
     a, // 162
     a, // 163
     a, // 164
     a, // 165
     a, // 166
     a, // 167
     a, // 168
     a, // 169
     a, // 170
     a, // 171
     a, // 172
     a, // 173
     a, // 174
     a, // 175
     a, // 176
     a, // 177
     a, // 178
     a, // 179
     a, // 180
     a, // 181
     a, // 182
     a, // 183
     a, // 184
     a, // 185
     a, // 186
     a, // 187
     a, // 188
     a, // 189
     a, // 190
     a, // 191
     a, // 192
     a, // 193
     a, // 194
     a, // 195
     a, // 196
     a, // 197
     a, // 198
     a, // 199
     a, // 200
     a, // 201
     a, // 202
     a, // 203
     a, // 204
     a, // 205
     a, // 206
     a, // 207
     a, // 208
     a, // 209
     a, // 210
     a, // 211
     a, // 212
     a, // 213
     a, // 214
     a, // 215
     a, // 216
     a, // 217
     a, // 218
     a, // 219
     a, // 220
     a, // 221
     a, // 222
     a, // 223
     a, // 224
     a, // 225
     a, // 226
     a, // 227
     a, // 228
     a, // 229
     a, // 230
     a, // 231
     a, // 232
     a, // 233
     a, // 234
     a, // 235
     a, // 236
     a, // 237
     a, // 238
     a, // 239
     a, // 240
     a, // 241
     a, // 242
     a, // 243
     a, // 244
     a, // 245
     a, // 246
     a, // 247
     a, // 248
     a, // 249
     a, // 250
     a, // 251
     a, // 252
     a, // 253
     a, // 254
     a, // 255
     a); // Error at 'a': Can't have more than 255 arguments.
}


================================================
FILE: test/method/too_many_parameters.lox
================================================
class Foo {
  // 256 parameters: a1 through a255, then a, which is one more than allowed.
  method(
    a1,
    a2,
    a3,
    a4,
    a5,
    a6,
    a7,
    a8,
    a9,
    a10,
    a11,
    a12,
    a13,
    a14,
    a15,
    a16,
    a17,
    a18,
    a19,
    a20,
    a21,
    a22,
    a23,
    a24,
    a25,
    a26,
    a27,
    a28,
    a29,
    a30,
    a31,
    a32,
    a33,
    a34,
    a35,
    a36,
    a37,
    a38,
    a39,
    a40,
    a41,
    a42,
    a43,
    a44,
    a45,
    a46,
    a47,
    a48,
    a49,
    a50,
    a51,
    a52,
    a53,
    a54,
    a55,
    a56,
    a57,
    a58,
    a59,
    a60,
    a61,
    a62,
    a63,
    a64,
    a65,
    a66,
    a67,
    a68,
    a69,
    a70,
    a71,
    a72,
    a73,
    a74,
    a75,
    a76,
    a77,
    a78,
    a79,
    a80,
    a81,
    a82,
    a83,
    a84,
    a85,
    a86,
    a87,
    a88,
    a89,
    a90,
    a91,
    a92,
    a93,
    a94,
    a95,
    a96,
    a97,
    a98,
    a99,
    a100,
    a101,
    a102,
    a103,
    a104,
    a105,
    a106,
    a107,
    a108,
    a109,
    a110,
    a111,
    a112,
    a113,
    a114,
    a115,
    a116,
    a117,
    a118,
    a119,
    a120,
    a121,
    a122,
    a123,
    a124,
    a125,
    a126,
    a127,
    a128,
    a129,
    a130,
    a131,
    a132,
    a133,
    a134,
    a135,
    a136,
    a137,
    a138,
    a139,
    a140,
    a141,
    a142,
    a143,
    a144,
    a145,
    a146,
    a147,
    a148,
    a149,
    a150,
    a151,
    a152,
    a153,
    a154,
    a155,
    a156,
    a157,
    a158,
    a159,
    a160,
    a161,
    a162,
    a163,
    a164,
    a165,
    a166,
    a167,
    a168,
    a169,
    a170,
    a171,
    a172,
    a173,
    a174,
    a175,
    a176,
    a177,
    a178,
    a179,
    a180,
    a181,
    a182,
    a183,
    a184,
    a185,
    a186,
    a187,
    a188,
    a189,
    a190,
    a191,
    a192,
    a193,
    a194,
    a195,
    a196,
    a197,
    a198,
    a199,
    a200,
    a201,
    a202,
    a203,
    a204,
    a205,
    a206,
    a207,
    a208,
    a209,
    a210,
    a211,
    a212,
    a213,
    a214,
    a215,
    a216,
    a217,
    a218,
    a219,
    a220,
    a221,
    a222,
    a223,
    a224,
    a225,
    a226,
    a227,
    a228,
    a229,
    a230,
    a231,
    a232,
    a233,
    a234,
    a235,
    a236,
    a237,
    a238,
    a239,
    a240,
    a241,
    a242,
    a243,
    a244,
    a245,
    a246,
    a247,
    a248,
    a249,
    a250,
    a251,
    a252,
    a253,
    a254,
    a255, a) {} // Error at 'a': Can't have more than 255 parameters.
}


================================================
FILE: test/nil/literal.lox
================================================
// The nil literal prints as "nil".
print nil; // expect: nil


================================================
FILE: test/number/decimal_point_at_eof.lox
================================================
// [line 2] Error at end: Expect property name after '.'.
123.

================================================
FILE: test/number/leading_dot.lox
================================================
// [line 2] Error at '.': Expect expression.
.123; // A number literal may not begin with '.'.


================================================
FILE: test/number/literals.lox
================================================
// Integer literals, including zero and negated zero.
print 123;     // expect: 123
print 987654;  // expect: 987654
print 0;       // expect: 0
print -0;      // expect: -0

// Literals with a fractional part.
print 123.456; // expect: 123.456
print -0.001;  // expect: -0.001


================================================
FILE: test/number/nan_equality.lox
================================================
// 0/0 is used to produce a NaN value.
var nan = 0/0;

// NaN is not equal to ordinary numbers.
print nan == 0; // expect: false
print nan != 1; // expect: true

// NaN is not equal to itself.
print nan == nan; // expect: false
print nan != nan; // expect: true


================================================
FILE: test/number/trailing_dot.lox
================================================
// [line 2] Error at ';': Expect property name after '.'.
123.; // The '.' is treated as property access, not as part of the number.


================================================
FILE: test/operator/add.lox
================================================
// + adds two numbers and concatenates two strings.
print 123 + 456; // expect: 579
print "str" + "ing"; // expect: string


================================================
FILE: test/operator/add_bool_nil.lox
================================================
// Adding a boolean and nil is a runtime error.
true + nil; // expect runtime error: Operands must be two numbers or two strings.


================================================
FILE: test/operator/add_bool_num.lox
================================================
// Adding a boolean and a number is a runtime error.
true + 123; // expect runtime error: Operands must be two numbers or two strings.


================================================
FILE: test/operator/add_bool_string.lox
================================================
// Adding a boolean and a string is a runtime error.
true + "s"; // expect runtime error: Operands must be two numbers or two strings.


================================================
FILE: test/operator/add_nil_nil.lox
================================================
// Adding nil to nil is a runtime error.
nil + nil; // expect runtime error: Operands must be two numbers or two strings.


================================================
FILE: test/operator/add_num_nil.lox
================================================
// Adding a number and nil is a runtime error.
1 + nil; // expect runtime error: Operands must be two numbers or two strings.


================================================
FILE: test/operator/add_string_nil.lox
================================================
// Adding a string and nil is a runtime error; nil is not converted to text.
"s" + nil; // expect runtime error: Operands must be two numbers or two strings.


================================================
FILE: test/operator/comparison.lox
================================================
// Each operator is checked with a smaller, an equal, and a larger left
// operand, in that order.
print 1 < 2;    // expect: true
print 2 < 2;    // expect: false
print 2 < 1;    // expect: false

print 1 <= 2;    // expect: true
print 2 <= 2;    // expect: true
print 2 <= 1;    // expect: false

print 1 > 2;    // expect: false
print 2 > 2;    // expect: false
print 2 > 1;    // expect: true

print 1 >= 2;    // expect: false
print 2 >= 2;    // expect: true
print 2 >= 1;    // expect: true

// Zero and negative zero compare the same.
print 0 < -0; // expect: false
print -0 < 0; // expect: false
print 0 > -0; // expect: false
print -0 > 0; // expect: false
print 0 <= -0; // expect: true
print -0 <= 0; // expect: true
print 0 >= -0; // expect: true
print -0 >= 0; // expect: true


================================================
FILE: test/operator/divide.lox
================================================
// Division of whole numbers and of numbers with a fractional part.
print 8 / 2;         // expect: 4
print 12.34 / 12.34;  // expect: 1


================================================
FILE: test/operator/divide_nonnum_num.lox
================================================
// A string left operand is not converted to a number for division.
"1" / 1; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/divide_num_nonnum.lox
================================================
// A string right operand is not converted to a number for division.
1 / "1"; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/equals.lox
================================================
// Equal and unequal values of the same type.
print nil == nil; // expect: true

print true == true; // expect: true
print true == false; // expect: false

print 1 == 1; // expect: true
print 1 == 2; // expect: false

print "str" == "str"; // expect: true
print "str" == "ing"; // expect: false

// Values of different types are never equal.
print nil == false; // expect: false
print false == 0; // expect: false
print 0 == "0"; // expect: false


================================================
FILE: test/operator/equals_class.lox
================================================
// Classes have identity equality.
class Foo {}
class Bar {}

print Foo == Foo; // expect: true
print Foo == Bar; // expect: false
print Bar == Foo; // expect: false
print Bar == Bar; // expect: true

// A class is never equal to a value of another type.
print Foo == "Foo"; // expect: false
print Foo == nil;   // expect: false
print Foo == 123;   // expect: false
print Foo == true;  // expect: false


================================================
FILE: test/operator/equals_method.lox
================================================
// Bound methods have identity equality.
class Foo {
  method() {}
}

var foo = Foo();
var fooMethod = foo.method;

// Same bound method.
print fooMethod == fooMethod; // expect: true

// Different closurizations: each `foo.method` access yields a distinct bound
// method, even for the same instance and method.
print foo.method == foo.method; // expect: false


================================================
FILE: test/operator/greater_nonnum_num.lox
================================================
// A string left operand is not converted to a number for >.
"1" > 1; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/greater_num_nonnum.lox
================================================
// A string right operand is not converted to a number for >.
1 > "1"; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/greater_or_equal_nonnum_num.lox
================================================
// A string left operand is not converted to a number for >=.
"1" >= 1; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/greater_or_equal_num_nonnum.lox
================================================
// A string right operand is not converted to a number for >=.
1 >= "1"; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/less_nonnum_num.lox
================================================
// A string left operand is not converted to a number for <.
"1" < 1; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/less_num_nonnum.lox
================================================
// A string right operand is not converted to a number for <.
1 < "1"; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/less_or_equal_nonnum_num.lox
================================================
// A string left operand is not converted to a number for <=.
"1" <= 1; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/less_or_equal_num_nonnum.lox
================================================
// A string right operand is not converted to a number for <=.
1 <= "1"; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/multiply.lox
================================================
// Multiplication of whole numbers and of numbers with a fractional part.
print 5 * 3; // expect: 15
print 12.34 * 0.3; // expect: 3.702


================================================
FILE: test/operator/multiply_nonnum_num.lox
================================================
// A string left operand is not converted to a number for multiplication.
"1" * 1; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/multiply_num_nonnum.lox
================================================
// A string right operand is not converted to a number for multiplication.
1 * "1"; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/negate.lox
================================================
// Unary minus can be stacked; each additional '-' flips the sign again.
print -(3); // expect: -3
print --(3); // expect: 3
print ---(3); // expect: -3


================================================
FILE: test/operator/negate_nonnum.lox
================================================
// Negating a string is a runtime error.
-"s"; // expect runtime error: Operand must be a number.


================================================
FILE: test/operator/not.lox
================================================
// Only false and nil are falsy. Numbers (including 0), strings (including
// the empty string), and functions are all truthy.
print !true;     // expect: false
print !false;    // expect: true
print !!true;    // expect: true

print !123;      // expect: false
print !0;        // expect: false

print !nil;     // expect: true

print !"";       // expect: false

fun foo() {}
print !foo;      // expect: false


================================================
FILE: test/operator/not_class.lox
================================================
// Both a class and an instance of it are truthy.
class Bar {}
print !Bar;      // expect: false
print !Bar();    // expect: false


================================================
FILE: test/operator/not_equals.lox
================================================
// Equal and unequal values of the same type.
print nil != nil; // expect: false

print true != true; // expect: false
print true != false; // expect: true

print 1 != 1; // expect: false
print 1 != 2; // expect: true

print "str" != "str"; // expect: false
print "str" != "ing"; // expect: true

// Values of different types are always unequal.
print nil != false; // expect: true
print false != 0; // expect: true
print 0 != "0"; // expect: true


================================================
FILE: test/operator/subtract.lox
================================================
// Subtraction of whole numbers and of numbers with a fractional part.
print 4 - 3; // expect: 1
print 1.2 - 1.2; // expect: 0


================================================
FILE: test/operator/subtract_nonnum_num.lox
================================================
// A string left operand is not converted to a number for subtraction.
"1" - 1; // expect runtime error: Operands must be numbers.


================================================
FILE: test/operator/subtract_num_nonnum.lox
================================================
// A string right operand is not converted to a number for subtraction.
1 - "1"; // expect runtime error: Operands must be numbers.


================================================
FILE: test/precedence.lox
================================================
// Each case below gives a different result if the two operators are grouped
// the other way around.

// * has higher precedence than +.
print 2 + 3 * 4; // expect: 14

// * has higher precedence than -.
print 20 - 3 * 4; // expect: 8

// / has higher precedence than +.
print 2 + 6 / 3; // expect: 4

// / has higher precedence than -.
print 2 - 6 / 3; // expect: 0

// < has higher precedence than ==.
print false == 2 < 1; // expect: true

// > has higher precedence than ==.
print false == 1 > 2; // expect: true

// <= has higher precedence than ==.
print false == 2 <= 1; // expect: true

// >= has higher precedence than ==.
print false == 1 >= 2; // expect: true

// 1 - 1 is not space-sensitive: "-1" is never scanned as a negative literal
// that would leave two adjacent numbers.
print 1 - 1; // expect: 0
print 1 -1;  // expect: 0
print 1- 1;  // expect: 0
print 1-1;   // expect: 0

// Using () for grouping.
print (2 * (6 - (2 + 2))); // expect: 4


================================================
FILE: test/print/missing_argument.lox
================================================
// [line 2] Error at ';': Expect expression.
print; // print requires an expression before the ';'.


================================================
FILE: test/regression/394.lox
================================================
// Regression test (NOTE(review): presumably for issue #394 -- confirm):
// a class inheriting from another class, both declared inside a block scope.
{
  class A {}
  class B < A {}
  print B; // expect: B
}


================================================
FILE: test/regression/40.lox
================================================
// Regression test (NOTE(review): presumably for issue #40 -- confirm):
// a closure that assigns to a captured variable is passed to another
// function, which calls it and then checks that its parameter is still the
// function rather than nil.
fun caller(g) {
  g();
  // g should be a function, not nil.
  print g == nil; // expect: false
}

fun callCaller() {
  var capturedVar = "before";
  var a = "a";

  fun f() {
    // Commenting the next line out prevents the bug!
    capturedVar = "after";

    // Returning anything also fixes it, even nil:
    //return nil;
  }

  caller(f);
}

callCaller();


================================================
FILE: test/return/after_else.lox
================================================
// A return statement may be the unbraced body of an else branch.
fun f() {
  if (false) "no"; else return "ok";
}

print f(); // expect: ok


================================================
FILE: test/return/after_if.lox
================================================
// A return statement may be the unbraced body of an if statement.
fun f() {
  if (true) return "ok";
}

print f(); // expect: ok


================================================
FILE: test/return/after_while.lox
================================================
// A return statement may be the unbraced body of a while loop; it exits the
// otherwise endless loop.
fun f() {
  while (true) return "ok";
}

print f(); // expect: ok


================================================
FILE: test/return/at_top_level.lox
================================================
// A return statement outside of any function is rejected.
return "wat"; // Error at 'return': Can't return from top-level code.


================================================
FILE: test/return/in_function.lox
================================================
// return exits the function immediately; the print after it never runs.
fun f() {
  return "ok";
  print "bad";
}

print f(); // expect: ok


================================================
FILE: test/return/in_method.lox
================================================
class Foo {
  method() {
    return "ok";
    print "bad";
  }
}

print Foo().method(); // expect: ok


================================================
FILE: test/return/return_nil_if_no_value.lox
================================================
fun f() {
  return;
  print "bad";
}

print f(); // expect: nil


================================================
FILE: test/scanning/identifiers.lox
================================================
andy formless fo _ _123 _abc ab123
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_

// expect: IDENTIFIER andy null
// expect: IDENTIFIER formless null
// expect: IDENTIFIER fo null
// expect: IDENTIFIER _ null
// expect: IDENTIFIER _123 null
// expect: IDENTIFIER _abc null
// expect: IDENTIFIER ab123 null
// expect: IDENTIFIER abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_ null
// expect: EOF  null


================================================
FILE: test/scanning/keywords.lox
================================================
and class else false for fun if nil or return super this true var while

// expect: AND and null
// expect: CLASS class null
// expect: ELSE else null
// expect: FALSE false null
// expect: FOR for null
// expect: FUN fun null
// expect: IF if null
// expect: NIL nil null
// expect: OR or null
// expect: RETURN return null
// expect: SUPER super null
// expect: THIS this null
// expect: TRUE true null
// expect: VAR var null
// expect: WHILE while null
// expect: EOF  null


================================================
FILE: test/scanning/numbers.lox
================================================
123
123.456
.456
123.

// expect: NUMBER 123 123.0
// expect: NUMBER 123.456 123.456
// expect: DOT . null
// expect: NUMBER 456 456.0
// expect: NUMBER 123 123.0
// expect: DOT . null
// expect: EOF  null


================================================
FILE: test/scanning/punctuators.lox
================================================
(){};,+-*!===<=>=!=<>/.

// expect: LEFT_PAREN ( null
// expect: RIGHT_PAREN ) null
// expect: LEFT_BRACE { null
// expect: RIGHT_BRACE } null
// expect: SEMICOLON ; null
// expect: COMMA , null
// expect: PLUS + null
// expect: MINUS - null
// expect: STAR * null
// expect: BANG_EQUAL != null
// expect: EQUAL_EQUAL == null
// expect: LESS_EQUAL <= null
// expect: GREATER_EQUAL >= null
// expect: BANG_EQUAL != null
// expect: LESS < null
// expect: GREATER > null
// expect: SLASH / null
// expect: DOT . null
// expect: EOF  null


================================================
FILE: test/scanning/strings.lox
================================================
""
"string"

// expect: STRING "" 
// expect: STRING "string" string
// expect: EOF  null

================================================
FILE: test/scanning/whitespace.lox
================================================
space    tabs				newlines


end

// expect: IDENTIFIER space null
// expect: IDENTIFIER tabs null
// expect: IDENTIFIER newlines null
// expect: IDENTIFIER end null
// expect: EOF  null


================================================
FILE: test/string/error_after_multiline.lox
================================================
// Tests that we correctly track the line info across multiline strings.
var a = "1
2
3
";

err; // // expect runtime error: Undefined variable 'err'.

================================================
FILE: test/string/literals.lox
================================================
print "(" + "" + ")";   // expect: ()
print "a string"; // expect: a string

// Non-ASCII.
print "A~¶Þॐஃ"; // expect: A~¶Þॐஃ


================================================
FILE: test/string/multiline.lox
================================================
var a = "1
2
3";
print a;
// expect: 1
// expect: 2
// expect: 3


================================================
FILE: test/string/unterminated.lox
================================================
// [line 2] Error: Unterminated string.
"this string has no close quote

================================================
FILE: test/super/bound_method.lox
================================================
class A {
  method(arg) {
    print "A.method(" + arg + ")";
  }
}

class B < A {
  getClosure() {
    return super.method;
  }

  method(arg) {
    print "B.method(" + arg + ")";
  }
}


var closure = B().getClosure();
closure("arg"); // expect: A.method(arg)


================================================
FILE: test/super/call_other_method.lox
================================================
class Base {
  foo() {
    print "Base.foo()";
  }
}

class Derived < Base {
  bar() {
    print "Derived.bar()";
    super.foo();
  }
}

Derived().bar();
// expect: Derived.bar()
// expect: Base.foo()


================================================
FILE: test/super/call_same_method.lox
================================================
class Base {
  foo() {
    print "Base.foo()";
  }
}

class Derived < Base {
  foo() {
    print "Derived.foo()";
    super.foo();
  }
}

Derived().foo();
// expect: Derived.foo()
// expect: Base.foo()


================================================
FILE: test/super/closure.lox
================================================
class Base {
  toString() { return "Base"; }
}

class Derived < Base {
  getClosure() {
    fun closure() {
      return super.toString();
    }
    return closure;
  }

  toString() { return "Derived"; }
}

var closure = Derived().getClosure();
print closure(); // expect: Base


================================================
FILE: test/super/constructor.lox
================================================
class Base {
  init(a, b) {
    print "Base.init(" + a + ", " + b + ")";
  }
}

class Derived < Base {
  init() {
    print "Derived.init()";
    super.init("a", "b");
  }
}

Derived();
// expect: Derived.init()
// expect: Base.init(a, b)


================================================
FILE: test/super/extra_arguments.lox
================================================
class Base {
  foo(a, b) {
    print "Base.foo(" + a + ", " + b + ")";
  }
}

class Derived < Base {
  foo() {
    print "Derived.foo()"; // expect: Derived.foo()
    super.foo("a", "b", "c", "d"); // expect runtime error: Expected 2 arguments but got 4.
  }
}

Derived().foo();


================================================
FILE: test/super/indirectly_inherited.lox
================================================
class A {
  foo() {
    print "A.foo()";
  }
}

class B < A {}

class C < B {
  foo() {
    print "C.foo()";
    super.foo();
  }
}

C().foo();
// expect: C.foo()
// expect: A.foo()


================================================
FILE: test/super/missing_arguments.lox
================================================
class Base {
  foo(a, b) {
    print "Base.foo(" + a + ", " + b + ")";
  }
}

class Derived < Base {
  foo() {
    super.foo(1); // expect runtime error: Expected 2 arguments but got 1.
  }
}

Derived().foo();


================================================
FILE: test/super/no_superclass_bind.lox
================================================
class Base {
  foo() {
    super.doesNotExist; // Error at 'super': Can't use 'super' in a class with no superclass.
  }
}

Base().foo();


================================================
FILE: test/super/no_superclass_call.lox
================================================
class Base {
  foo() {
    super.doesNotExist(1); // Error at 'super': Can't use 'super' in a class with no superclass.
  }
}

Base().foo();


================================================
FILE: test/super/no_superclass_method.lox
================================================
class Base {}

class Derived < Base {
  foo() {
    super.doesNotExist(1); // expect runtime error: Undefined property 'doesNotExist'.
  }
}

Derived().foo();


================================================
FILE: test/super/parenthesized.lox
================================================
class A {
  method() {}
}

class B < A {
  method() {
    // [line 8] Error at ')': Expect '.' after 'super'.
    (super).method();
  }
}


================================================
FILE: test/super/reassign_superclass.lox
================================================
class Base {
  method() {
    print "Base.method()";
  }
}

class Derived < Base {
  method() {
    super.method();
  }
}

class OtherBase {
  method() {
    print "OtherBase.method()";
  }
}

var derived = Derived();
derived.method(); // expect: Base.method()
Base = OtherBase;
derived.method(); // expect: Base.method()


================================================
FILE: test/super/super_at_top_level.lox
================================================
super.foo("bar"); // Error at 'super': Can't use 'super' outside of a class.
super.foo; // Error at 'super': Can't use 'super' outside of a class.

================================================
FILE: test/super/super_in_closure_in_inherited_method.lox
================================================
class A {
  say() {
    print "A";
  }
}

class B < A {
  getClosure() {
    fun closure() {
      super.say();
    }
    return closure;
  }

  say() {
    print "B";
  }
}

class C < B {
  say() {
    print "C";
  }
}

C().getClosure()(); // expect: A


================================================
FILE: test/super/super_in_inherited_method.lox
================================================
class A {
  say() {
    print "A";
  }
}

class B < A {
  test() {
    super.say();
  }

  say() {
    print "B";
  }
}

class C < B {
  say() {
    print "C";
  }
}

C().test(); // expect: A


================================================
FILE: test/super/super_in_top_level_function.lox
================================================
  super.bar(); // Error at 'super': Can't use 'super' outside of a class.
fun foo() {
}

================================================
FILE: test/super/super_without_dot.lox
================================================
class A {}

class B < A {
  method() {
    // [line 6] Error at ';': Expect '.' after 'super'.
    super;
  }
}


================================================
FILE: test/super/super_without_name.lox
================================================
class A {}

class B < A {
  method() {
    super.; // Error at ';': Expect superclass method name.
  }
}


================================================
FILE: test/super/this_in_superclass_method.lox
================================================
class Base {
  init(a) {
    this.a = a;
  }
}

class Derived < Base {
  init(a, b) {
    super.init(a);
    this.b = b;
  }
}

var derived = Derived("a", "b");
print derived.a; // expect: a
print derived.b; // expect: b


================================================
FILE: test/this/closure.lox
================================================
class Foo {
  getClosure() {
    fun closure() {
      return this.toString();
    }
    return closure;
  }

  toString() { return "Foo"; }
}

var closure = Foo().getClosure();
print closure(); // expect: Foo


================================================
FILE: test/this/nested_class.lox
================================================
class Outer {
  method() {
    print this; // expect: Outer instance

    fun f() {
      print this; // expect: Outer instance

      class Inner {
        method() {
          print this; // expect: Inner instance
        }
      }

      Inner().method();
    }
    f();
  }
}

Outer().method();


================================================
FILE: test/this/nested_closure.lox
================================================
class Foo {
  getClosure() {
    fun f() {
      fun g() {
        fun h() {
          return this.toString();
        }
        return h;
      }
      return g;
    }
    return f;
  }

  toString() { return "Foo"; }
}

var closure = Foo().getClosure();
print closure()()(); // expect: Foo


================================================
FILE: test/this/this_at_top_level.lox
================================================
this; // Error at 'this': Can't use 'this' outside of a class.


================================================
FILE: test/this/this_in_method.lox
================================================
class Foo {
  bar() { return this; }
  baz() { return "baz"; }
}

print Foo().bar().baz(); // expect: baz


================================================
FILE: test/this/this_in_top_level_function.lox
================================================
fun foo() {
  this; // Error at 'this': Can't use 'this' outside of a class.
}


================================================
FILE: test/unexpected_character.lox
================================================
// [line 3] Error: Unexpected character.
// [java line 3] Error at 'b': Expect ')' after arguments.
foo(a | b);


================================================
FILE: test/variable/collide_with_parameter.lox
================================================
fun foo(a) {
  var a; // Error at 'a': Already a variable with this name in this scope.
}


================================================
FILE: test/variable/duplicate_local.lox
================================================
{
  var a = "value";
  var a = "other"; // Error at 'a': Already a variable with this name in this scope.
}


================================================
FILE: test/variable/duplicate_parameter.lox
================================================
fun foo(arg,
        arg) { // Error at 'arg': Already a variable with this name in this scope.
  "body";
}


================================================
FILE: test/variable/early_bound.lox
================================================
var a = "outer";
{
  fun foo() {
    print a;
  }

  foo(); // expect: outer
  var a = "inner";
  foo(); // expect: outer
}


================================================
FILE: test/variable/in_middle_of_block.lox
================================================
{
  var a = "a";
  print a; // expect: a
  var b = a + " b";
  print b; // expect: a b
  var c = a + " c";
  print c; // expect: a c
  var d = b + " d";
  print d; // expect: a b d
}


================================================
FILE: test/variable/in_nested_block.lox
================================================
{
  var a = "outer";
  {
    print a; // expect: outer
  }
}

================================================
FILE: test/variable/local_from_method.lox
================================================
var foo = "variable";

class Foo {
  method() {
    print foo;
  }
}

Foo().method(); // expect: variable


================================================
FILE: test/variable/redeclare_global.lox
================================================
var a = "1";
var a;
print a; // expect: nil


================================================
FILE: test/variable/redefine_global.lox
================================================
var a = "1";
var a = "2";
print a; // expect: 2


================================================
FILE: test/variable/scope_reuse_in_different_blocks.lox
================================================
{
  var a = "first";
  print a; // expect: first
}

{
  var a = "second";
  print a; // expect: second
}


================================================
FILE: test/variable/shadow_and_local.lox
================================================
{
  var a = "outer";
  {
    print a; // expect: outer
    var a = "inner";
    print a; // expect: inner
  }
}

================================================
FILE: test/variable/shadow_global.lox
================================================
var a = "global";
{
  var a = "shadow";
  print a; // expect: shadow
}
print a; // expect: global


================================================
FILE: test/variable/shadow_local.lox
================================================
{
  var a = "local";
  {
    var a = "shadow";
    print a; // expect: shadow
  }
  print a; // expect: local
}


================================================
FILE: test/variable/undefined_global.lox
================================================
print notDefined;  // expect runtime error: Undefined variable 'notDefined'.


================================================
FILE: test/variable/undefined_local.lox
================================================
{
  print notDefined;  // expect runtime error: Undefined variable 'notDefined'.
}


================================================
FILE: test/variable/uninitialized.lox
================================================
var a;
print a; // expect: nil


================================================
FILE: test/variable/unreached_undefined.lox
================================================
if (false) {
  print notDefined;
}

print "ok"; // expect: ok


================================================
FILE: test/variable/use_false_as_var.lox
================================================
// [line 2] Error at 'false': Expect variable name.
var false = "value";


================================================
FILE: test/variable/use_global_in_initializer.lox
================================================
var a = "value";
var a = a;
print a; // expect: value


================================================
FILE: test/variable/use_local_in_initializer.lox
================================================
var a = "outer";
{
  var a = a; // Error at 'a': Can't read local variable in its own initializer.
}


================================================
FILE: test/variable/use_nil_as_var.lox
================================================
// [line 2] Error at 'nil': Expect variable name.
var nil = "value";


================================================
FILE: test/variable/use_this_as_var.lox
================================================
// [line 2] Error at 'this': Expect variable name.
var this = "value";


================================================
FILE: test/while/class_in_body.lox
================================================
// [line 2] Error at 'class': Expect expression.
while (true) class Foo {}


================================================
FILE: test/while/closure_in_body.lox
================================================
var f1;
var f2;
var f3;

var i = 1;
while (i < 4) {
  var j = i;
  fun f() { print j; }

  if (j == 1) f1 = f;
  else if (j == 2) f2 = f;
  else f3 = f;

  i = i + 1;
}

f1(); // expect: 1
f2(); // expect: 2
f3(); // expect: 3


================================================
FILE: test/while/fun_in_body.lox
================================================
// [line 2] Error at 'fun': Expect expression.
while (true) fun foo() {}


================================================
FILE: test/while/return_closure.lox
================================================
fun f() {
  while (true) {
    var i = "i";
    fun g() { print i; }
    return g;
  }
}

var h = f();
h(); // expect: i


================================================
FILE: test/while/return_inside.lox
================================================
fun f() {
  while (true) {
    var i = "i";
    return i;
  }
}

print f();
// expect: i


================================================
FILE: test/while/syntax.lox
================================================
// Single-expression body.
var c = 0;
while (c < 3) print c = c + 1;
// expect: 1
// expect: 2
// expect: 3

// Block body.
var a = 0;
while (a < 3) {
  print a;
  a = a + 1;
}
// expect: 0
// expect: 1
// expect: 2

// Statement bodies.
while (false) if (true) 1; else 2;
while (false) while (true) 1;
while (false) for (;;) 1;


================================================
FILE: test/while/var_in_body.lox
================================================
// [line 2] Error at 'var': Expect expression.
while (true) var foo;


================================================
FILE: tool/analysis_options.yaml
================================================
analyzer:
 strong-mode:
# Close, but still false positives around clamp().
  implicit-casts: false
# Too many false positives.
  implicit-dynamic: false
  

================================================
FILE: tool/bin/benchmark.dart
================================================
import 'dart:convert';
import 'dart:io';

import 'package:path/path.dart' as p;

/// Entry point for the benchmark runner.
///
/// The last element of [arguments] names the benchmark to run. Any elements
/// before it are the interpreter executables to time; when none are given,
/// `build/clox` is used. A single interpreter is timed on its own, while
/// several interpreters are compared against each other.
///
/// Prints a usage message and exits with status 1 if [arguments] is empty.
void main(List<String> arguments) {
  if (arguments.isEmpty) {
    // Name the script that actually exists so the hint is usable.
    print('Usage: benchmark.dart [interpreters...] <benchmark>');
    exit(1);
  }

  var interpreters = ['build/clox'];
  var benchmark = arguments.last;
  if (arguments.length > 1) {
    // Everything before the benchmark name is an interpreter path.
    interpreters = arguments.sublist(0, arguments.length - 1);
  }

  if (interpreters.length > 1) {
    runComparison(interpreters, benchmark);
  } else {
    runBenchmark(interpreters[0], benchmark);
  }
}

/// Times [benchmark] on a single [interpreter] over and over, reporting the
/// lowest elapsed time seen so far after each trial.
///
/// Loops forever; the user is expected to interrupt the process.
void runBenchmark(String interpreter, String benchmark) {
  var fastest = 9999.0;

  for (var trial = 1;; trial++) {
    var elapsed = runTrial(interpreter, benchmark);
    if (elapsed < fastest) fastest = elapsed;

    print("trial #$trial  $interpreter   "
        "best ${fastest.toStringAsFixed(2)}s");
  }
}

/// Runs the benchmark once and returns the elapsed time.
///
/// Launches [interpreter] on `test/benchmark/<benchmark>.lox` and parses the
/// last line of its standard output (ignoring one trailing empty line) as a
/// number.
///
/// Throws a [StateError] if the interpreter printed nothing, and a
/// [FormatException] if the last line is not a number.
double runTrial(String interpreter, String benchmark) {
  var result = Process.runSync(
      interpreter, [p.join("test", "benchmark", "$benchmark.lox")]);
  var outLines = const LineSplitter().convert(result.stdout as String);

  // Remove the trailing last empty line.
  if (outLines.isNotEmpty && outLines.last == "") outLines.removeLast();

  // With no output there is no timing to read. Fail with a message that says
  // what went wrong instead of the bare "No element" error from `.last`.
  if (outLines.isEmpty) {
    throw StateError("Benchmark '$benchmark' produced no output when run with "
        "'$interpreter' (exit code ${result.exitCode}).");
  }

  // The benchmark should print the elapsed time last.
  return double.parse(outLines.last);
}

/// Repeatedly runs [benchmark] on every one of [interpreters] and, after each
/// round of trials, prints how each interpreter's best time compares.
///
/// The fastest interpreter is reported as a percentage faster than the
/// slowest one; every other interpreter is reported as a multiple of the
/// fastest time. Loops forever; the user is expected to interrupt the process.
void runComparison(List<String> interpreters, String benchmark) {
  var trial = 1;
  var best = {for (var interpreter in interpreters) interpreter: 9999.0};

  for (;;) {
    for (var interpreter in interpreters) {
      var elapsed = runTrial(interpreter, benchmark);
      if (elapsed < best[interpreter]) best[interpreter] = elapsed;
    }

    // Seed with infinity so that some interpreter is always picked as the
    // best one, no matter how slow the benchmark is.
    var bestTime = double.infinity;
    // Floor the worst time at a small positive value so that `1.0 / worstTime`
    // below can never divide by zero.
    var worstTime = 0.001;
    String bestInterpreter;
    for (var interpreter in interpreters) {
      if (best[interpreter] < bestTime) {
        bestTime = best[interpreter];
        bestInterpreter = interpreter;
      }
      if (best[interpreter] > worstTime) {
        worstTime = best[interpreter];
      }
    }

    // Turn the time measurement into an effort measurement in units where 1
    // "work" is just the total thing the benchmark does.
    var worstWork = 1.0 / worstTime;

    print("trial #$trial");
    for (var interpreter in interpreters) {
      String suffix;
      if (interpreter == bestInterpreter) {
        // The winner is described relative to the slowest interpreter.
        var bestWork = 1.0 / best[interpreter];
        var workRatio = bestWork / worstWork;
        var faster = 100 * (workRatio - 1.0);
        suffix = "${faster.toStringAsFixed(4)}% faster";
      } else {
        // Everyone else is described relative to the winner.
        var ratio = best[interpreter] / bestTime;
        suffix = "${ratio.toStringAsFixed(4)}x time of best";
      }
      var bestString = best[interpreter].toStringAsFixed(4);
      print("  ${interpreter.padRight(30)}   best ${bestString}s  $suffix");
    }

    trial++;
  }
}


================================================
FILE: tool/bin/build.dart
================================================
import 'dart:io';

import 'package:glob/glob.dart';
import 'package:mime_type/mime_type.dart';
import 'package:path/path.dart' as p;
import 'package:sass/sass.dart' as sass;
import 'package:shelf/shelf.dart' as shelf;
import 'package:shelf/shelf_io.dart' as io;

import 'package:tool/src/book.dart';
import 'package:tool/src/format.dart';
import 'package:tool/src/markdown/markdown.dart';
import 'package:tool/src/mustache.dart';
import 'package:tool/src/page.dart';
import 'package:tool/src/term.dart' as term;
import 'package:tool/src/text.dart';

/// Aside comment marker in highlighted code.
///
/// Matches a highlighted comment span whose only content is a bracketed aside
/// name, such as `<span class="c">// [repl]</span>`, along with one optional
/// leading space. Group 1 is the aside name (lowercase letters, digits, and
/// hyphens).
final _asideHighlightedCommentPattern =
    RegExp(r' ?<span class="c">// \[([-a-z0-9]+)\] *</span>');

/// Aside comment marker in highlighted code with a comment too.
///
/// Like [_asideHighlightedCommentPattern], but the span also has comment text
/// before the bracketed name. Group 1 is that comment text and group 2 is the
/// aside name.
final _asideHighlightedWithCommentPattern =
    RegExp(r' ?<span class="c">// (.+) \[([-a-z0-9]+)\] *</span>');

/// Aside comment marker in context lines which are not syntax highlighted.
///
/// Matches one or more spaces followed by `// [name]`. Group 1 is the aside
/// name.
final _asideCommentPattern = RegExp(r' +// \[([-a-z0-9]+)\]');

/// Aside comment marker in context lines which are not syntax highlighted with
/// a comment too.
///
/// Group 1 is the comment text before the bracketed name and group 2 is the
/// aside name.
final _asideWithCommentPattern = RegExp(r' +// (.+) \[([-a-z0-9]+)\]');

/// Builds the stylesheets and then every page of the book. If `--serve` is
/// among [arguments], starts the local web server afterwards.
Future<void> main(List<String> arguments) async {
  _buildSass();
  _buildPages();

  var shouldServe = arguments.contains("--serve");
  if (!shouldServe) return;

  await _runServer();
}

/// Builds the HTML for every page in the book and prints a summary of how
/// much was built.
///
/// If [skipUpToDate] is `true`, pages are only rebuilt when they are out of
/// date with respect to their Markdown source, the Mustache templates, or the
/// C and Java sources. Nothing is printed when no words were built.
void _buildPages({bool skipUpToDate = false}) {
  var stopwatch = Stopwatch()..start();
  var book = Book();
  var mustache = Mustache();

  // Leaving this null makes _buildPage() rebuild unconditionally.
  var dependenciesModified = skipUpToDate
      ? _mostRecentlyModified(
          ["asset/mustache/*.html", "c/*.{c,h}", "java/**.java"])
      : null;

  // Running totals, in the order _buildPage() reports them: prose words,
  // lines of code, total words.
  var totals = [0, 0, 0];
  for (var page in book.pages) {
    var metrics = _buildPage(book, mustache, page,
        dependenciesModified: dependenciesModified);
    for (var i = 0; i < totals.length; i++) {
      totals[i] += metrics[i];
    }
  }

  var proseWords = totals[0];
  var codeLines = totals[1];
  var totalWords = totals[2];
  if (totalWords <= 0) return;

  var seconds = (stopwatch.elapsedMilliseconds / 1000).toStringAsFixed(2);
  print("Built ${term.green(proseWords.withCommas)} words and "
      "${term.cyan(codeLines.withCommas)} lines of code "
      "(${totalWords.withCommas} total words) in $seconds seconds");
}

/// Renders [page] to HTML, writes it to the page's HTML path, and prints a
/// one-line status.
///
/// Returns a three-element list: the prose word count, the number of added
/// code lines, and the total word count (prose plus snippet code and
/// context). If [dependenciesModified] is given and the existing HTML is up
/// to date, nothing is written and `[0, 0, 0]` is returned.
List<int> _buildPage(Book book, Mustache mustache, Page page,
    {DateTime dependenciesModified}) {
  // Skip pages whose HTML is already current.
  var canSkip = dependenciesModified != null &&
      _isUpToDate(page.htmlPath, page.markdownPath, dependenciesModified);
  if (canSkip) return [0, 0, 0];

  var proseCount = 0;
  for (var line in page.lines) {
    proseCount += line.wordCount;
  }

  // The total starts from the prose and then adds every snippet's code and
  // surrounding context lines.
  var codeLineCount = 0;
  var wordCount = proseCount;
  for (var tag in page.codeTags) {
    var snippet = book.findSnippet(tag);
    if (snippet == null) {
      print("No snippet for $tag");
      continue;
    }

    codeLineCount += snippet.added.length;
    for (var line in snippet.added) {
      wordCount += line.wordCount;
    }
    for (var line in snippet.contextBefore) {
      wordCount += line.wordCount;
    }
    for (var line in snippet.contextAfter) {
      wordCount += line.wordCount;
    }
  }

  var body = renderMarkdown(book, page, page.lines, Format.web);
  var html = mustache.render(book, page, body);

  // Rewrite aside markers found in code comments as named spans. The markers
  // that carry no comment text get a span with placeholder content, because
  // Chrome seems to lose the position of a span that has no content.
  // TODO: Do this directly in the syntax highlighter instead of after the fact.
  html = html.replaceAllMapped(_asideHighlightedCommentPattern,
      (match) => '<span name="${match[1]}"> </span>');
  html = html.replaceAllMapped(_asideHighlightedWithCommentPattern,
      (match) => '<span class="c" name="${match[2]}">// ${match[1]}</span>');
  html = html.replaceAllMapped(
      _asideCommentPattern, (match) => '<span name="${match[1]}"> </span>');
  html = html.replaceAllMapped(_asideWithCommentPattern,
      (match) => '<span name="${match[2]}">// ${match[1]}</span>');

  File(page.htmlPath).writeAsStringSync(html);

  // Report what was built. Chapters are indented under their part.
  var stats = "$wordCount words";
  if (codeLineCount > 0) stats += ", $codeLineCount loc";
  stats = term.gray("($stats)");

  var number = page.numberString.isNotEmpty ? "${page.numberString}. " : "";
  var indent = page.isChapter ? "  " : "";
  print("$indent${term.green('✓')} $number${page.title} $stats");

  return [proseCount, codeLineCount, wordCount];
}

/// Compiles each top-level `asset/*.scss` file to a `.css` file of the same
/// base name in `site`.
///
/// If [skipUpToDate] is `true`, a stylesheet is left alone when its CSS is
/// newer than both its own source and every module in `asset/sass`.
void _buildSass({bool skipUpToDate = false}) {
  var moduleModified = _mostRecentlyModified(["asset/sass/*.scss"]);

  for (var source in Glob("asset/*.scss").listSync()) {
    var scssPath = p.normalize(source.path);
    var cssName = p.basenameWithoutExtension(source.path) + ".css";
    var cssPath = p.join("site", cssName);

    // Only consult the file system when skipping was requested.
    var needsBuild =
        !skipUpToDate || !_isUpToDate(cssPath, scssPath, moduleModified);
    if (!needsBuild) continue;

    var css =
        sass.compile(scssPath, color: true, style: sass.OutputStyle.expanded);
    File(cssPath).writeAsStringSync(css);
    print("${term.green('-')} $cssPath");
  }
}

/// Serves the contents of the `site` directory at `localhost:8000`.
///
/// Before answering a request for an HTML or CSS file, the out-of-date pages
/// or stylesheets are rebuilt so that the response reflects the latest
/// sources.
Future<void> _runServer() async {
  Future<shelf.Response> handleRequest(shelf.Request request) async {
    var filePath = p.normalize(p.fromUri(request.url));
    // A request for the root is a request for the front page.
    if (filePath == ".") filePath = "index.html";
    var extension = p.extension(filePath).replaceAll(".", "");

    // Refresh whatever kind of file is being asked for.
    switch (extension) {
      case "html":
        _buildPages(skipUpToDate: true);
        break;
      case "css":
        _buildSass(skipUpToDate: true);
        break;
    }

    var file = File(p.join("site", filePath));
    try {
      var contents = await file.readAsBytes();
      return shelf.Response.ok(contents, headers: {
        HttpHeaders.contentTypeHeader: mimeFromExtension(extension)
      });
    } on FileSystemException {
      print(
          "${term.red(request.method)} Not found: ${request.url} ($filePath)");
      return shelf.Response.notFound("Could not find '$filePath'.");
    }
  }

  var pipeline = const shelf.Pipeline();
  var server =
      await io.serve(pipeline.addHandler(handleRequest), "localhost", 8000);
  print("Serving at http://${server.address.host}:${server.port}");
}

/// Returns `true` if [outputPath] was generated after [inputPath] and more
/// recently than [dependenciesModified].
///
/// Returns `false` if [outputPath] does not exist, since a file that has never
/// been generated cannot be up to date. If [dependenciesModified] is `null`
/// (no dependency files were found), only [inputPath] is considered.
///
/// Throws a [FileSystemException] if [inputPath] does not exist.
bool _isUpToDate(
    String outputPath, String inputPath, DateTime dependenciesModified) {
  var output = File(outputPath);

  // Check for existence first: lastModifiedSync() throws on a missing file,
  // but a missing output simply means it needs to be built.
  if (!output.existsSync()) return false;

  var outputModified = output.lastModifiedSync();
  var inputModified = File(inputPath).lastModifiedSync();

  if (dependenciesModified != null &&
      !outputModified.isAfter(dependenciesModified)) {
    return false;
  }

  return outputModified.isAfter(inputModified);
}

/// Finds the latest modification time among all files matched by [globs].
///
/// Matched entries that are not files are ignored. Returns `null` when no
/// file matches.
DateTime _mostRecentlyModified(List<String> globs) {
  DateTime newest;

  for (var pattern in globs) {
    var files = Glob(pattern).listSync().whereType<File>();
    for (var file in files) {
      var modified = file.lastModifiedSync();
      if (newest == null || modified.isAfter(newest)) {
        newest = modified;
      }
    }
  }

  return newest;
}


================================================
FILE: tool/bin/build_xml.dart
================================================
import 'dart:io';

import 'package:path/path.dart' as p;

import 'package:tool/src/book.dart';
import 'package:tool/src/format.dart';
import 'package:tool/src/markdown/markdown.dart';
import 'package:tool/src/markdown/xml_renderer.dart';
import 'package:tool/src/mustache.dart';
import 'package:tool/src/page.dart';
import 'package:tool/src/term.dart' as term;

/// Generate the XML used to import into InDesign.
///
/// Writes one XML file per chapter into `build/xml`. If [arguments] is not
/// empty, only the chapter whose file name equals the first argument is
/// built. Afterwards writes `all-tags.xml`, a minimal file that contains all
/// tags used in the book.
Future<void> main(List<String> arguments) async {
  var book = Book();
  var mustache = Mustache();

  var xmlDirectory = p.join("build", "xml");
  await Directory(xmlDirectory).create(recursive: true);

  var chapters = book.pages.where((page) => page.isChapter);
  for (var page in chapters) {
    var isRequested = arguments.isEmpty || page.fileName == arguments.first;
    if (isRequested) _buildPage(book, mustache, page);
  }

  // The tag file is wrapped in a single root element.
  var allTags = "<chapter>\n${XmlRenderer.tagFileBuffer}\n</chapter>";
  File(p.join(xmlDirectory, "all-tags.xml")).writeAsStringSync(allTags);
}

/// Renders [page] in the print format, writes the result to
/// `build/xml/<fileName>.xml`, and prints a status line with the page's
/// number and title.
///
/// [mustache] is accepted but not used here.
void _buildPage(Book book, Mustache mustache, Page page) {
  var outputPath = p.join("build", "xml", "${page.fileName}.xml");

  var contents = renderMarkdown(book, page, page.lines, Format.print);
  File(outputPath).writeAsStringSync(contents);

  var bullet = term.green('-');
  print("$bullet ${page.numberString}. ${page.title}");
}


================================================
FILE: tool/bin/compile_snippets.dart
================================================
import 'dart:io';

import 'package:path/path.dart' as p;
import 'package:pool/pool.dart';

import 'package:tool/src/book.dart';
import 'package:tool/src/code_tag.dart';
import 'package:tool/src/page.dart';
import 'package:tool/src/split_chapter.dart';
import 'package:tool/src/term.dart' as term;

/// Tests that various snippets in the middle of chapters can be compiled without
/// error. Ensures that, as much as possible, we have a working program at
/// multiple points throughout the chapter.

// TODO: Do this for Java chapters.

// Maps a chapter title to the names of the code tags inside that chapter at
// which the code is split out and compiled.
//
// The keys are looked up with `book.findChapter()` and each tag name with
// `book.findTag()`. If a chapter's list is empty, every code tag in the chapter
// is compiled instead and a warning is printed.
var _chapterTags = <String, List<String>>{
  "Chunks of Bytecode": [
    "free-array",
    "main-include-chunk",
    "simple-instruction",
    "add-constant",
    "return-after-operand",
  ],
  "A Virtual Machine": [
    "main-include-vm",
    "vm-include-debug",
    "print-return",
    "main-negate",
  ],
  "Scanning on Demand": [
    "init-scanner",
    "error-token",
    "advance",
    "match",
    "newline",
    "peek-next",
    "string",
    "number",
    "identifier-type",
    "check-keyword",
  ],
  "Compiling Expressions": [
    "expression",
    "forward-declarations",
    "precedence-body",
    "infix",
    "define-debug-print-code",
    "dump-chunk"
  ],
  "Types of Values": [
    "op-arithmetic",
    "print-value",
    "disassemble-not",
    "values-equal",
  ],
  "Strings": [
    // "as-string",
    // We could get things working earlier by moving the "Operations on Strings"
    // section before "Strings".
    "value-include-object",
    "vm-include-object-memory",
  ],
  "Hash Tables": [
    "free-table",
    "hash-string",
    "table-add-all",
    "table-get",
    "table-delete",
    "resize-increment-count",
  ],
  "Global Variables": [
    "disassemble-print",
    "disassemble-pop",
    "synchronize",
    "define-global-op",
    "disassemble-define-global",
    "disassemble-get-global",
    "disassemble-set-global",
  ],
  "Local Variables": [
    "local-struct",
    "compiler",
    "end-scope",
    "add-local",
    "too-many-locals",
    "pop-locals",
    "interpret-set-local",
  ],
  "Jumping Back and Forth": [
    "jump-if-false-op",
    "compile-else",
    "jump-op",
    "pop-end",
    "jump-instruction",
    "and",
    "or",
    "while-statement",
    "loop-op",
    "disassemble-loop",
    "for-statement",
  ],
  "Calls and Functions": [
    "as-function",
    "function-type-enum",
    "init-compiler",
    "init-function-slot",
    "return-function",
    "disassemble-end",
    "runtime-error-temp",
    "compile-function",
    "init-function-name",
    "call",
    "interpret",
    "disassemble-call",
    "return-statement",
    "runtime-error-stack",
    "return-from-script",
    "print-native",
    "define-native",
    "vm-include-time"
  ],
  "Closures": [
    "obj-closure",
    "new-closure-h",
    "print-closure",
    "closure-op",
    "disassemble-closure",
    "interpret-closure",
    "runtime-error-function",
    "interpret",
    "upvalue-struct",
    "resolve-upvalue-recurse",
    "capture-upvalues",
    "debug-include-object",
    "obj-upvalue",
    "new-upvalue-h",
    "print-upvalue",
    "upvalue-fields",
    "allocate-upvalue-array",
    "init-upvalue-fields",
    "free-upvalues",
    "capture-upvalue",
    "interpret-get-upvalue",
    "interpret-set-upvalue",
    "is-captured-field",
    "init-is-captured",
    "init-zero-local-is-captured",
    "mark-local-captured",
    "close-upvalue-op",
    "disassemble-close-upvalue",
    "next-field",
    "init-next",
    "open-upvalues-field",
    "init-open-upvalues",
    "look-for-existing-upvalue",
    "insert-upvalue-in-list",
    "closed-field",
    "init-closed",
    "return-close-upvalues",
  ],
  "Garbage Collection": [
    "collect-garbage-h",
    "collect-garbage",
    "define-stress-gc",
    "call-collect",
    "define-log-gc",
    "debug-log-includes",
    "log-before-collect",
    "log-after-collect",
    "debug-log-allocate",
    "log-free-object",
    "mark-value-h",
    "mark-object-h",
    "is-marked-field",
    "init-is-marked",
    "log-mark-object",
    "mark-table-h",
    "mark-table",
    "mark-closures",
    "mark-open-upvalues",
    "memory-include-compiler",
    "compiler-include-memory",
    "vm-gray-stack",
    "init-gray-stack",
    "free-gray-stack",
    "blacken-closure",
    "log-blacken-object",
    "check-is-marked",
    "sweep",
    "unmark",
    "table-remove-white-h",
    "table-remove-white",
    "vm-fields",
    "init-gc-fields",
    "updated-bytes-allocated",
    "collect-on-next",
    "heap-grow-factor",
    "log-before-size",
    "log-collected-amount",
    "chunk-include-vm",
    "push-string",
    "pop-string",
    "concatenate-peek",
    "concatenate-pop",
  ],
  "Classes and Instances": [
    "obj-class",
    "print-class",
    "class-op",
    "disassemble-class",
    "interpret-class",
    "object-include-table",
    "print-instance",
    "call-class",
    "property-ops",
    "disassemble-property-ops",
    "interpret-get-property",
    "get-undefined",
    "get-not-instance",
    "interpret-set-property",
    "set-not-instance",
  ],
  "Methods and Initializers": [
    "class-methods",
    "init-methods",
    "free-methods",
    "mark-methods",
    "method-op",
    "disassemble-method",
    "define-method",
    "obj-bound-method",
    "print-bound-method",
    "bind-method",
    "call-bound-method",
    "this",
    "slot-zero",
    "method-type-enum",
    "method-type",
    "store-receiver",
    "class-compiler-struct",
    "create-class-compiler",
    "pop-enclosing",
    "this-outside-class",
    "vm-init-string",
    "init-init-string",
    "mark-init-string",
    "null-init-string",
    "clear-init-string",
    "initializer-type-enum",
    "return-this",
    "return-from-init",
    "invoke-op",
    "invoke-instruction",
    "invoke-from-class",
    "invoke-field",
  ],
  "Superclasses": [
    "inherit-op",
    "disassemble-inherit",
    "interpret-inherit",
    "inherit-non-class",
    "synthetic-token",
    "has-superclass",
    "init-has-superclass",
    "set-has-superclass",
    "get-super-op",
    "disassemble-get-super",
    "interpret-get-super",
    "super-invoke-op",
    "disassemble-super-invoke",
    "interpret-super-invoke",
  ],
  "Optimization": [
    "initial-index",
    "next-index",
    "adjust-alloc",
    "adjust-init",
    "re-hash",
    "adjust-free",
    "table-set-grow",
    "init-capacity-mask",
    "add-all-loop",
    "find-string-index",
    "find-string-next",
    "mark-table",
    "remove-white",
    "free-table",
    "define-nan-boxing",
    "end-values-equal",
  ],
};

var _allPassed = true;

/// Splits out and compiles every snippet listed in [_chapterTags], running up
/// to one compile per processor at a time.
///
/// Prints the elapsed time when all compiles finish and exits with code 1 if
/// any of them failed.
Future<void> main(List<String> arguments) async {
  var watch = Stopwatch()..start();
  var book = Book();
  var pool = Pool(Platform.numberOfProcessors);
  var futures = <Future<void>>[];

  for (var entry in _chapterTags.entries) {
    var chapterName = entry.key;
    var tagNames = entry.value;
    var chapter = book.findChapter(chapterName);

    // Default to every tag in the chapter; narrow to the listed ones if any.
    var tags = chapter.codeTags;
    if (tagNames.isEmpty) {
      print("Warning, no in-chapter snippets for '$chapterName'");
    } else {
      tags = tagNames.map((name) => book.findTag(chapter, name));
    }

    for (var tag in tags) {
      var pending =
          pool.withResource(() => _compileChapterTag(book, chapter, tag));
      futures.add(pending);
    }
  }

  await Future.wait(futures);

  var seconds = watch.elapsedMilliseconds / 1000;
  print("Done in $seconds seconds");
  if (!_allPassed) exit(1);
}

/// Splits [chapter] at [tag] and compiles the resulting sources by invoking
/// `make -f util/c.make` in release mode.
///
/// Prints a PASS or FAIL line for the snippet. On failure it also prints the
/// captured stdout and stderr of `make` and clears [_allPassed].
Future<void> _compileChapterTag(Book book, Page chapter, CodeTag tag) async {
  await splitChapter(book, chapter, tag);

  var sourceDir = p.join("gen", "snippets", chapter.shortName, tag.directory);
  var result = await Process.run("make", [
    "-f",
    "util/c.make",
    "NAME=${chapter.shortName}-${tag.directory}",
    "MODE=release",
    "SOURCE_DIR=$sourceDir",
    "SNIPPET=true"
  ]);

  if (result.exitCode != 0) {
    print("${term.red('FAIL')} ${chapter.title} / ${tag.name}");
    print(result.stdout);
    print(result.stderr);
    print("");
    _allPassed = false;
    return;
  }

  print("${term.green('PASS')} ${chapter.title} / ${tag.name}");
}


================================================
FILE: tool/bin/split_chapters.dart
================================================
import 'package:tool/src/book.dart';
import 'package:tool/src/split_chapter.dart';

/// Splits every page of the book that has a language associated with it.
void main(List<String> arguments) {
  var book = Book();
  for (var page in book.pages) {
    // Pages without a language are not split.
    if (page.language != null) splitChapter(book, page);
  }
}


================================================
FILE: tool/bin/test.dart
================================================
import 'dart:convert';
import 'dart:io';

import 'package:args/args.dart';
import 'package:glob/glob.dart';
import 'package:path/path.dart' as p;

import 'package:tool/src/term.dart' as term;

/// Runs the tests.

// Patterns matched against lines of a test file.

/// `// expect: <text>`: group 1 is a line the test is expected to print.
final _expectedOutputPattern = RegExp(r"// expect: ?(.*)");

/// `// Error...`: group 1 is a compile error expected on the comment's own
/// line.
final _expectedErrorPattern = RegExp(r"// (Error.*)");

/// `// [line N] Error...`, optionally written as `[java line N]` or
/// `[c line N]` to restrict the expectation to one suite language. Group 2 is
/// the language, group 3 the line number, and group 4 the error message.
final _errorLinePattern = RegExp(r"// \[((java|c) )?line (\d+)\] (Error.*)");

/// `// expect runtime error: <message>`: group 1 is the expected message.
final _expectedRuntimeErrorPattern = RegExp(r"// expect runtime error: (.+)");

// Patterns matched against lines of the interpreter's stderr.

/// A reported compile error: group 1 is the line number, group 2 the message.
final _syntaxErrorPattern = RegExp(r"\[.*line (\d+)\] (Error.+)");

/// A stack trace line of a runtime error: group 1 is the line number.
final _stackTracePattern = RegExp(r"\[line (\d+)\]");

/// Marks a `.lox` file that is not a test and should be ignored.
final _nonTestPattern = RegExp(r"// nontest");

// Counters for the suite being run. They are reset at the start of each suite.
var _passed = 0;
var _failed = 0;
var _skipped = 0;
var _expectations = 0;

/// The suite currently being run.
Suite _suite;

/// If not `null`, only tests whose path relative to "test" starts with this
/// are run.
String _filterPath;

/// If not `null`, the executable to run in place of the suite's own.
String _customInterpreter;

/// Arguments passed to [_customInterpreter] before the test path.
List<String> _customArguments;

/// Every defined suite, keyed by name.
final _allSuites = <String, Suite>{};

/// Names of the suites registered as C suites.
final _cSuites = <String>[];

/// Names of the suites registered as Java suites.
final _javaSuites = <String>[];

/// A named interpreter configuration together with the tests it should run.
class Suite {
  /// The name used to select the suite on the command line.
  final String name;

  /// The implementation language, "c" or "java". Language-specific error
  /// expectations in test files are compared against this.
  final String language;

  /// The program to run for each test.
  final String executable;

  /// Arguments passed to [executable] before the test file path.
  final List<String> args;

  /// Maps a test path, either a directory or a single file, to its state,
  /// "pass" or "skip".
  final Map<String, String> tests;

  Suite(this.name, this.language, this.executable, this.args, this.tests);
}

/// Parses the command line and runs the requested suite or group of suites.
///
/// The first positional argument is a suite name, or one of "all", "c", or
/// "java" to run a whole group. An optional second positional argument is a
/// path prefix (relative to "test") limiting which tests run. `--interpreter`
/// and `--arguments` replace the suite's own executable and arguments.
///
/// Exits with code 1 on a usage error, an unknown suite, or any test failure.
void main(List<String> arguments) {
  _defineTestSuites();

  var parser = ArgParser();

  parser.addOption("interpreter", abbr: "i", help: "Path to interpreter.");
  parser.addMultiOption("arguments",
      abbr: "a", help: "Additional interpreter arguments.");

  // Report malformed options as a usage error instead of an uncaught
  // exception. [_usageError] exits, so `options` is always set past this
  // point.
  ArgResults options;
  try {
    options = parser.parse(arguments);
  } on FormatException catch (error) {
    _usageError(parser, error.message);
  }

  if (options.rest.isEmpty) {
    _usageError(parser, "Missing suite name.");
  } else if (options.rest.length > 2) {
    _usageError(
        parser, "Unexpected arguments '${options.rest.skip(2).join(' ')}'.");
  }

  var suite = options.rest[0];

  // Read the filter from the positional arguments left after option parsing,
  // not from the raw argument list. Otherwise options given before the suite
  // name shift the index and an option or its value is used as the filter.
  if (options.rest.length == 2) _filterPath = options.rest[1];

  if (options.wasParsed("interpreter")) {
    _customInterpreter = options["interpreter"] as String;
  }

  if (options.wasParsed("arguments")) {
    _customArguments = options["arguments"] as List<String>;

    if (_customInterpreter == null) {
      _usageError(parser,
          "Must pass an interpreter path if providing custom arguments.");
    }
  }

  if (suite == "all") {
    _runSuites(_allSuites.keys.toList());
  } else if (suite == "c") {
    _runSuites(_cSuites);
  } else if (suite == "java") {
    _runSuites(_javaSuites);
  } else if (!_allSuites.containsKey(suite)) {
    print("Unknown interpreter '$suite'");
    exit(1);
  } else if (!_runSuite(suite)) {
    exit(1);
  }
}

/// Prints [message] followed by the usage text and the option help from
/// [parser], then exits with code 1.
void _usageError(ArgParser parser, String message) {
  var lines = [
    message,
    "",
    "Usage: test.dart <suites> [filter] [custom interpreter...]",
    "",
    "Optional custom interpreter options:",
    parser.usage,
  ];
  lines.forEach(print);
  exit(1);
}

/// Runs each suite in [names] in order, printing a header before each one.
///
/// Every suite is run even if an earlier one fails. Exits with code 1 once
/// all are done if any suite had a failure.
void _runSuites(List<String> names) {
  var allSucceeded = true;
  for (var suiteName in names) {
    print("=== $suiteName ===");
    // Run the suite first so that it is never short-circuited away.
    allSucceeded = _runSuite(suiteName) && allSucceeded;
  }

  if (!allSucceeded) exit(1);
}

/// Runs every `.lox` file under "test" against the suite called [name] and
/// prints a summary line.
///
/// Returns `true` if no test failed.
bool _runSuite(String name) {
  _suite = _allSuites[name];

  // Start each suite with fresh counters.
  _passed = _failed = _skipped = _expectations = 0;

  for (var entry in Glob("test/**.lox").listSync()) {
    _runTest(entry.path);
  }

  term.clearLine();

  var succeeded = _failed == 0;
  if (succeeded) {
    print("All ${term.green(_passed)} tests passed "
        "($_expectations expectations).");
  } else {
    print("${term.green(_passed)} tests passed. "
        "${term.red(_failed)} tests failed.");
  }

  return succeeded;
}

/// Runs the test file at [path] against the current suite and records the
/// result in the pass/fail counters.
///
/// Benchmarks, files excluded by the filter, and files that [Test.parse]
/// rejects are not run. Failure messages are printed as they occur.
void _runTest(String path) {
  if (path.contains("benchmark")) return;

  // Make a nice short path relative to the working directory. Normalize it to
  // use "/" since the interpreters expect the argument to use that.
  var testPath = p.posix.normalize(path);

  // Check if we are just running a subset of the tests.
  var excluded = _filterPath != null &&
      !p.posix.relative(testPath, from: "test").startsWith(_filterPath);
  if (excluded) return;

  // Update the status line.
  var grayPath = term.gray("($testPath)");
  term.writeLine("Passed: ${term.green(_passed)} "
      "Failed: ${term.red(_failed)} "
      "Skipped: ${term.yellow(_skipped)} $grayPath");

  // Read the test and parse out the expectations. A false result means the
  // file is skipped or is not a test.
  var test = Test(testPath);
  if (!test.parse()) return;

  var failures = test.run();
  if (failures.isEmpty) {
    _passed++;
    return;
  }

  // Display the failures.
  _failed++;
  term.writeLine("${term.red("FAIL")} $testPath");
  print("");
  for (var message in failures) {
    print("     ${term.pink(message)}");
  }
  print("");
}

/// A line of output that a test expects the interpreter to print.
class ExpectedOutput {
  /// The 1-based line in the test file where the expectation appears.
  final int line;

  /// The expected text of the output line.
  final String output;

  ExpectedOutput(this.line, this.output);
}

/// A single test file.
///
/// [parse] reads the expectations written in the file's comments and [run]
/// invokes the interpreter on the file and checks its output, errors, and exit
/// code against them.
class Test {
  /// Path to the test file. It is split on "/" to look up the test's state.
  final String _path;

  /// The expected lines of standard output, in order.
  final _expectedOutput = <ExpectedOutput>[];

  /// The set of expected compile error messages.
  final _expectedErrors = <String>{};

  /// The expected runtime error message or `null` if there should not be one.
  String _expectedRuntimeError;

  /// If there is an expected runtime error, the line it should occur on.
  int _runtimeErrorLine = 0;

  /// The exit code the interpreter should return: 0 by default, 65 if a
  /// compile error is expected, 70 if a runtime error is expected.
  int _expectedExitCode = 0;

  /// The list of failure message lines.
  final _failures = <String>[];

  Test(this._path);

  /// Reads the test file and collects its expectations.
  ///
  /// Returns `false` if the file should not be run: the current suite marks it
  /// as "skip", it is flagged as a non-test, or it expects both compile and
  /// runtime errors. Throws if the suite defines no state for the path.
  bool parse() {
    // Get the path components.
    var parts = _path.split("/");
    var subpath = "";
    String state;

    // Figure out the state of the test. We don't break out of this loop because
    // we want lines for more specific paths to override more general ones.
    for (var part in parts) {
      if (subpath.isNotEmpty) subpath += "/";
      subpath += part;

      if (_suite.tests.containsKey(subpath)) {
        state = _suite.tests[subpath];
      }
    }

    if (state == null) {
      throw "Unknown test state for '$_path'.";
    } else if (state == "skip") {
      _skipped++;
      return false;
    }

    var lines = File(_path).readAsLinesSync();
    for (var lineNum = 1; lineNum <= lines.length; lineNum++) {
      var line = lines[lineNum - 1];

      // Not a test file at all, so ignore it.
      var match = _nonTestPattern.firstMatch(line);
      if (match != null) return false;

      match = _expectedOutputPattern.firstMatch(line);
      if (match != null) {
        _expectedOutput.add(ExpectedOutput(lineNum, match[1]));
        _expectations++;
        continue;
      }

      match = _expectedErrorPattern.firstMatch(line);
      if (match != null) {
        // The error is expected on the same line as the comment.
        _expectedErrors.add("[$lineNum] ${match[1]}");

        // If we expect a compile error, it should exit with EX_DATAERR.
        _expectedExitCode = 65;
        _expectations++;
        continue;
      }

      match = _errorLinePattern.firstMatch(line);
      if (match != null) {
        // The two interpreters are slightly different in terms of which
        // cascaded errors may appear after an initial compile error because
        // their panic mode recovery is a little different. To handle that,
        // the tests can indicate if an error line should only appear for a
        // certain interpreter.
        var language = match[2];
        if (language == null || language == _suite.language) {
          _expectedErrors.add("[${match[3]}] ${match[4]}");

          // If we expect a compile error, it should exit with EX_DATAERR.
          _expectedExitCode = 65;
          _expectations++;
        }
        continue;
      }

      match = _expectedRuntimeErrorPattern.firstMatch(line);
      if (match != null) {
        _runtimeErrorLine = lineNum;
        _expectedRuntimeError = match[1];
        // If we expect a runtime error, it should exit with EX_SOFTWARE.
        _expectedExitCode = 70;
        _expectations++;
      }
    }

    if (_expectedErrors.isNotEmpty && _expectedRuntimeError != null) {
      print("${term.magenta('TEST ERROR')} $_path");
      print("     Cannot expect both compile and runtime errors.");
      print("");
      return false;
    }

    // If we got here, it's a valid test.
    return true;
  }

  /// Invoke the interpreter and run the test.
  ///
  /// Returns the list of failure message lines, which is empty if the test
  /// passed.
  List<String> run() {
    // A custom interpreter gets only the custom arguments (if any); otherwise
    // the suite's own arguments are used. The test path always comes last.
    var args = [
      if (_customInterpreter != null) ...?_customArguments else ..._suite.args,
      _path
    ];
    var result = Process.runSync(_customInterpreter ?? _suite.executable, args);

    // Normalize Windows line endings.
    var outputLines = const LineSplitter().convert(result.stdout as String);
    var errorLines = const LineSplitter().convert(result.stderr as String);

    // Validate that an expected runtime error occurred.
    if (_expectedRuntimeError != null) {
      _validateRuntimeError(errorLines);
    } else {
      _validateCompileErrors(errorLines);
    }

    _validateExitCode(result.exitCode, errorLines);
    _validateOutput(outputLines);
    return _failures;
  }

  /// Checks that stderr starts with the expected runtime error message and
  /// that the first stack trace line after it names the expected line.
  void _validateRuntimeError(List<String> errorLines) {
    // A runtime error is the message line followed by at least one more line.
    if (errorLines.length < 2) {
      fail("Expected runtime error '$_expectedRuntimeError' and got none.");
      return;
    }

    if (errorLines[0] != _expectedRuntimeError) {
      fail("Expected runtime error '$_expectedRuntimeError' and got:");
      fail(errorLines[0]);
    }

    // Make sure the stack trace has the right line.
    RegExpMatch match;
    var stackLines = errorLines.sublist(1);
    for (var line in stackLines) {
      match = _stackTracePattern.firstMatch(line);
      if (match != null) break;
    }

    if (match == null) {
      fail("Expected stack trace and got:", stackLines);
    } else {
      var stackLine = int.parse(match[1]);
      if (stackLine != _runtimeErrorLine) {
        fail("Expected runtime error on line $_runtimeErrorLine "
            "but was on line $stackLine.");
      }
    }
  }

  /// Checks that the compile errors on stderr are exactly the expected ones.
  ///
  /// Only the first ten unexpected lines are reported in full; the rest are
  /// summarized in a single count.
  void _validateCompileErrors(List<String> errorLines) {
    // Validate that every compile error was expected.
    var foundErrors = <String>{};
    var unexpectedCount = 0;
    for (var line in errorLines) {
      var match = _syntaxErrorPattern.firstMatch(line);
      if (match != null) {
        var error = "[${match[1]}] ${match[2]}";
        if (_expectedErrors.contains(error)) {
          foundErrors.add(error);
        } else {
          if (unexpectedCount < 10) {
            fail("Unexpected error:");
            fail(line);
          }
          unexpectedCount++;
        }
      } else if (line != "") {
        if (unexpectedCount < 10) {
          fail("Unexpected output on stderr:");
          fail(line);
        }
        unexpectedCount++;
      }
    }

    if (unexpectedCount > 10) {
      fail("(truncated ${unexpectedCount - 10} more...)");
    }

    // Validate that every expected error occurred.
    for (var error in _expectedErrors.difference(foundErrors)) {
      fail("Missing expected error: $error");
    }
  }

  /// Checks the interpreter's exit code, reporting at most the first ten lines
  /// of stderr alongside a mismatch.
  void _validateExitCode(int exitCode, List<String> errorLines) {
    if (exitCode == _expectedExitCode) return;

    if (errorLines.length > 10) {
      errorLines = errorLines.sublist(0, 10);
      errorLines.add("(truncated...)");
    }

    fail("Expected return code $_expectedExitCode and got $exitCode. Stderr:",
        errorLines);
  }

  /// Checks stdout line by line against the expected output, reporting
  /// mismatched, extra, and missing lines.
  void _validateOutput(List<String> outputLines) {
    // Remove the trailing last empty line.
    if (outputLines.isNotEmpty && outputLines.last == "") {
      outputLines.removeLast();
    }

    var index = 0;
    for (; index < outputLines.length; index++) {
      var line = outputLines[index];
      if (index >= _expectedOutput.length) {
        fail("Got output '$line' when none was expected.");
        continue;
      }

      var expected = _expectedOutput[index];
      if (expected.output != line) {
        fail("Expected output '${expected.output}' on line ${expected.line} "
            "and got '$line'.");
      }
    }

    // Any expectations past the end of the actual output are missing.
    while (index < _expectedOutput.length) {
      var expected = _expectedOutput[index];
      fail("Missing expected output '${expected.output}' on line "
          "${expected.line}.");
      index++;
    }
  }

  /// Records a failure [message], followed by the optional detail [lines].
  void fail(String message, [List<String> lines]) {
    _failures.add(message);
    if (lines != null) _failures.addAll(lines);
  }
}

/// Registers every test suite in [_allSuites], [_cSuites], and [_javaSuites].
///
/// Each suite is given a map from a test path (a directory or a single file)
/// to "pass" or "skip". The maps defined first are shared groups of skipped
/// tests that are spread into the suites that need them.
void _defineTestSuites() {
  // Registers a C suite. The "clox" suite runs "build/cloxd"; every other
  // suite runs "build/<name>".
  void c(String name, Map<String, String> tests) {
    var executable = name == "clox" ? "build/cloxd" : "build/$name";
    _allSuites[name] = Suite(name, "c", executable, [], tests);
    _cSuites.add(name);
  }

  // Registers a Java suite, run with `java -cp <dir>` on the Lox main class.
  // The class path is "build/java" for "jlox" and "build/gen/<name>" for every
  // other suite.
  void java(String name, Map<String, String> tests) {
    var dir = name == "jlox" ? "build/java" : "build/gen/$name";
    _allSuites[name] = Suite(name, "java", "java",
        ["-cp", dir, "com.craftinginterpreters.lox.Lox"], tests);
    _javaSuites.add(name);
  }

  // These are just for earlier chapters.
  var earlyChapters = {
    "test/scanning": "skip",
    "test/expressions": "skip",
  };

  // JVM doesn't correctly implement IEEE equality on boxed doubles.
  var javaNaNEquality = {
    "test/number/nan_equality.lox": "skip",
  };

  // No hardcoded limits in jlox.
  var noJavaLimits = {
    "test/limit/loop_too_large.lox": "skip",
    "test/limit/no_reuse_constants.lox": "skip",
    "test/limit/too_many_constants.lox": "skip",
    "test/limit/too_many_locals.lox": "skip",
    "test/limit/too_many_upvalues.lox": "skip",

    // Rely on JVM for stack overflow checking.
    "test/limit/stack_overflow.lox": "skip",
  };

  // No classes in Java yet.
  var noJavaClasses = {
    "test/assignment/to_this.lox": "skip",
    "test/call/object.lox": "skip",
    "test/class": "skip",
    "test/closure/close_over_method_parameter.lox": "skip",
    "test/constructor": "skip",
    "test/field": "skip",
    "test/inheritance": "skip",
    "test/method": "skip",
    "test/number/decimal_point_at_eof.lox": "skip",
    "test/number/trailing_dot.lox": "skip",
    "test/operator/equals_class.lox": "skip",
    "test/operator/equals_method.lox": "skip",
    "test/operator/not_class.lox": "skip",
    "test/regression/394.lox": "skip",
    "test/super": "skip",
    "test/this": "skip",
    "test/return/in_method.lox": "skip",
    "test/variable/local_from_method.lox": "skip",
  };

  // No functions in Java yet.
  var noJavaFunctions = {
    "test/call": "skip",
    "test/closure": "skip",
    "test/for/closure_in_body.lox": "skip",
    "test/for/return_closure.lox": "skip",
    "test/for/return_inside.lox": "skip",
    "test/for/syntax.lox": "skip",
    "test/function": "skip",
    "test/operator/not.lox": "skip",
    "test/regression/40.lox": "skip",
    "test/return": "skip",
    "test/unexpected_character.lox": "skip",
    "test/while/closure_in_body.lox": "skip",
    "test/while/return_closure.lox": "skip",
    "test/while/return_inside.lox": "skip",
  };

  // No resolution in Java yet.
  var noJavaResolution = {
    "test/closure/assign_to_shadowed_later.lox": "skip",
    "test/function/local_mutual_recursion.lox": "skip",
    "test/variable/collide_with_parameter.lox": "skip",
    "test/variable/duplicate_local.lox": "skip",
    "test/variable/duplicate_parameter.lox": "skip",
    "test/variable/early_bound.lox": "skip",

    // Broken because we haven't fixed it yet by detecting the error.
    "test/return/at_top_level.lox": "skip",
    "test/variable/use_local_in_initializer.lox": "skip",
  };

  // No control flow in C yet.
  var noCControlFlow = {
    "test/block/empty.lox": "skip",
    "test/for": "skip",
    "test/if": "skip",
    "test/limit/loop_too_large.lox": "skip",
    "test/logical_operator": "skip",
    "test/variable/unreached_undefined.lox": "skip",
    "test/while": "skip",
  };

  // No functions in C yet.
  var noCFunctions = {
    "test/call": "skip",
    "test/closure": "skip",
    "test/for/closure_in_body.lox": "skip",
    "test/for/return_closure.lox": "skip",
    "test/for/return_inside.lox": "skip",
    "test/for/syntax.lox": "skip",
    "test/function": "skip",
    "test/limit/no_reuse_constants.lox": "skip",
    "test/limit/stack_overflow.lox": "skip",
    "test/limit/too_many_constants.lox": "skip",
    "test/limit/too_many_locals.lox": "skip",
    "test/limit/too_many_upvalues.lox": "skip",
    "test/regression/40.lox": "skip",
    "test/return": "skip",
    "test/unexpected_character.lox": "skip",
    "test/variable/collide_with_parameter.lox": "skip",
    "test/variable/duplicate_parameter.lox": "skip",
    "test/variable/early_bound.lox": "skip",
    "test/while/closure_in_body.lox": "skip",
    "test/while/return_closure.lox": "skip",
    "test/while/return_inside.lox": "skip",
  };

  // No classes in C yet.
  var noCClasses = {
    "test/assignment/to_this.lox": "skip",
    "test/call/object.lox": "skip",
    "test/class": "skip",
    "test/closure/close_over_method_parameter.lox": "skip",
    "test/constructor": "skip",
    "test/field": "skip",
    "test/inheritance": "skip",
    "test/method": "skip",
    "test/number/decimal_point_at_eof.lox": "skip",
    "test/number/trailing_dot.lox": "skip",
    "test/operator/equals_class.lox": "skip",
    "test/operator/equals_method.lox": "skip",
    "test/operator/not.lox": "skip",
    "test/operator/not_class.lox": "skip",
    "test/regression/394.lox": "skip",
    "test/return/in_method.lox": "skip",
    "test/super": "skip",
    "test/this": "skip",
    "test/variable/local_from_method.lox": "skip",
  };

  // No inheritance in C yet.
  var noCInheritance = {
    "test/class/local_inherit_other.lox": "skip",
    "test/class/local_inherit_self.lox": "skip",
    "test/class/inherit_self.lox": "skip",
    "test/class/inherited_method.lox": "skip",
    "test/inheritance": "skip",
    "test/regression/394.lox": "skip",
    "test/super": "skip",
  };

  java("jlox", {
    "test": "pass",
    ...earlyChapters,
    ...javaNaNEquality,
    ...noJavaLimits,
  });

  java("chap04_scanning", {
    // No interpreter yet.
    "test": "skip",
    "test/scanning": "pass"
  });

  // No test for chapter 5. It just has a hardcoded main() in AstPrinter.

  java("chap06_parsing", {
    // No real interpreter yet.
    "test": "skip",
    "test/expressions/parse.lox": "pass"
  });

  java("chap07_evaluating", {
    // No real interpreter yet.
    "test": "skip",
    "test/expressions/evaluate.lox": "pass"
  });

  java("chap08_statements", {
    "test": "pass",
    ...earlyChapters,
    ...javaNaNEquality,
    ...noJavaLimits,
    ...noJavaFunctions,
    ...noJavaResolution,
    ...noJavaClasses,

    // No control flow.
    "test/block/empty.lox": "skip",
    "test/for": "skip",
    "test/if": "skip",
    "test/logical_operator": "skip",
    "test/while": "skip",
    "test/variable/unreached_undefined.lox": "skip",
  });

  java("chap09_control", {
    "test": "pass",
    ...earlyChapters,
    ...javaNaNEquality,
    ...noJavaLimits,
    ...noJavaFunctions,
    ...noJavaResolution,
    ...noJavaClasses,
  });

  java("chap10_functions", {
    "test": "pass",
    ...earlyChapters,
    ...javaNaNEquality,
    ...noJavaLimits,
    ...noJavaResolution,
    ...noJavaClasses,
  });

  java("chap11_resolving", {
    "test": "pass",
    ...earlyChapters,
    ...javaNaNEquality,
    ...noJavaLimits,
    ...noJavaClasses,
  });

  java("chap12_classes", {
    "test": "pass",
    ...earlyChapters,
    ...noJavaLimits,
    ...javaNaNEquality,

    // No inheritance.
    "test/class/local_inherit_other.lox": "skip",
    "test/class/local_inherit_self.lox": "skip",
    "test/class/inherit_self.lox": "skip",
    "test/class/inherited_method.lox": "skip",
    "test/inheritance": "skip",
    "test/regression/394.lox": "skip",
    "test/super": "skip",
  });

  java("chap13_inheritance", {
    "test": "pass",
    ...earlyChapters,
    ...javaNaNEquality,
    ...noJavaLimits,
  });

  c("clox", {
    "test": "pass",
    ...earlyChapters,
  });

  c("chap17_compiling", {
    // No real interpreter yet.
    "test": "skip",
    "test/expressions/evaluate.lox": "pass",
  });

  c("chap18_types", {
    // No real interpreter yet.
    "test": "skip",
    "test/expressions/evaluate.lox": "pass",
  });

  c("chap19_strings", {
    // No real interpreter yet.
    "test": "skip",
    "test/expressions/evaluate.lox": "pass",
  });

  c("chap20_hash", {
    // No real interpreter yet.
    "test": "skip",
    "test/expressions/evaluate.lox": "pass",
  });

  c("chap21_global", {
    "test": "pass",
    ...earlyChapters,
    ...noCControlFlow,
    ...noCFunctions,
    ...noCClasses,

    // No blocks.
    "test/assignment/local.lox": "skip",
    "test/variable/in_middle_of_block.lox": "skip",
    "test/variable/in_nested_block.lox": "skip",
    "test/variable/scope_reuse_in_different_blocks.lox": "skip",
    "test/variable/shadow_and_local.lox": "skip",
    "test/variable/undefined_local.lox": "skip",

    // No local variables.
    "test/block/scope.lox": "skip",
    "test/variable/duplicate_local.lox": "skip",
    "test/variable/shadow_global.lox": "skip",
    "test/variable/shadow_local.lox": "skip",
    "test/variable/use_local_in_initializer.lox": "skip",
  });

  c("chap22_local", {
    "test": "pass",
    ...earlyChapters,
    ...noCControlFlow,
    ...noCFunctions,
    ...noCClasses,
  });

  c("chap23_jumping", {
    "test": "pass",
    ...earlyChapters,
    ...noCFunctions,
    ...noCClasses,
  });

  c("chap24_calls", {
    "test": "pass",
    ...earlyChapters,
    ...noCClasses,

    // No closures.
    "test/closure": "skip",
    "test/for/closure_in_body.lox": "skip",
    "test/for/return_closure.lox": "skip",
    "test/function/local_recursion.lox": "skip",
    "test/limit/too_many_upvalues.lox": "skip",
    "test/regression/40.lox": "skip",
    "test/while/closure_in_body.lox": "skip",
    "test/while/return_closure.lox": "skip",
  });

  c("chap25_closures", {
    "test": "pass",
    ...earlyChapters,
    ...noCClasses,
  });

  c("chap26_garbage", {
    "test": "pass",
    ...earlyChapters,
    ...noCClasses,
  });

  c("chap27_classes", {
    "test": "pass",
    ...earlyChapters,
    ...noCInheritance,

    // No methods.
    "test/assignment/to_this.lox": "skip",
    "test/class/local_reference_self.lox": "skip",
    "test/class/reference_self.lox": "skip",
    "test/closure/close_over_method_parameter.lox": "skip",
    "test/constructor": "skip",
    "test/field/get_and_set_method.lox": "skip",
    "test/field/method.lox": "skip",
    "test/field/method_binds_this.lox": "skip",
    "test/method": "skip",
    "test/operator/equals_class.lox": "skip",
    "test/operator/equals_method.lox": "skip",
    "test/return/in_method.lox": "skip",
    "test/this": "skip",
    "test/variable/local_from_method.lox": "skip",
  });

  c("chap28_methods", {
    "test": "pass",
    ...earlyChapters,
    ...noCInheritance,
  });

  c("chap29_superclasses", {
    "test": "pass",
    ...earlyChapters,
  });

  c("chap30_optimization", {
    "test": "pass",
    ...earlyChapters,
  });
}


================================================
FILE: tool/bin/tile_pages.dart
================================================
import 'dart:io';

import 'package:image/image.dart';
import 'package:path/path.dart' as p;

/// Convert a PDF to a tiled PNG image of all of the pages.
///
/// Requires `pdftoppm` which can be installed on Mac with:
///
///     brew install poppler
///
/// The first argument is the path to the PDF. The result is written to
/// `pages.png` in the current directory. The page images are exported to a
/// temporary directory which is deleted before returning, even on failure.
///
/// Sets a non-zero exit code if the PDF path is missing, the export fails, or
/// no page images were produced.
Future<void> main(List<String> arguments) async {
  if (arguments.isEmpty) {
    print('Usage: tile_pages.dart <path to PDF>');
    exitCode = 64;
    return;
  }

  print('Exporting PDF pages to PNG...');
  var tempDir = await Directory('.').createTemp('pages');

  // Set the exit code and return instead of calling exit() so that the
  // `finally` block below still runs and removes the temporary directory.
  try {
    // The `-r` argument is DPI.
    var result = await Process.run('pdftoppm',
        ['-png', '-r', '40', arguments[0], p.join(tempDir.path, 'page')]);
    if (result.exitCode != 0) {
      print('Could not export pages:\n${result.stdout}\n${result.stderr}');
      exitCode = 1;
      return;
    }

    var pages = <Image>[];
    var imageFiles = tempDir
        .listSync()
        .whereType<File>()
        .where((entry) => entry.path.endsWith('.png'))
        .toList();
    // Sort by path so the pages are tiled in a stable order.
    imageFiles.sort((a, b) => a.path.compareTo(b.path));

    for (var imageFile in imageFiles) {
      print('Reading ${imageFile.path}...');
      var bytes = await imageFile.readAsBytes();
      pages.add(decodePng(bytes));
    }

    // The tile size is taken from the first page, so there must be one.
    if (pages.isEmpty) {
      print('No pages were exported from ${arguments[0]}.');
      exitCode = 1;
      return;
    }

    const columns = 36;
    const rows = 18;
    const border = 4;

    var pageWidth = pages.first.width;
    var pageHeight = pages.first.height;

    var tiled = Image.rgb((pageWidth + border) * columns + border,
        (pageHeight + border) * rows + border);
    tiled.fill(Color.fromRgb(0, 0, 0));

    // Lay the pages out left to right, then top to bottom.
    for (var i = 0; i < pages.length; i++) {
      var x = i % columns;
      var y = i ~/ columns;
      print('Tiling page ${i + 1} ($x, $y)...');
      copyInto(tiled, pages[i],
          dstX: x * (pageWidth + border) + border,
          dstY: y * (pageHeight + border) + border);
    }

    print('Writing pages.png...');
    await File('pages.png').writeAsBytes(encodePng(tiled));
  } finally {
    await tempDir.delete(recursive: true);
  }
}


================================================
FILE: tool/lib/src/book.dart
================================================
import 'code_tag.dart';
import 'location.dart';
import 'page.dart';
import 'snippet.dart';
import 'source_file_parser.dart';
import 'text.dart';

import 'package:glob/glob.dart';
import 'package:path/path.dart' as p;

/// The structure of the book: each key is a part title and each value is the
/// ordered list of chapter titles in that part.
///
/// [Book] walks this map in order to create and number its pages. Two keys
/// are treated specially there: the empty string holds the frontmatter, which
/// gets no part page, and "Backmatter" holds the appendices. Neither one
/// consumes a part number, and their chapters do not consume chapter numbers.
const _tableOfContents = {
  '': [
    'Crafting Interpreters',
    'Dedication',
    'Acknowledgements',
    'Table of Contents',
  ],
  'Welcome': [
    'Introduction',
    'A Map of the Territory',
    'The Lox Language',
  ],
  'A Tree-Walk Interpreter': [
    'Scanning',
    'Representing Code',
    'Parsing Expressions',
    'Evaluating Expressions',
    'Statements and State',
    'Control Flow',
    'Functions',
    'Resolving and Binding',
    'Classes',
    'Inheritance',
  ],
  'A Bytecode Virtual Machine': [
    'Chunks of Bytecode',
    'A Virtual Machine',
    'Scanning on Demand',
    'Compiling Expressions',
    'Types of Values',
    'Strings',
    'Hash Tables',
    'Global Variables',
    'Local Variables',
    'Jumping Back and Forth',
    'Calls and Functions',
    'Closures',
    'Garbage Collection',
    'Classes and Instances',
    'Methods and Initializers',
    'Superclasses',
    'Optimization',
  ],
  // The appendix titles below are matched by name in [Book] to assign the
  // "A1" and "A2" chapter numbers.
  'Backmatter': [
    'Appendix I',
    'Appendix II',
  ],
};

/// Everything the book is built from: its pages in reading order, plus the
/// snippets parsed out of the Java and C source files.
class Book {
  final List<Page> parts = [];
  final List<Page> frontmatter = [];
  final List<Page> pages = [];

  final Map<CodeTag, Snippet> _snippets = {};

  Book() {
    _loadPages();
    _loadSourceFiles();

    // The placeholder tags don't correspond to real snippets in the text, so
    // they get no surrounding context.
    for (var snippet in _snippets.values) {
      if (const {"not-yet", "omit"}.contains(snippet.tag.name)) continue;
      snippet.calculateContext();
    }
  }

  /// Creates a [Page] for every part and chapter listed in the table of
  /// contents, numbering them as it goes.
  void _loadPages() {
    var nextPart = 1;
    var nextChapter = 1;

    for (var entry in _tableOfContents.entries) {
      var partTitle = entry.key;

      // Front- and backmatter have no names, pages, or numbers.
      var isMatter = partTitle == "" || partTitle == "Backmatter";
      var partNumber = "";
      if (!isMatter) {
        partNumber = nextPart.roman;
        nextPart += 1;
      }

      // There is no part page for the frontmatter.
      Page partPage;
      if (partTitle != "") {
        partPage = Page(partTitle, null, partNumber, pages.length);
        pages.add(partPage);
        parts.add(partPage);
      }

      for (var chapterTitle in entry.value) {
        String chapterNumber;
        if (!isMatter) {
          chapterNumber = "${nextChapter++}";
        } else if (chapterTitle == "Appendix I") {
          // Front- and backmatter chapters are specially numbered.
          chapterNumber = "A1";
        } else if (chapterTitle == "Appendix II") {
          chapterNumber = "A2";
        } else {
          chapterNumber = "";
        }

        var page = Page(chapterTitle, partPage, chapterNumber, pages.length);
        pages.add(page);

        var siblings = partPage == null ? frontmatter : partPage.chapters;
        siblings.add(page);
      }
    }
  }

  /// Parses every Java and C source file and files each of its lines under
  /// the snippet that adds it and, if there is one, the snippet that removes
  /// it.
  void _loadSourceFiles() {
    for (var language in ["java", "c"]) {
      for (var file in Glob("$language/**.{c,h,java}").listSync()) {
        var shortPath = p.relative(file.path, from: language);
        var sourceFile = SourceFileParser(this, file.path, shortPath).parse();

        // Create snippets from the lines in the file.
        var lines = sourceFile.lines;
        for (var i = 0; i < lines.length; i++) {
          var line = lines[i];

          _snippets
              .putIfAbsent(line.start, () => Snippet(sourceFile, line.start))
              .addLine(i, line);

          if (line.end == null) continue;

          _snippets
              .putIfAbsent(line.end, () => Snippet(sourceFile, line.end))
              .removeLine(i, line);
        }
      }
    }
  }

  /// Looks for a page with [title].
  Page findChapter(String title) {
    return pages.firstWhere((candidate) => candidate.title == title);
  }

  /// Looks for a page with [number].
  Page findNumber(String number) {
    return pages.firstWhere((candidate) => candidate.numberString == number);
  }

  /// Gets the [Page] [offset] pages before or after [start], or `null` if
  /// that would run off either end of the book.
  Page adjacentPage(Page start, int offset) {
    var target = pages.indexOf(start) + offset;
    if (target >= 0 && target < pages.length) return pages[target];
    return null;
  }

  /// Gets the snippet registered for [tag], or `null` if there is none.
  Snippet findSnippet(CodeTag tag) {
    return _snippets[tag];
  }

  /// Gets the last snippet that appears in [page], or `null` if the page has
  /// no snippets.
  ///
  /// Note: Not very fast.
  Snippet lastSnippet(Page page) {
    Snippet latest;
    var onPage =
        _snippets.values.where((snippet) => snippet.tag.chapter == page);
    for (var candidate in onPage) {
      if (latest == null || candidate.tag > latest.tag) latest = candidate;
    }

    return latest;
  }

  /// Find the [CodeTag] with [name] on [page].
  ///
  /// Throws an [ArgumentError] if the page has no such tag.
  ///
  /// Note: Not very fast.
  CodeTag findTag(Page page, String name) {
    for (var tag in _snippets.keys) {
      if (tag.chapter == page && tag.name == name) return tag;
    }

    throw ArgumentError("Could not find tag '$name' in '${page.title}'.");
  }
}

/// One file of Java or C code whose lines are shown in the book.
class SourceFile {
  final String path;
  final List<SourceLine> lines = [];

  SourceFile(this.path);

  /// The language name for this file: "java" when [path] ends in "java",
  /// otherwise "c".
  String get language {
    if (path.endsWith("java")) return "java";
    return "c";
  }

  /// The [path] with the Java package directory prefix stripped out.
  String get nicePath {
    return path.replaceAll("com/craftinginterpreters/", "");
  }
}

/// One line of a [SourceFile] along with the tags that say when it enters and
/// leaves the book's code.
class SourceLine {
  final String text;
  final Location location;

  /// The first snippet where this line appears in the book.
  final CodeTag start;

  /// The last snippet where this line is removed, or null if the line reaches
  /// the end of the book.
  final CodeTag end;

  SourceLine(this.text, this.location, this.start, this.end);

  /// Returns true if this line exists by the time we reach [tag].
  bool isPresent(CodeTag tag) {
    // Not added yet.
    if (tag < start) return false;

    // Present unless we have reached the snippet that removes it.
    return end == null || tag < end;
  }

  @override
  String toString() {
    var buffer = StringBuffer()
      ..write(text.padRight(72))
      ..write(" // $start");
    if (end != null) buffer.write(" < $end");
    return buffer.toString();
  }
}


================================================
FILE: tool/lib/src/code_tag.dart
================================================
import 'page.dart';

/// Identifies a snippet by the chapter it belongs to and its name, and
/// carries the display options for that snippet.
///
/// Tags are ordered by chapter first and then by their index on the page.
class CodeTag with Ordering<CodeTag> implements Comparable<CodeTag> {
  final Page chapter;
  final String name;

  /// The zero-based index of the tag in the order that it appears on the page.
  final int _index;

  /// Number of preceding lines of context to show.
  final int beforeCount;

  /// Number of trailing lines of context to show.
  final int afterCount;

  /// Whether to show location information.
  final bool showLocation;

  factory CodeTag(Page chapter, String name, int index, int beforeCount,
      int afterCount, bool showLocation) {
    // Hackish. "not-yet" must always sort as the last tag, even when it
    // appears before a real tag, so that it can be pushed for other tags that
    // have been named.
    var effectiveIndex = name == "not-yet" ? 9999 : index;

    return CodeTag._(
        chapter, name, effectiveIndex, beforeCount, afterCount, showLocation);
  }

  CodeTag._(this.chapter, this.name, this._index, this.beforeCount,
      this.afterCount, this.showLocation);

  /// Gets the name of the directory used for this tag when the code is split
  /// at this tag's snippet.
  String get directory {
    var paddedIndex = "$_index".padLeft(2, "0");
    return "$paddedIndex-$name";
  }

  @override
  int compareTo(CodeTag other) {
    var mine = chapter.ordinal;
    var theirs = other.chapter.ordinal;

    // Within the same chapter, fall back to the order on the page.
    return mine == theirs
        ? _index.compareTo(other._index)
        : mine.compareTo(theirs);
  }

  @override
  String toString() {
    return "Tag(${chapter.ordinal}|$_index: $chapter $name)";
  }
}

/// Derives the four relational operators from a class's [compareTo()].
mixin Ordering<T> implements Comparable<T> {
  /// Whether this sorts strictly before [other].
  bool operator <(T other) {
    return compareTo(other) < 0;
  }

  /// Whether this sorts before, or the same as, [other].
  bool operator <=(T other) {
    return compareTo(other) <= 0;
  }

  /// Whether this sorts strictly after [other].
  bool operator >(T other) {
    return compareTo(other) > 0;
  }

  /// Whether this sorts after, or the same as, [other].
  bool operator >=(T other) {
    return compareTo(other) >= 0;
  }
}


================================================
FILE: tool/lib/src/format.dart
================================================
/// The output format that the book is being rendered to.
enum Format {
  /// HTML for the web.
  web,

  /// XML for importing into InDesign.
  print,
}

/// Convenience predicates so callers can ask a [Format] what it is.
extension FormatExtension on Format {
  /// Whether this is [Format.web].
  bool get isWeb {
    return this == Format.web;
  }

  /// Whether this is [Format.print].
  bool get isPrint {
    return this == Format.print;
  }
}


================================================
FILE: tool/lib/src/location.dart
================================================
/// The context in which a line of code appears. The chain of types and
/// functions it's in.
///
/// Locations form a linked chain through [parent]. Two locations are equal
/// when their [kind] and [name] match; [signature] and [parent] are ignored.
class Location {
  /// The enclosing location, or null for the outermost one.
  final Location parent;

  /// What sort of thing this is. The values this class itself tests for are
  /// "file", "class", "constructor", "function", "method", "new", and "top".
  final String kind;

  String _name;

  /// The parameter list text for a function, or null if there isn't one.
  final String signature;

  /// If [kind] is "method" or "function" then this tracks where we are
  /// declaring or defining the function.
  final bool isFunctionDeclaration;

  Location(this.parent, this.kind, this._name,
      {this.signature, this.isFunctionDeclaration = false});

  String get name => _name;

  set name(String value) {
    // Can only set the name if it's an unnamed typedef.
    assert(_name == null);
    _name = value;
  }

  /// Whether this location is a whole file.
  bool get isFile => kind == "file";

  /// Whether this location is a constructor, function, or method.
  bool get isFunction =>
      const {"constructor", "function", "method"}.contains(kind);

  /// The number of locations in the chain from this one up to and including
  /// the outermost parent. A location with no parent has depth 1.
  int get depth {
    var current = this;
    var result = 0;
    while (current != null) {
      result++;
      current = current.parent;
    }
    return result;
  }

  /// Describes the whole chain, outermost first, like
  /// `file a.c > function main(void)`.
  @override
  String toString() {
    var result = "$kind $name";
    if (signature != null) result += "($signature)";
    if (parent != null) result = "$parent > $result";
    return result;
  }

  /// Generates a string of HTML that describes a snippet at this location,
  /// when following the [preceding] location.
  ///
  /// [removed] is the list of lines the snippet replaces. Returns null when
  /// there is no useful location to show.
  String toHtml(Location preceding, List<String> removed) {
    if (kind == "new") return "create new file";
    if (kind == "top") return "add to top of file";

    // Note: The order of these is highly significant.
    if (kind == "class" && parent?.kind == "class") {
      return "nest inside class <em>${parent.name}</em>";
    }

    if (isFunction && preceding == this) {
      //  Hack. There's one place where we add a new overload and that shouldn't
      //  be treated as in the same function. But we can't always look at the
      //  signature because there's another place where a change signature would
      //  confuse the build script. So just check for the one-off case here.
      if (name == "resolve" && signature == "Expr expr") {
        return "add after <em>${preceding.name}</em>(${preceding.signature})";
      }

      // We're still inside a function.
      return "in <em>$name</em>()";
    }

    if (isFunction && removed.isNotEmpty) {
      // Hack. We don't appear to be in the middle of a function, but we are
      // replacing lines, so assume we're replacing the entire function.
      return "$kind <em>$name</em>()";
    }

    if (parent == preceding && !preceding.isFile) {
      // We're nested inside a type.
      return "in ${preceding.kind} <em>${preceding.name}</em>";
    }

    if (preceding == this && !isFile) {
      // We're still inside a type.
      return "in $kind <em>$name</em>";
    }

    if (preceding.isFunction) {
      // We aren't inside a function, but we do know the preceding one.
      return "add after <em>${preceding.name}</em>()";
    }

    if (!preceding.isFile) {
      // We aren't inside any function, but we do know what we follow.
      return "add after ${preceding.kind} <em>${preceding.name}</em>";
    }

    // If we get here, there isn't a useful location to show. The snippet will
    // have enough surrounding context to make it clear. This is usually stuff
    // like imports or includes near the top of the file.
    return null;
  }

  /// Generates a string of InDesign XML that describes a snippet at this
  /// location, when following the [preceding] location.
  ///
  /// This is similar to [toHtml] but uses different tags and places the
  /// signatures inside the tags instead of outside.
  ///
  /// [removed] is the list of lines the snippet replaces. Returns null when
  /// there is no useful location to show.
  String toXml(Location preceding, List<String> removed) {
    if (kind == "new") return "create new file";
    if (kind == "top") return "add to top of file";

    // Note: The order of these is highly significant.
    if (kind == "class" && parent?.kind == "class") {
      return "nest inside class <location-type>${parent.name}</location-type>";
    }

    if (isFunction && preceding == this) {
      //  Hack. There's one place where we add a new overload and that shouldn't
      //  be treated as in the same function. But we can't always look at the
      //  signature because there's another place where a change signature would
      //  confuse the build script. So just check for the one-off case here.
      if (name == "resolve" && signature == "Expr expr") {
        return "add after <location-fn>${preceding.name}"
            "(${preceding.signature})</location-fn>";
      }

      // We're still inside a function.
      return "in <location-fn>$name()</location-fn>";
    }

    if (isFunction && removed.isNotEmpty) {
      // Hack. We don't appear to be in the middle of a function, but we are
      // replacing lines, so assume we're replacing the entire function.
      return "$kind <location-fn>$name()</location-fn>";
    }

    if (parent == preceding && !preceding.isFile) {
      // We're nested inside a type.
      return "in ${preceding.kind} "
          "<location-type>${preceding.name}</location-type>";
    }

    if (preceding == this && !isFile) {
      // We're still inside a type.
      return "in $kind <location-type>$name</location-type>";
    }

    if (preceding.isFunction) {
      // We aren't inside a function, but we do know the preceding one.
      return "add after <location-fn>${preceding.name}()</location-fn>";
    }

    if (!preceding.isFile) {
      // We aren't inside any function, but we do know what we follow. A
      // preceding function was already handled above, so this is always a
      // type here.
      return "add after ${preceding.kind} "
          "<location-type>${preceding.name}</location-type>";
    }

    // If we get here, there isn't a useful location to show. The snippet will
    // have enough surrounding context to make it clear. This is usually stuff
    // like imports or includes near the top of the file.
    return null;
  }

  @override
  bool operator ==(Object other) {
    // Note: Signature is deliberately not considered part of equality. There's
    // a case in calls-and-functions where the signature of a function changes
    // and it confuses the build script if we treat the signatures as
    // significant.
    return other is Location && kind == other.kind && name == other.name;
  }

  // Combines the same two fields that equality compares.
  @override
  int get hashCode => kind.hashCode ^ name.hashCode;

  /// Discard as many children as needed to get to [depth] parents.
  ///
  /// Returns the ancestor that has exactly [depth] locations above it, or
  /// this location if it is already shallower than that.
  Location popToDepth(int depth) {
    // Collect the chain from this location up to the outermost parent.
    var current = this;
    var locations = <Location>[];
    while (current != null) {
      locations.add(current);
      current = current.parent;
    }

    // If we are already shallower, there is nothing to pop.
    if (locations.length < depth + 1) return this;

    return locations[locations.length - depth - 1];
  }
}


================================================
FILE: tool/lib/src/markdown/block_syntax.dart
================================================
import 'package:markdown/markdown.dart';

import '../format.dart';
import '../page.dart';

/// Block syntax for atx-style headers like `## Header`, with the book's
/// special handling:
///
/// - Generates anchor links.
/// - Includes the section numbers.
class BookHeaderSyntax extends BlockSyntax {
  /// Leading `#` define atx-style headers.
  static final _headerPattern = RegExp(r'^(#{1,6}) (.*)$');

  final Page _page;
  final Format _format;

  RegExp get pattern => _headerPattern;

  BookHeaderSyntax(this._page, this._format);

  Node parse(BlockParser parser) {
    // The page has already parsed its headers, keyed by the source line.
    var header = _page.headers[parser.current];
    parser.advance();

    var tag = "h${header.level}";

    // Print output gets the bare header text with no number or link.
    if (_format.isPrint) {
      return Element(tag, [UnparsedContent(header.name)]);
    }

    var linkChildren = <Node>[];
    if (!header.isSpecial) {
      // Section number pieces are separated by a dot set between hair spaces.
      var number = [
        _page.numberString,
        header.headerIndex,
        if (header.subheaderIndex != null) header.subheaderIndex,
      ].join("&#8202;.&#8202;");
      linkChildren.add(Element("small", [Text(number)]));
    }
    linkChildren.add(UnparsedContent(header.name));

    // The header links to itself so readers can grab the anchor.
    var link = Element("a", linkChildren)
      ..attributes["href"] = "#${header.anchor}"
      ..attributes["id"] = header.anchor;

    return Element(tag, [link]);
  }
}


================================================
FILE: tool/lib/src/markdown/code_syntax.dart
================================================
import 'package:markdown/markdown.dart';

import '../book.dart';
import '../code_tag.dart';
import '../format.dart';
import '../page.dart';
import '../snippet.dart';
import '../syntax/highlighter.dart';
import '../text.dart';

/// Custom code block formatter that uses our syntax highlighter.
///
/// Handles fenced code blocks. Blocks whose language is "text" are escaped
/// and emitted without highlighting; everything else goes through
/// [formatCode].
class HighlightedCodeBlockSyntax extends BlockSyntax {
  /// Matches a fence line, capturing its indentation and the text after the
  /// backticks.
  static final _codeFencePattern = RegExp(r'^(\s*)```(.*)$');

  final Format _format;

  RegExp get pattern => _codeFencePattern;

  HighlightedCodeBlockSyntax(this._format);

  bool canParse(BlockParser parser) =>
      pattern.firstMatch(parser.current) != null;

  /// Consumes the opening fence, then collects lines up to the next fence
  /// line, which is consumed too, or the end of the input.
  List<String> parseChildLines(BlockParser parser) {
    var childLines = <String>[];
    parser.advance();

    while (!parser.isDone) {
      var match = pattern.firstMatch(parser.current);
      if (match == null) {
        childLines.add(parser.current);
        parser.advance();
      } else {
        parser.advance();
        break;
      }
    }

    return childLines;
  }

  Node parse(BlockParser parser) {
    // Get the syntax identifier, if there is one.
    var match = pattern.firstMatch(parser.current);
    var indent = match[1].length;
    var language = match[2];

    var childLines = parseChildLines(parser);

    String code;
    if (language == "text") {
      // Don't syntax highlight text.
      var buffer = StringBuffer();
      if (!_format.isPrint) {
        buffer.write("<pre>");

        // The HTML spec mandates that a leading newline after '<pre>' is
        // ignored.
        // https://html.spec.whatwg.org/#element-restrictions
        // Some snippets deliberately start with a newline which needs to be
        // preserved, so output an extra (discarded) newline in that case.
        //
        // An empty fence has no first line to look at, so check for that
        // before calling `first`, which throws on an empty list.
        if (_format.isWeb &&
            childLines.isNotEmpty &&
            childLines.first.isEmpty) {
          buffer.writeln();
        }
      }

      for (var line in childLines) {
        // Strip off any leading indentation.
        if (line.length > indent) line = line.substring(indent);
        checkLineLength(line);

        buffer.write(line.escapeHtml);
        if (_format.isPrint) {
          // Soft break, so that the code stays one paragraph.
          buffer.write("&#x2028;");
        } else {
          buffer.writeln();
        }
      }

      if (!_format.isPrint) buffer.write("</pre>");

      code = buffer.toString();
    } else {
      code = formatCode(language, childLines, _format, indent: indent);
    }

    if (_format.isPrint) {
      // Remove the trailing newline since we'll write a newline after the
      // "</pre>" and we don't want InDesign to insert a blank paragraph.
      code = code.trimTrailingNewline();

      // Replace newlines with soft breaks so that InDesign treats the entire
      // snippet as a single paragraph and keeps it together.
      code = code.replaceAll("\n", "&#x2028;");

      // Don't wrap in a div for XML.
      return Element.text("pre", code);
    }

    var element = Element.text("div", code);
    element.attributes["class"] = "codehilite";
    return element;
  }
}

/// Block syntax that replaces a `^code` tag line with the snippet it names.
class CodeTagBlockSyntax extends BlockSyntax {
  static final _startPattern = RegExp(r'\^code ([a-z0-9-]+)');

  final Book _book;
  final Page _page;
  final Format _format;

  CodeTagBlockSyntax(this._book, this._page, this._format);

  RegExp get pattern => _startPattern;

  bool canParse(BlockParser parser) {
    return pattern.firstMatch(parser.current) != null;
  }

  Node parse(BlockParser parser) {
    // The first capture group is the snippet's name.
    var name = pattern.firstMatch(parser.current)[1];
    parser.advance();

    var codeTag = _page.findCodeTag(name);
    var found = _book.findSnippet(codeTag);

    // Print and web use entirely different markup for snippets.
    var markup = _format.isPrint
        ? _buildSnippetXml(codeTag, found)
        : _buildSnippet(_format, codeTag, found);
    return Text(markup);
  }
}

/// Builds the HTML for [snippet]: the context lines before it, the added
/// code, the context lines after it, and, if [tag] asks for it, where the
/// code goes.
///
/// If [snippet] is null, reports that on stdout and returns an error message
/// to show in the page instead.
String _buildSnippet(Format format, CodeTag tag, Snippet snippet) {
  // NOTE: If you change this, be sure to update the baked in example snippet
  // in introduction.md.

  if (snippet == null) {
    print("Undefined snippet ${tag.name}");
    return "<strong>ERROR: Missing snippet ${tag.name}</strong>\n";
  }

  var locationLines =
      tag.showLocation ? snippet.locationHtmlLines : <String>[];

  var html = StringBuffer('<div class="codehilite">');

  if (snippet.contextBefore.isNotEmpty) {
    var beforeClass = snippet.added.isNotEmpty ? "insert-before" : null;
    _writeContextHtml(format, html, snippet.contextBefore,
        cssClass: beforeClass);
  }

  if (snippet.addedComma != null) {
    // Highlight the line as usual, then wrap just its last comma in a span.
    var highlighted = formatCode(
        snippet.file.language, [snippet.addedComma], format,
        preClass: "insert-before");
    var commaAt = highlighted.lastIndexOf(",");
    html
      ..write(highlighted.substring(0, commaAt))
      ..write('<span class="insert-comma">,</span>')
      ..write(highlighted.substring(commaAt + 1));
  }

  if (tag.showLocation) {
    html.writeln(
        '<div class="source-file">${locationLines.join("<br>\n")}</div>');
  }

  if (snippet.added != null) {
    // The added code is only marked as an insertion when context is shown
    // around it.
    var hasContext = tag.beforeCount > 0 || tag.afterCount > 0;
    html.write(formatCode(snippet.file.language, snippet.added, format,
        preClass: hasContext ? "insert" : null));
  }

  if (snippet.contextAfter.isNotEmpty) {
    var afterClass = snippet.added.isNotEmpty ? "insert-after" : null;
    _writeContextHtml(format, html, snippet.contextAfter,
        cssClass: afterClass);
  }

  html.writeln('</div>');

  // A second copy of the location on a single line, written after the code
  // block.
  if (tag.showLocation) {
    html.writeln(
        '<div class="source-file-narrow">${locationLines.join(", ")}</div>');
  }

  return html.toString();
}

/// Builds the InDesign XML for [snippet]: its location if [tag] asks for it,
/// the context before, the added code, and the context after.
String _buildSnippetXml(CodeTag tag, Snippet snippet) {
  var xml = StringBuffer();

  if (tag.showLocation) xml.writeln(snippet.locationXml);

  if (snippet.contextBefore.isNotEmpty) {
    _writeContextXml(xml, snippet.contextBefore, "before");
  }

  if (snippet.addedComma != null) {
    // TODO: How should this look in print? The HTML version in
    // _buildSnippet() highlights the line and wraps its last comma in an
    // "insert-comma" span; something equivalent is still needed here.
    xml.write("TODO added comma");
  }

  if (snippet.added != null) {
    // Use different tags based on whether there is context before, after,
    // neither, or both.
    var hasBefore = tag.beforeCount > 0;
    var hasAfter = tag.afterCount > 0;

    String insertTag;
    if (hasBefore && hasAfter) {
      insertTag = "interpreter-between";
    } else if (hasBefore) {
      insertTag = "interpreter-after";
    } else if (hasAfter) {
      insertTag = "interpreter-before";
    } else {
      insertTag = "interpreter";
    }

    if (snippet.contextBefore.isNotEmpty) xml.writeln();

    var code = formatCode(snippet.file.language, snippet.added, Format.print)
        // Discard the trailing newline so we don't end up with a blank
        // paragraph in InDesign.
        .trimTrailingNewline()
        // Replace newlines with soft breaks so that InDesign treats the
        // entire snippet as a single paragraph and keeps it together.
        .replaceAll("\n", "&#x2028;");

    xml..write("<$insertTag>")..write(code)..write("</$insertTag>");
  }

  if (snippet.contextAfter.isNotEmpty) {
    xml.writeln();
    _writeContextXml(xml, snippet.contextAfter, "after");
  }

  return xml.toString();
}

/// Writes [lines] to [buffer] as an HTML-escaped `<pre>` block, with
/// [cssClass] as its class if one is given.
///
/// [lines] must not be empty.
void _writeContextHtml(Format format, StringBuffer buffer, List<String> lines,
    {String cssClass}) {
  var openTag = cssClass == null ? "<pre>" : '<pre class="$cssClass">';
  buffer.write(openTag);

  // The HTML spec mandates that a leading newline after '<pre>' is ignored.
  // https://html.spec.whatwg.org/#element-restrictions
  // Some snippets deliberately start with a newline which needs to be
  // preserved, so output an extra (discarded) newline in that case.
  var startsWithBlankLine = lines.first.isEmpty;
  if (format.isWeb && startsWithBlankLine) buffer.writeln();

  lines.forEach((line) {
    buffer.writeln(line.escapeHtml);
  });

  buffer.write("</pre>");
}

/// Writes [lines] to [buffer] as one `<context-$tag>` element, HTML-escaped.
/// Writes nothing if [lines] is empty.
void _writeContextXml(StringBuffer buffer, List<String> lines, String tag) {
  if (lines.isEmpty) return;

  // Join with soft breaks, so that the context stays one paragraph.
  var body = lines.map((line) => line.escapeHtml).join("&#x2028;");

  buffer
    ..write("<context-$tag>")
    ..write(body)
    ..write("</context-$tag>");
}


================================================
FILE: tool/lib/src/markdown/html_renderer.dart
================================================
import 'package:markdown/markdown.dart';

/// Renders Markdown nodes to HTML, with the newline and whitespace tweaks the
/// book's output wants.
class HtmlRenderer implements NodeVisitor {
  /// Tags that are separated from their neighbors with newlines.
  static const _blockTags = {
    "blockquote",
    "div",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "hr",
    "li",
    "ol",
    "p",
    "pre",
    "ul",
  };

  /// Inline HTML that starts with one of these is put on a new line.
  static const _blockMarkupPrefixes = [
    "<aside",
    "</aside",
    "<div",
    "</div",
  ];

  StringBuffer buffer;

  final _elementStack = <Element>[];
  String _lastVisitedTag;

  /// Renders [nodes] into a fresh buffer and returns the HTML, followed by a
  /// newline.
  String render(List<Node> nodes) {
    buffer = StringBuffer();

    nodes.forEach((node) {
      node.accept(this);
    });

    buffer.writeln();

    return buffer.toString();
  }

  void visitText(Text text) {
    var content = text.text;

    // Put a newline before inline HTML markup for block-level tags.
    var isBlockMarkup =
        _blockMarkupPrefixes.any((prefix) => content.startsWith(prefix));
    if (isBlockMarkup) buffer.writeln();

    // Drop leading whitespace from text right after a <p> or <li> opens.
    var followsOpenTag = const ['p', 'li'].contains(_lastVisitedTag);
    buffer.write(followsOpenTag ? content.trimLeft() : content);

    _lastVisitedTag = null;
  }

  bool visitElementBefore(Element element) {
    var tag = element.tag;

    // Separate block-level elements with newlines.
    if (buffer.isNotEmpty && _blockTags.contains(tag)) buffer.writeln();

    buffer.write('<$tag');
    element.attributes.forEach((name, value) {
      buffer.write(' $name="$value"');
    });

    _lastVisitedTag = tag;

    if (!element.isEmpty) {
      _elementStack.add(element);
      buffer.write('>');
      return true;
    }

    // Empty element like <hr/>.
    buffer.write(' />');
    if (tag == 'br') buffer.write('\n');
    return false;
  }

  void visitElementAfter(Element element) {
    assert(identical(_elementStack.last, element));

    // A block element whose last child was also a block element gets its
    // closing tag on its own line. Blockquotes always do.
    var children = element.children;
    var closesNestedBlock = children != null &&
        children.isNotEmpty &&
        _blockTags.contains(_lastVisitedTag) &&
        _blockTags.contains(element.tag);
    if (closesNestedBlock || element.tag == 'blockquote') buffer.writeln();

    buffer.write('</${element.tag}>');

    _lastVisitedTag = _elementStack.removeLast().tag;
  }
}


================================================
FILE: tool/lib/src/markdown/inline_syntax.dart
================================================
import 'package:charcode/ascii.dart';
import 'package:markdown/markdown.dart';

import '../format.dart';

/// Replaces `...`, and one following space if present, with spaced-out dots.
class EllipseSyntax extends InlineSyntax {
  final Format _format;

  EllipseSyntax(this._format) : super(r"\.\.\. ?", startCharacter: $dot);

  bool onMatch(InlineParser parser, Match match) {
    // A Unicode ellipsis doesn't have as much space between the dots as
    // Chicago style mandates so do our own thing.
    String dots;
    if (_format.isPrint) {
      dots = "&thinsp;.&thinsp;.&thinsp;.&thinsp;";
    } else {
      dots = '<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.&nbsp;</span>';
    }

    parser.addNode(Text(dots));
    return true;
  }
}

/// Replaces a straight `'` with a left or right curly single quote, chosen
/// from the characters on either side of it.
class ApostropheSyntax extends InlineSyntax {
  final Format _format;

  ApostropheSyntax(this._format) : super(r"'", startCharacter: $apostrophe);

  bool onMatch(InlineParser parser, Match match) {
    // Use -1 for a neighbor that is off either end of the source.
    var position = parser.pos;
    var before = position > 0 ? parser.charAt(position - 1) : -1;
    var after = position < parser.source.length - 1
        ? parser.charAt(position + 1)
        : -1;

    // Print uses numeric character references; the web uses named entities.
    String quote;
    if (_isRight(before, after)) {
      quote = _format.isPrint ? "#8217" : "rsquo";
    } else {
      quote = _format.isPrint ? "#8216" : "lsquo";
    }

    parser.addNode(Text("&$quote;"));
    return true;
  }

  /// Whether the quote between character codes [before] and [after] should
  /// curl to the right.
  bool _isRight(int before, int after) {
    if (before == $space) {
      // Only years like "the '60s" get a right quote after a space.
      return after >= $0 && after <= $9;
    }

    // An opening quote at the start of a line.
    if (before == $lf) return false;

    // Default to right. That also covers a possessive after code, where the
    // quote follows a backquote and is followed by "s".
    return true;
  }
}

/// Inline syntax that turns a straight double quote into a curly left or right
/// double quote, chosen from the characters on either side of it.
class SmartQuoteSyntax extends InlineSyntax {
  final Format _format;

  SmartQuoteSyntax(this._format) : super(r'"', startCharacter: $double_quote);

  bool onMatch(InlineParser parser, Match match) {
    var position = parser.pos;

    // -1 stands in for "no character" at either end of the source.
    var previous = position > 0 ? parser.charAt(position - 1) : -1;
    var next =
        position < parser.source.length - 1 ? parser.charAt(position + 1) : -1;

    // Print output uses numeric character references; the web output uses
    // named entities.
    String entity;
    if (_isRight(previous, next)) {
      entity = _format.isPrint ? "#8221" : "rdquo";
    } else {
      entity = _format.isPrint ? "#8220" : "ldquo";
    }

    parser.addNode(Text("&$entity;"));
    return true;
  }

  /// Whether a double quote sitting between the character codes [before] and
  /// [after] should curl to the right.
  bool _isRight(int before, int after) {
    bool within(int low, int high) => before >= low && before <= high;

    // A quote immediately followed by a space is closing.
    if (after == $space) return true;

    // So is one that directly follows a letter, a digit, or sentence-ending
    // punctuation.
    var followsWord = within($a, $z) || within($A, $Z) || within($0, $9);
    var followsSentenceEnd =
        before == $dot || before == $question || before == $exclamation;
    if (followsWord || followsSentenceEnd) return true;

    // Otherwise it is closing only when punctuation comes right after it.
    // Default to left.
    return after == $colon || after == $comma || after == $dot;
  }
}

/// Inline syntax that replaces `--` surrounded by whitespace (the whitespace
/// is part of the match) with an em dash.
class EmDashSyntax extends InlineSyntax {
  final Format _format;

  EmDashSyntax(this._format) : super(r"\s--\s");

  bool onMatch(InlineParser parser, Match match) {
    // Print gets the literal character; the web output gets a styled entity.
    var dash = _format.isPrint ? '—' : '<span class="em">&mdash;</span>';
    parser.addNode(Text(dash));
    return true;
  }
}

/// Replaces each newline inside a paragraph with a single space, because
/// InDesign treats a newline as a line break.
class NewlineSyntax extends InlineSyntax {
  NewlineSyntax() : super("\n", startCharacter: $lf);

  bool onMatch(InlineParser parser, Match match) {
    var space = Text(" ");
    parser.addNode(space);
    return true;
  }
}


================================================
FILE: tool/lib/src/markdown/markdown.dart
================================================
import 'package:markdown/markdown.dart' hide HtmlRenderer;

import '../book.dart';
import '../format.dart';
import '../page.dart';
import 'block_syntax.dart';
import 'code_syntax.dart';
import 'html_renderer.dart';
import 'inline_syntax.dart';
import 'xml_renderer.dart';

/// Parses [lines] as Markdown using the book's custom block and inline
/// syntaxes and renders the result.
///
/// Returns XML from [XmlRenderer] when [format] is for print and HTML from
/// [HtmlRenderer] otherwise.
String renderMarkdown(Book book, Page page, List<String> lines, Format format) {
  var blockSyntaxes = <BlockSyntax>[
    BookHeaderSyntax(page, format),
    CodeTagBlockSyntax(book, page, format),
    HighlightedCodeBlockSyntax(format),
  ];

  var inlineSyntaxes = <InlineSyntax>[
    // Inline Markdown code syntax goes ahead of the smart quotes so that
    // quotes inside `code` spans are left alone.
    CodeSyntax(),
    EllipseSyntax(format),
    ApostropheSyntax(format),
    SmartQuoteSyntax(format),
    EmDashSyntax(format),
  ];

  // Only the print output needs newlines flattened into spaces.
  if (format.isPrint) inlineSyntaxes.add(NewlineSyntax());

  var document = Document(
      blockSyntaxes: blockSyntaxes,
      inlineSyntaxes: inlineSyntaxes,
      extensionSet: ExtensionSet.gitHubFlavored);

  var ast = document.parseLines(lines);
  return format.isPrint
      ? XmlRenderer().render(ast)
      : HtmlRenderer().render(ast);
}


================================================
FILE: tool/lib/src/markdown/xml_renderer.dart
================================================
import 'package:markdown/markdown.dart';

/// Matches a double-quoted string that ends in ".png" and captures the text
/// between the quotes.
///
/// The dot is escaped so that only a literal "." is accepted before "png".
final _imagePathPattern = RegExp(r'"([^"]+\.png)"');

/// Matches opening XML tag names.
final _tagPattern = RegExp(r"<([a-z-_0-9]+)");

/// Matches an opening `<span>` tag that carries a `name` attribute.
final _spanPattern = RegExp(r'<span\s+name="[^"]+">');

/// Matches a `small-caps` span whose content is only uppercase letters and
/// captures those letters.
final _smallCapsPattern = RegExp(r'<span\s+class="small-caps">([A-Z]+)</span>');

/// A [NodeVisitor] that walks a parsed Markdown AST and builds a flat list of
/// paragraph-level XML tags, which [render] wraps in a `<chapter>` element.
///
/// Block structure is tracked with a chain of [_Context]s rather than nested
/// output tags: each [_Paragraph] remembers the context it was created in and
/// derives its tag name from that.
class XmlRenderer implements NodeVisitor {
  /// While building, also fill a StringBuffer with the minimal set of
  /// paragraphs needed to cover all tags in the book.
  static final tagFileBuffer = StringBuffer();

  /// Keeps track of which XML tags [tagFileBuffer] contains.
  static final allTags = <String>{};

  /// The list of paragraph-level tags.
  final List<_Paragraph> _paragraphs = [];

  /// Whether we need to create a new paragraph before appending the next text.
  bool _pendingParagraph = true;

  /// The nested stack of current inline tags.
  final List<_Inline> _inlineStack = [];

  /// The stack tracking where we are in the document.
  _Context _context = _Context("main");

  /// Visits every node in [nodes] and returns the collected paragraphs as XML
  /// wrapped in `<chapter>` tags.
  ///
  /// As a side effect, any paragraph containing a tag name not yet in
  /// [allTags] is also appended to the shared [tagFileBuffer].
  String render(List<Node> nodes) {
    for (final node in nodes) {
      node.accept(this);
    }

    var buffer = StringBuffer();
    buffer.writeln("<chapter>");

    // Aside paragraphs and main-column paragraphs each compare against the
    // previous paragraph of their own kind when pretty-printing.
    _Paragraph previousMain;
    _Paragraph previousAside;

    for (var paragraph in _paragraphs) {
      String text;

      if (paragraph.context.has("aside")) {
        text = paragraph.prettyPrint(previousAside);
        previousAside = paragraph;
      } else {
        text = paragraph.prettyPrint(previousMain);
        previousMain = paragraph;

        // Reached the end of an aside.
        previousAside = null;
      }

      buffer.write(text);

      // Only add the paragraph to the tag file buffer if it has a unique tag.
      var tags = _tagPattern.allMatches(text).map((match) => match[1]).toSet();
      if (tags.difference(allTags).isNotEmpty) {
        tagFileBuffer.write(text);
        allTags.addAll(tags);
      }
    }

    buffer.writeln("</chapter>");
    return buffer.toString();
  }

  /// Handles a text node, which may be prose or a chunk of literal HTML/XML
  /// that the Markdown parser passed through as text.
  ///
  /// The text is first normalized (entities, tables, spans), then dispatched
  /// on its leading tag. The order of the checks below matters because each
  /// early return relies on the rewrites that precede it.
  void visitText(Text node) {
    var text = node.text;

    if (text.isEmpty) return;

    // There are a couple of hand-coded HTML ellipses inside an HTML table.
    text = text.replaceAll(
        '<span class="ellipse">&thinsp;.&thinsp;.&thinsp;.</span>', "&#8230;");

    // Convert the small-caps bitwise operator spans in "Optimization" to
    // custom tags.
    text = text.replaceAllMapped(
        _smallCapsPattern, (match) => "<bitwise>${match[1]}</bitwise>");

    // Swap named HTML entities for numeric character references and make
    // `<br>` self-closing.
    text = text
        .replaceAll("&eacute;", "&#233;")
        .replaceAll("&ensp;", "&#8194;")
        .replaceAll("&ldquo;", "&#8220;")
        .replaceAll("&nbsp;", "&#160;")
        .replaceAll("&rdquo;", "&#8221;")
        .replaceAll("&rsquo;", "&#8217;")
        .replaceAll("&rarr;", "&#8594;")
        .replaceAll("&sect;", "&#167;")
        .replaceAll("&thinsp;", "&#8201;")
        .replaceAll("&times;", "&#215;")
        .replaceAll("<br>", "<br/>");

    // Don't send tables to InDesign as XML.
    text = text
        .replaceAll("<table>", "[table]")
        .replaceAll("</table>", "[/table]")
        .replaceAll("<thead>", "[thead]")
        .replaceAll("</thead>", "[/thead]")
        .replaceAll("<tbody>", "[tbody]")
        .replaceAll("</tbody>", "[/tbody]")
        .replaceAll("<tr>", "[tr]")
        .replaceAll("</tr>", "[/tr]")
        .replaceAll("<td>", "[td]")
        .replaceAll("</td>", "[/td]");

    // Turn aside span locators into little visible markers.
    text = text
        .replaceAll(_spanPattern, "<mark>@</mark>")
        .replaceAll("</span>", "");

    // Discard the challenge and design note divs.
    if (text.startsWith("<div") || text.startsWith("</div>")) return;

    // Convert image tags to just their paths.
    if (text.startsWith("<img")) {
      // NOTE(review): this assumes every <img> text contains a quoted .png
      // path; firstMatch() returns null otherwise and the index would fail.
      var imagePath = _imagePathPattern.firstMatch(text)[1];

      // The GC chapter has a couple of tiny inline images that happen to be in
      // an unordered list. Don't create paragraphs for them.
      var isInline = _context.has("unordered");

      // Put main column images in their own paragraph.
      if (!isInline) _push("image");
      _addText(imagePath);
      if (!isInline) _pop();
      return;
    }

    // Include code snippet XML as-is.
    if (text.startsWith("<location-file>") ||
        // "Representing Code" has a few inserted snippets with no location tag.
        text.startsWith("<context-before>") ||
        text.startsWith("<interpreter>") ||
        text.startsWith("<interpreter-between>")) {
      _push("xml");
      _addText(text);
      _pop();
      return;
    }

    // Since aside tags appear in the Markdown as literal HTML, they are parsed
    // as text, not Markdown elements.
    if (text.startsWith("<aside")) {
      _push("aside");
      return;
    }

    if (text.startsWith("</aside>")) {
      _pop();
      return;
    }

    if (text.trimLeft().startsWith("<cite>")) {
      // A cite opens an "xml" context that stays open until text ending in
      // "</cite>" is seen (checked at the bottom of this method).
      _push("xml");

      // Use a custom inline style for cite emphasis.
      text = text
          .replaceAll("<em>", "<cite-em>")
          .replaceAll("</em>", "</cite-em>");

      _addText(text.trimLeft());
    } else if (_inlineStack.isNotEmpty) {
      // We're in an inline tag, so add it to that.
      _inlineStack.last.text += text;
    } else {
      if (_context.name == "xml") {
        // Hackish. Assume the only <em> tags inside XML blocks are in cites.
        text = text
            .replaceAll("<em>", "<cite-em>")
            .replaceAll("</em>", "</cite-em>");
      }

      _addText(text);
    }

    // Close the "xml" context opened for a cite.
    if (text.endsWith("</cite>")) _pop();
  }

  /// Called when entering [element]. Block tags push a context (or reset the
  /// pending paragraph); inline tags push an [_Inline] onto the inline stack.
  ///
  /// Returns `false` when [element] reports itself as empty and `true`
  /// otherwise.
  bool visitElementBefore(Element element) {
    switch (element.tag) {
      case "p":
        _resetParagraph();
        break;

      case "blockquote":
        _push("quote");
        break;

      case "h2":
        // The "Challenges" and "Design Note" headings replace the whole
        // context chain with a new root, so the text after them is tagged
        // with that root's name.
        var text = element.textContent;
        if (text == "Challenges") {
          _context = _Context("challenges");
        } else if (text.contains("Design Note")) {
          _context = _Context("design");
        }
        _push("heading");
        break;

      case "h3":
        _push("subheading");
        break;

      case "ol":
        _push("ordered");

        // Immediately push a subcontext to mark the first list item.
        _push("first");
        break;

      case "pre":
        _push("pre");
        break;

      case "ul":
        _push("unordered");
        break;

      case "li":
        // If we're on the first item, discard it and replace it with the next
        // item. The first item restarts numbering but later ones don't.
        if (_context.name != "first") _push("item");
        break;

      case "a":
        // TODO: What do we want to do with links? Highlight them somehow so
        // that I decide if the surrounding text needs tweaking?
        break;

      case "code":
      case "em":
      case "small":
      case "strong":
        // Inline tags.

        // If we're in an inline tag already, flatten them by emitting inline
        // segments for any text they have. Leave them on the stack so that
        // they get resumed when the nested inline tags end.
        var tagParts = [element.tag];
        for (var i = 0; i < _inlineStack.length; i++) {
          var inline = _inlineStack[i];
          if (inline.text.isNotEmpty) {
            _addInline(inline);
            _inlineStack[i] = _Inline(inline.tag);
          }

          tagParts.add(inline.tag);
        }

        String tag;
        if (tagParts.contains("code")) {
          // Code formatting wipes out italics or bold.
          tag = "code";
        } else {
          tagParts.sort();
          tag = tagParts.join("-");
        }
        // Make a tag name that includes all nested tags. We'll define separate
        // styles for each combination.
        _inlineStack.add(_Inline(tag));
        break;

      default:
        print("Unexpected open tag ${element.tag}.");
    }

    return !element.isEmpty;
  }

  /// Called when leaving [element]. Undoes whatever [visitElementBefore] pushed
  /// for the same tag.
  void visitElementAfter(Element element) {
    switch (element.tag) {
      case "blockquote":
      case "h2":
      case "h3":
      case "pre":
        _pop();
        break;

      case "ol":
      case "ul":
        // If we still have a context for the item, it means we have a Markdown
        // list with no paragraph tags inside the items. There are a couple of
        // those in the book.
        if (_context.name == "first" || _context.name == "item") _pop();

        // Pop the list itself.
        _pop();
        break;

      case "a":
        // Nothing to do.
        break;

      case "li":
      case "p":
        // The first paragraph in each list item has a special style so that
        // we can apply the bullet or number. Later paragraphs in the same list
        // item do not.

        // We match both <p> and <li> so that lists without paragraphs inside
        // don't leave lingering item contexts.
        if (_context.name == "first" || _context.name == "item") _pop();
        break;

      case "code":
      case "em":
      case "small":
      case "strong":
        // Inline tags.
        _addInline(_inlineStack.removeLast());
        break;

      default:
        print("Unexpected close tag ${element.tag}.");
    }
  }

  /// Enters a new child context called [name] and forces the next text into a
  /// fresh paragraph.
  void _push(String name) {
    _context = _Context(name, _context);
    _resetParagraph();
  }

  /// Returns to the parent context and forces the next text into a fresh
  /// paragraph.
  void _pop() {
    _context = _context.parent;
    _resetParagraph();
  }

  /// Appends [text] as a plain-text segment of the current paragraph, starting
  /// a new paragraph first if one is pending.
  void _addText(String text) {
    _flushParagraph();

    // Discard any leading whitespace at the beginning of list items.
    var paragraph = _paragraphs.last;
    if (paragraph.contents.isEmpty &&
        (_context.has("ordered") || _context.has("unordered"))) {
      text = text.trimLeft();
    }

    paragraph.contents.add(_Inline(null, text));
  }

  /// Appends [inline] to the current paragraph, starting a new paragraph first
  /// if one is pending.
  void _addInline(_Inline inline) {
    _flushParagraph();
    _paragraphs.last.contents.add(inline);
  }

  /// Marks that the next piece of content must begin a new paragraph.
  void _resetParagraph() {
    _pendingParagraph = true;
  }

  /// Creates the pending paragraph, if any, in the current context.
  void _flushParagraph() {
    if (!_pendingParagraph) return;
    _paragraphs.add(_Paragraph(_context));
    _pendingParagraph = false;
  }
}

/// One link in the chain of nested document contexts (main text, aside, list,
/// list item, and so on) that determines which XML tag a paragraph gets.
class _Context {
  /// The name of this context, such as "main", "aside", or "ordered".
  final String name;

  /// The enclosing context, or `null` if this is the outermost one.
  final _Context parent;

  _Context(this.name, [this.parent]);

  /// Whether any of the contexts in this chain are [name].
  bool has(String name) {
    var context = this;
    while (context != null) {
      if (context.name == name) return true;
      context = context.parent;
    }

    return false;
  }

  /// Whether [parent] has [name].
  bool isIn(String name) => parent != null && parent.has(name);

  /// How many levels of list nesting this context contains.
  int get listDepth {
    var depth = 0;

    for (var context = this; context != null; context = context.parent) {
      if (context.name == "ordered" || context.name == "unordered") {
        depth++;
      } else if (context.name == "aside") {
        // Content inside an aside inside a list item isn't really part of the
        // list.
        break;
      }
    }

    return depth;
  }

  /// The XML tag name to use for a paragraph created in this context.
  ///
  /// Prints a warning if lists are nested more than two levels deep.
  String get paragraphTag {
    var tag = name;
    var depth = listDepth;
    if (depth > 2) print("Unexpected deep list nesting $this.");

    switch (tag) {
      case "main":
        return "p";
      case "challenges":
        // There's only one paragraph of non-list prose text and that's also
        // indented like a list (so that it lines up with the heading), so just
        // use the same style for both.
        return "challenges-list-p";
      case "design":
        return "design-p";
      case "aside":
        return "aside";
      case "xml":
        return "xml";

      case "first":
      case "item":
        // List items are named after the kind of list that contains them.
        tag = "${parent.name}-$tag";
        if (depth > 1) tag = "sublist-$tag";
        break;

      case "ordered":
      case "unordered":
        tag = "list-p";
        if (depth > 1) tag = "sublist-$tag";
        break;

      default:
        if (depth > 1) {
          tag = "sublist-$tag";
        } else if (depth > 0) {
          tag = "list-$tag";
        }
    }

    // Prefix with the enclosing section so each gets its own style.
    if (isIn("aside")) {
      tag = "aside-$tag";
    } else if (isIn("challenges")) {
      tag = "challenges-$tag";
    } else if (isIn("design")) {
      tag = "design-$tag";
    }

    return tag;
  }

  /// The prefix to apply to inline tags within this context or the empty string
  /// if none should be added.
  String get inlinePrefix {
    if (has("aside")) return "aside";
    if (has("challenges")) return "challenges";
    if (has("design")) return "design";
    if (has("quote")) return "quote";

    return "";
  }

  /// The chain of context names from outermost to innermost, joined by " > ".
  String toString() {
    if (parent == null) return name;
    return "$parent > $name";
  }
}

/// A single paragraph-level XML tag together with the text and inline tags it
/// holds.
class _Paragraph {
  /// The document context this paragraph was created in; it determines the
  /// paragraph's tag name.
  final _Context context;

  /// The text and inline segments, in output order.
  final List<_Inline> contents = [];

  _Paragraph(this.context);

  /// Whether a paragraph tagged [tag] that directly follows one tagged
  /// [previousTag] should use the "-next" variant of its tag.
  bool _isNext(String tag, String previousTag) {
    const nextTags = {
      "aside",
      "challenges-p",
      "challenges-list-p",
      "design-p",
      "design-list-p",
      "list-p",
      "p"
    };

    if (tag != previousTag) {
      // The paragraph after a bullet item is also a next. Checking for
      // "ordered-" covers both "unordered" and "ordered", tags that start with
      // "challenges" or "design", and ones that end with "first" or "item".
      return tag.endsWith("list-p") && previousTag.contains("ordered-");
    }

    return nextTags.contains(tag);
  }

  /// Renders this paragraph as one line of XML.
  ///
  /// [previous] is the paragraph that came before this one, or `null` if there
  /// is none. Paragraphs tagged "xml" are written without a wrapping tag.
  String prettyPrint(_Paragraph previous) {
    var tag = context.paragraphTag;

    var followsSibling =
        previous != null && _isNext(tag, previous.context.paragraphTag);
    if (followsSibling) tag = "$tag-next";

    var wrapped = tag != "xml";
    var buffer = StringBuffer();

    if (wrapped) buffer.write("<$tag>");
    contents.forEach((inline) => inline.prettyPrint(buffer, context));
    if (wrapped) buffer.write("</$tag>");

    buffer.writeln();
    return buffer.toString();
  }
}

/// Either a run of plain text or an inline tag wrapping some text.
class _Inline {
  /// The tag name if this is an inline tag or `null` if it is text.
  final String tag;

  /// The text content. Mutable so that more text can be appended while the
  /// tag is still open.
  String text;

  _Inline(this.tag, [this.text = ""]);

  /// Whether this is plain text rather than an inline tag.
  bool get isText => tag == null;

  /// Writes this segment to [buffer]. Inline tags get the inline prefix of
  /// [context], if it has one, added to the front of their tag name.
  void prettyPrint(StringBuffer buffer, _Context context) {
    if (isText) {
      buffer.write(text);
      return;
    }

    var prefix = context.inlinePrefix;
    var fullTag = prefix == "" ? tag : "$prefix-$tag";

    buffer
      ..write("<$fullTag>")
      ..write(text)
      ..write("</$fullTag>");
  }
}


================================================
FILE: tool/lib/src/mustache.dart
================================================
/// Creates the data map and renders the Mustache templates to HTML.
import 'dart:io';

import 'package:mustache_template/mustache_template.dart';
import 'package:path/path.dart' as p;

import 'book.dart';
import 'page.dart';
import 'text.dart';

/// Loads Mustache templates on demand, caches them by name, and renders pages
/// with them.
class Mustache {
  /// The directory where template files can be found.
  final String _templateDirectory;

  /// Templates that have already been loaded, keyed by template name.
  final Map<String, Template> _templates = {};

  /// Uses [templateDirectory] if given, otherwise "asset/mustache".
  Mustache([String templateDirectory])
      : _templateDirectory = templateDirectory ?? p.join("asset", "mustache");

  /// Renders [page] of [book] around the already-rendered [body].
  ///
  /// Uses [template] if given, otherwise the page's own template.
  String render(Book book, Page page, String body, {String template}) {
    var partTitle = page.part?.title;

    // The "up" link goes to the containing part for chapters, to the front
    // page for the table of contents, and to the table of contents otherwise.
    String up;
    if (partTitle != null) {
      up = partTitle;
    } else if (page.title == "Table of Contents") {
      up = "Crafting Interpreters";
    } else {
      up = "Table of Contents";
    }

    var previousPage = book.adjacentPage(page, -1);
    var nextPage = book.adjacentPage(page, 1);

    String nextType;
    if (nextPage != null) {
      if (nextPage.isChapter) {
        nextType = "Chapter";
      } else if (nextPage.isPart) {
        nextType = "Part";
      }
    }

    var chapters = page.isPart ? _makeChapterList(page) : null;

    var isFrontmatter =
        page.title == "Acknowledgements" || page.title == "Dedication";
    var hasDesignNote = page.designNote != null;

    var data = <String, dynamic>{
      "is_chapter": partTitle != null,
      "is_part": partTitle == null && page.title != null && !isFrontmatter,
      "is_frontmatter": isFrontmatter,
      "title": page.title,
      "part": partTitle,
      "body": body,
      "sections": _makeSections(page),
      "chapters": chapters,
      "design_note": page.designNote,
      "has_design_note": hasDesignNote,
      "has_challenges": page.hasChallenges,
      "has_challenges_or_design_note": page.hasChallenges || hasDesignNote,
      "has_number": page.numberString != "",
      "number": page.numberString,
      // Previous page.
      "has_prev": previousPage != null,
      "prev": previousPage?.title,
      "prev_file": previousPage?.fileName,
      // Next page.
      "has_next": nextPage != null,
      "next": nextPage?.title,
      "next_file": nextPage?.fileName,
      "next_type": nextType,
      "has_up": up != null,
      "up": up,
      "up_file": up != null ? toFileName(up) : null,
      // TODO: Only need this for contents page.
      "part_1": _makePartData(book, 0),
      "part_2": _makePartData(book, 1),
      "part_3": _makePartData(book, 2),
    };

    var templateName = template ?? page.template;
    return _load(templateName).renderString(data);
  }

  /// Builds the template data for the part at [partIndex] in [book].
  Map<String, dynamic> _makePartData(Book book, int partIndex) {
    var part = book.parts[partIndex];

    var data = <String, dynamic>{};
    data["title"] = part.title;
    data["number"] = part.numberString;
    data["file"] = part.fileName;
    data["chapters"] = _makeChapterList(part);
    return data;
  }

  /// Builds the template data for each chapter in [part].
  List<Map<String, dynamic>> _makeChapterList(Page part) {
    return part.chapters
        .map<Map<String, dynamic>>((chapter) => <String, dynamic>{
              "title": chapter.title,
              "number": chapter.numberString,
              "file": chapter.fileName,
              // Straight apostrophes in the design note name become curly.
              "design_note": chapter.designNote?.replaceAll("'", "&rsquo;"),
            })
        .toList();
  }

  /// Builds the template data for the level-2 headers in [page], leaving out
  /// the special "Challenges" and "Design Note" ones.
  List<Map<String, dynamic>> _makeSections(Page page) {
    return [
      for (var header in page.headers.values)
        if (header.level == 2 && !header.isSpecial)
          <String, dynamic>{
            "name": header.name,
            "anchor": header.anchor,
            "index": header.headerIndex
          }
    ];
  }

  /// Returns the template called [name], reading it from disk the first time
  /// it is requested.
  Template _load(String name) =>
      _templates.putIfAbsent(name, () => _read(name));

  /// Reads "[name].html" from the template directory. Partials referenced by
  /// the template are resolved through [_load].
  Template _read(String name) {
    var path = p.join(_templateDirectory, "$name.html");
    var source = File(path).readAsStringSync();
    return Template(source, name: path, partialResolver: _load);
  }
}


================================================
FILE: tool/lib/src/page.dart
================================================
import 'package:path/path.dart' as p;

import 'code_tag.dart';
import 'page_parser.dart';
import 'text.dart';

/// One page (in the HTML sense) of the book.
///
/// Each chapter, part introduction, and backmatter section is a page. The
/// page's Markdown file is parsed lazily the first time any of the getters
/// backed by it is read.
class Page {
  /// The title of this page.
  final String title;

  /// The chapter or part number, like "12", "II", or "".
  final String numberString;

  /// The numeric index of the page in chapter order.
  ///
  /// Used to determine which order snippets appear in the book.
  final int ordinal;

  /// If this page is a part page, the list of chapter pages it contains.
  final List<Page> chapters = [];

  /// If this page is a chapter page, the part that contains this page.
  final Page part;

  /// The parsed Markdown file, or `null` until [_ensureFile] is first called.
  PageFile _file;

  Page(this.title, this.part, this.numberString, this.ordinal);

  /// The base file path and URI for the page, without any extension.
  String get fileName => toFileName(title);

  /// The path to this page's Markdown source file.
  String get markdownPath => p.join("book", "$fileName.md");

  /// The path to this page's generated HTML file.
  String get htmlPath => p.join("site", "$fileName.html");

  /// Whether this page is a chapter page, as opposed to a part.
  bool get isChapter => part != null;

  /// Whether this page is a part page, as opposed to a chapter.
  bool get isPart => part == null;

  /// The code language used for this chapter page or `null` if this isn't one
  /// of the main chapter pages.
  String get language {
    if (isPart) return null;
    if (part.title == "A Tree-Walk Interpreter") return "java";
    if (part.title == "A Bytecode Virtual Machine") return "c";
    return null;
  }

  /// A short identifier of the form "chapNN_word", where NN is the number
  /// padded to two digits and "word" is the first word of the title in
  /// lowercase, skipping a leading "a" or "the".
  String get shortName {
    var number = numberString.padLeft(2, "0");

    var words = title.split(" ");
    var word = words.first.toLowerCase();
    if (word == "a" || word == "the") word = words[1].toLowerCase();

    return "chap${number}_$word";
  }

  /// The lines of this page's Markdown file.
  List<String> get lines => _ensureFile().lines;

  /// The name of the Mustache template used to render this page.
  String get template {
    if (title == "Crafting Interpreters") return "index";
    if (title == "Table of Contents") return "contents";
    return "page";
  }

  /// The section headers found in this page's Markdown file.
  Map<String, Header> get headers => _ensureFile().headers;

  /// Whether this page has a "Challenges" section.
  bool get hasChallenges => _ensureFile().hasChallenges;

  /// The name of the design note in this page, or `null` if there is none.
  String get designNote => _ensureFile().designNote;

  /// All of the code tags that appear in this page's Markdown file.
  Iterable<CodeTag> get codeTags => _ensureFile().codeTags.values;

  /// Looks up the code tag called [name] on this page.
  ///
  /// The placeholder names "omit" and "not-yet" are not looked up in the
  /// Markdown file. Each gets a synthetic tag carrying its own name, a fixed
  /// high index, no context lines, and no location display.
  ///
  /// Throws an [ArgumentError] if no tag with that name exists.
  CodeTag findCodeTag(String name) {
    // Return fake tags for the placeholders. Each fake tag is named after the
    // placeholder it stands for.
    if (name == "omit") return CodeTag(this, "omit", 9998, 0, 0, false);
    if (name == "not-yet") return CodeTag(this, "not-yet", 9999, 0, 0, false);

    var codeTag = _ensureFile().codeTags[name];
    if (codeTag != null) return codeTag;

    throw ArgumentError("Could not find code tag '$name'.");
  }

  String toString() => title;

  /// Lazily parse the Markdown file for the page.
  PageFile _ensureFile() => _file ??= parsePage(this);
}

/// The data for a page parsed from the Markdown source.
class PageFile {
  /// The lines of the Markdown file.
  final List<String> lines;

  /// The section headers in the file, keyed by the full source line each
  /// header appears on.
  final Map<String, Header> headers;

  /// Whether the file contains a "Challenges" section header.
  final bool hasChallenges;

  /// The name of the design note in this page, or `null` if there is none.
  final String designNote;

  /// The code tags referenced in the file, keyed by tag name.
  final Map<String, CodeTag> codeTags;

  PageFile(this.lines, this.headers, this.hasChallenges, this.designNote,
      this.codeTags);
}

/// A section header found in a page's Markdown.
class Header {
  /// The header depth: 1 is the page title, 2 header, 3 subheader.
  final int level;

  /// The index of the header this belongs to.
  final int headerIndex;

  /// The index of this subheader within its header.
  final int subheaderIndex;

  /// The header's text.
  final String name;

  Header(this.level, this.headerIndex, this.subheaderIndex, this.name);

  /// Whether this header is for the special "Challenges" or "Design Note"
  /// sections.
  bool get isSpecial {
    if (isChallenges) return true;
    return isDesignNote;
  }

  /// Whether this is the header of the "Challenges" section.
  ///
  /// The level is checked as well as the name because the Introduction has a
  /// "Challenges" *subheader*, which must not count.
  bool get isChallenges => level == 2 && name == "Challenges";

  /// Whether this is the header of a design note.
  bool get isDesignNote {
    return name.startsWith("Design Note:");
  }

  /// The HTML anchor used to link to this header.
  String get anchor {
    return isChallenges
        ? "challenges"
        : isDesignNote ? "design-note" : toFileName(name);
  }
}


================================================
FILE: tool/lib/src/page_parser.dart
================================================
import 'dart:io';

import 'code_tag.dart';
import 'page.dart';
import 'text.dart';

/// Matches a whole `^code <name>` line with an optional parenthesized option
/// list after the name. Group 1 is the name and group 3 is the text inside the
/// parentheses.
final _codePattern = RegExp(r"^\^code ([-a-z0-9]+)( \(([^)]+)\))?$");

/// Matches one to three leading `#` characters followed by a space. Group 1 is
/// the run of `#` characters.
final _headerPattern = RegExp(r"^(#{1,3}) ");

/// Matches "<digits> before" in a code option and captures the digits.
final _beforePattern = RegExp(r"(\d+) before");

/// Matches "<digits> after" in a code option and captures the digits.
final _afterPattern = RegExp(r"(\d+) after");

/// Reads the Markdown file for [page] and collects its lines, its section
/// headers, its code tags, whether it has a "Challenges" section, and the name
/// of its design note, if any.
PageFile parsePage(Page page) {
  var lines = File(page.markdownPath).readAsLinesSync();

  var codeTags = <String, CodeTag>{};
  var headers = <String, Header>{};
  var hasChallenges = false;
  String designNote;

  var headerCount = 0;
  var subheaderCount = 0;

  for (var line in lines) {
    // A "^code" line declares a snippet. Tags are numbered in the order they
    // are encountered.
    var codeMatch = _codePattern.firstMatch(line);
    if (codeMatch != null) {
      var tag =
          _createCodeTag(page, codeTags.length, codeMatch[1], codeMatch[3]);
      codeTags[tag.name] = tag;
      continue;
    }

    var headerMatch = _headerPattern.firstMatch(line);
    if (headerMatch == null) continue;

    // Keep track of the headers so we can add section navigation for them.
    var level = headerMatch[1].length;
    var name = line.substring(level).trim().pretty;

    // Only subheaders carry a subheader index; it restarts under each header.
    int subheaderIndex;
    switch (level) {
      case 2:
        headerCount++;
        subheaderCount = 0;
        break;
      case 3:
        subheaderCount++;
        subheaderIndex = subheaderCount;
        break;
    }

    var header = Header(level, headerCount, subheaderIndex, name);
    headers[line] = header;

    if (header.isChallenges) hasChallenges = true;
    if (header.isDesignNote) {
      designNote = header.name.substring("Design Note: ".length);
    }
  }

  // TODO: Validate that every snippet for the chapter is included and report
  // any unused ones (other than "not-yet" and "omit") as errors at the top of
  // the page.
  return PageFile(lines, headers, hasChallenges, designNote, codeTags);
}

/// Creates the code tag called [name] with position [index] on [page].
///
/// [options] is the comma-separated annotation list that followed the name, or
/// `null` if there was none. Recognized options are "no location",
/// "<n> before", and "<n> after". Throws a [String] describing the option if
/// any other option is found.
CodeTag _createCodeTag(Page page, int index, String name, String options) {
  var beforeCount = 0;
  var afterCount = 0;
  var showLocation = true;

  for (var option in options?.split(", ") ?? const <String>[]) {
    if (option == "no location") {
      showLocation = false;
      continue;
    }

    var before = _beforePattern.firstMatch(option);
    var after = before == null ? _afterPattern.firstMatch(option) : null;

    if (before != null) {
      beforeCount = int.parse(before[1]);
    } else if (after != null) {
      afterCount = int.parse(after[1]);
    } else {
      throw "Unknown code option '$option'";
    }
  }

  return CodeTag(page, name, index, beforeCount, afterCount, showLocation);
}


================================================
FILE: tool/lib/src/snippet.dart
================================================
import 'book.dart';
import 'code_tag.dart';
import 'location.dart';
import 'text.dart';

/// One piece of source code that gets inserted into the book: the lines it
/// adds, the lines it removes, and the context shown around it.
class Snippet {
  /// The source file the snippet belongs to.
  final SourceFile file;

  /// The code tag that identifies the snippet.
  final CodeTag tag;

  /// The location of the first added line, possibly replaced later by
  /// [calculateContext].
  Location _location;

  /// Index in the file of the first added line.
  int _firstLine;

  /// Index in the file of the most recently added or removed line.
  int _lastLine;

  /// The deepest location found among the lines just before the snippet.
  Location get precedingLocation => _precedingLocation;
  Location _precedingLocation;

  /// If the snippet replaces a line with the same line but with a trailing
  /// comma, this is that line (with the comma).
  String get addedComma => _addedComma;
  String _addedComma;

  /// Text of the lines this snippet adds.
  final List<String> added = [];

  /// Text of the lines this snippet removes.
  final List<String> removed = [];

  /// Text of the context lines shown before the snippet.
  final List<String> contextBefore = [];

  /// Text of the context lines shown after the snippet.
  final List<String> contextAfter = [];

  Snippet(this.file, this.tag);

  /// Records that [line], at [lineIndex] in the file, is added by this
  /// snippet. The first added line fixes the snippet's location.
  void addLine(int lineIndex, SourceLine line) {
    var isFirstAddition = added.isEmpty;
    if (isFirstAddition) {
      _location = line.location;
      _firstLine = lineIndex;
    }

    added.add(line.text);

    // Lines are assumed to arrive in file order.
    _lastLine = lineIndex;
  }

  /// Records that [line], at [lineIndex] in the file, is removed by this
  /// snippet.
  void removeLine(int lineIndex, SourceLine line) {
    // Lines are assumed to arrive in file order.
    _lastLine = lineIndex;
    removed.add(line.text);
  }

  /// Describes where in the file this snippet appears. Returns a list of HTML
  /// strings.
  List<String> get locationHtmlLines {
    var lines = <String>["<em>${file.nicePath}</em>"];

    var locationHtml = _location.toHtml(precedingLocation, removed);
    if (locationHtml != null) lines.add(locationHtml);

    if (removed.isNotEmpty) {
      var verb = added.isNotEmpty ? "replace" : "remove";
      lines.add("$verb ${removed.length} line${pluralize(removed)}");
    }

    if (addedComma != null) {
      lines.add("add <em>&ldquo;,&rdquo;</em> to previous line");
    }

    return lines;
  }

  /// Describes where in the file this snippet appears.
  String get locationXml {
    var changes = <String>[];

    var locationXml = _location.toXml(precedingLocation, removed);
    if (locationXml != null) changes.add(locationXml);

    if (removed.isNotEmpty) {
      var verb = added.isNotEmpty ? "replace" : "remove";
      changes.add("$verb ${removed.length} line${pluralize(removed)}");
    }

    if (addedComma != null) {
      changes.add(
          "add <location-comma>&ldquo;,&rdquo;</location-comma> to previous line");
    }

    var buffer = StringBuffer();
    buffer.write("<location-file>${file.nicePath}</location-file>");

    // Each change goes on its own line after the file name.
    for (var change in changes) {
      buffer.writeln();
      buffer.write("<location-change>$change</location-change>");
    }

    return buffer.toString();
  }

  String toString() => "${file.nicePath} ${tag.name}";

  /// Calculate the surrounding context information for this snippet.
  void calculateContext() {
    bool presentAt(int index) => file.lines[index].isPresent(tag);

    // Walk backwards collecting the requested number of preceding lines.
    var index = _firstLine - 1;
    while (index >= 0 && contextBefore.length < tag.beforeCount) {
      if (presentAt(index)) contextBefore.insert(0, file.lines[index].text);
      index--;
    }

    // Walk forwards collecting the requested number of following lines.
    index = _lastLine + 1;
    while (index < file.lines.length && contextAfter.length < tag.afterCount) {
      if (presentAt(index)) contextAfter.add(file.lines[index].text);
      index++;
    }

    // Find the preceding location by looking at up to five present lines
    // before the snippet.
    // TODO: This constant is somewhat arbitrary. Come up with a more precise
    // way to track the preceding location.
    var remaining = 5;
    for (var i = _firstLine - 1; i >= 0 && remaining > 0; i--) {
      if (!presentAt(i)) continue;
      remaining--;

      // Keep the most precise (deepest) location seen so far.
      var candidate = file.lines[i].location;
      if (_precedingLocation == null ||
          candidate.depth > _precedingLocation.depth) {
        _precedingLocation = candidate;
      }
    }

    // Work out whether any code at all exists before and after the snippet.
    var hasCodeBefore = contextBefore.isNotEmpty;
    for (var i = _firstLine - 1; i >= 0 && !hasCodeBefore; i--) {
      hasCodeBefore = presentAt(i);
    }

    var hasCodeAfter = contextAfter.isNotEmpty;
    for (var i = _lastLine + 1; i < file.lines.length && !hasCodeAfter; i++) {
      hasCodeAfter = presentAt(i);
    }

    // With nothing before it, the snippet is either at the top of the file or
    // starts a new one.
    if (!hasCodeBefore) {
      _location = Location(null, hasCodeAfter ? "top" : "new", null);
    }

    // Find line changes that just add a trailing comma.
    if (added.isEmpty || removed.isEmpty) return;
    if (added.first != "${removed.last},") return;

    _addedComma = added.removeAt(0);
    removed.removeLast();
  }
}


================================================
FILE: tool/lib/src/source_file_parser.dart
================================================
import 'dart:io';

import 'book.dart';
import 'code_tag.dart';
import 'location.dart';
import 'page.dart';

// Snippet annotation patterns. Each one must match an entire (right-trimmed)
// line. Chapter names start with a capital letter and may contain spaces;
// snippet names are lowercase letters, digits, and hyphens.

/// Opens a block comment carrying both a start and an end tag:
/// `/* <Chapter> <snippet> < <Chapter> <snippet>`.
final _blockPattern = RegExp(
    r"^/\* ([A-Z][A-Za-z\s]+) ([-a-z0-9]+) < ([A-Z][A-Za-z\s]+) ([-a-z0-9]+)$");

/// Opens a block comment carrying only an end snippet name: `/* < <snippet>`.
final _blockSnippetPattern = RegExp(r"^/\* < ([-a-z0-9]+)$");

/// Begins a snippet in the current chapter: `//> <snippet>`.
final _beginSnippetPattern = RegExp(r"^//> ([-a-z0-9]+)$");

/// Ends a snippet in the current chapter: `//< <snippet>`.
final _endSnippetPattern = RegExp(r"^//< ([-a-z0-9]+)$");

/// Begins a snippet in a named chapter: `//> <Chapter> <snippet>`.
final _beginChapterPattern = RegExp(r"^//> ([A-Z][A-Za-z\s]+) ([-a-z0-9]+)$");

/// Ends a snippet in a named chapter: `//< <Chapter> <snippet>`.
final _endChapterPattern = RegExp(r"^//< ([A-Z][A-Za-z\s]+) ([-a-z0-9]+)$");

// Hacky regexes that match various declarations.

/// A constructor: a capitalized name followed by "(" at two-space indentation.
final _constructorPattern = RegExp(r"^  ([A-Z][a-z]\w+)\(");

/// A function or method. Captures the return type, the name, and the text of
/// the parameter list up to (not including) the first ")".
final _functionPattern = RegExp(r"(\w+)>*\*? (\w+)\(([^)]*)");

/// An unindented variable declaration ending in ";" or followed by " = ".
final _variablePattern = RegExp(r"^\w+\*? (\w+)(;| = )");

/// An unindented `struct Name {` line.
final _structPattern = RegExp(r"^struct (\w+)? {$");

/// A class, enum, or interface declaration with a capitalized name.
final _typePattern =
    RegExp(r"(public )?(abstract )?(class|enum|interface) ([A-Z]\w+)");

/// A typedef whose tag name appears on the opening line.
final _namedTypedefPattern = RegExp(r"^typedef (enum|struct|union) (\w+) {$");

/// A typedef with no tag name on the opening line.
final _unnamedTypedefPattern = RegExp(r"^typedef (enum|struct|union) {$");

/// The closing `} Name;` line that supplies a typedef's name.
final _typedefNamePattern = RegExp(r"^\} (\w+);$");

/// Reserved words that can appear like a return type in a function declaration
/// but shouldn't be treated as one.
const _keywords = {"new", "return", "throw"};

/// Reads one annotated source file from disk and converts it into a
/// [SourceFile] whose lines each record the location they appear in and the
/// snippet tags that were active when the line was read.
class SourceFileParser {
  /// Used to resolve chapter names found in annotations.
  final Book _book;

  /// The file being populated by [parse].
  final SourceFile _file;

  /// The raw lines read from disk.
  final List<String> _lines;

  /// Stack of snippet states; the last element is the innermost one.
  final List<_ParseState> _states = [];

  /// The location of a typedef whose name isn't known until its closing
  /// `} Name;` line is reached.
  Location _unnamedTypedef;

  /// The location the parser is currently inside.
  Location _location;

  /// The location saved when a block annotation was opened, restored when the
  /// block's closing `*/` is reached.
  Location _locationBeforeBlock;

  /// Creates a parser for the file at [path]. [relative] is the path used to
  /// construct the resulting [SourceFile].
  SourceFileParser(this._book, String path, String relative)
      : _file = SourceFile(relative),
        _lines = File(path).readAsLinesSync() {
    _location = Location(null, "file", _file.nicePath);
  }

  /// Walks every line of the file, tracking location and snippet state, and
  /// returns the populated [SourceFile].
  ///
  /// Lines that are themselves snippet annotations are consumed and not added
  /// to the result.
  SourceFile parse() {
//  line_num = 1
//  handled = False
//
//  def error(message):
//    print("Error: {} line {}: {}".format(relative, line_num, message),
//        file=sys.stderr)
//    source_code.errors[state.start.chapter].append(
//        "{} line {}: {}".format(relative, line_num, message))
//
    // Split the source file into lines.
//    printed_file = False
//    line_num = 1
    for (var i = 0; i < _lines.length; i++) {
      var line = _lines[i].trimRight();
//      handled = False
//
//      # Report any lines that are too long.
//      trimmed = re.sub(r'// \[([-a-z0-9]+)\]', '', line)
//      if len(trimmed) > 72 and not '/*' in trimmed:
//        if not printed_file:
//          print("Long line in {}:".format(file.path))
//          printed_file = True
//        print("{0:4} ({1:2} chars): {2}".format(line_num, len(trimmed), trimmed))
//

      _updateLocationBefore(line, i);

      if (!_updateState(line)) {
        var sourceLine =
            SourceLine(line, _location, _currentState.start, _currentState.end);
        _file.lines.add(sourceLine);
      }

      _updateLocationAfter(line);
//
//      line_num += 1
    }

//    # ".parent.parent" because there is always the top "null" state.
//    if state.parent != None and state.parent.parent != None:
//      print("{}: Ended with more than one state on the stack.".format(relative),
//          file=sys.stderr)
//      s = state
//      while s.parent != None:
//        print("  {}".format(s.start), file=sys.stderr)
//        s = s.parent
//      sys.exit(1)
//

    // TODO: Validate that we don't define two snippets with the same chapter
    // and number. A snippet may end up in disjoint lines in the final output
    // because a later snippet is inserted in it, but it shouldn't be explicitly
    // authored that way.
    return _file;
  }

  /// Keep track of the current location where the parser is in the source file.
  ///
  /// Called before [line] is recorded so that a line which opens a declaration
  /// is itself attributed to that declaration. [lineIndex] is the index of
  /// [line] in the file, used to peek at the following line.
  void _updateLocationBefore(String line, int lineIndex) {
    // See if we reached a new function or method declaration.
    var match = _functionPattern.firstMatch(line);
    if (match != null &&
        !line.contains("#define") &&
        !_keywords.contains(match[1])) {
      // Hack. Don't get caught by comments or string literals.
      if (!line.contains("//") && !line.contains('"')) {
        var isFunctionDeclaration = line.endsWith(";");

        // Hack: Handle multi-line declarations. Only peek at the next line if
        // there is one, so a "," on the file's final line can't index past the
        // end of the list. The next line is right-trimmed the same way the
        // current line was.
        if (line.endsWith(",") &&
            lineIndex + 1 < _lines.length &&
            _lines[lineIndex + 1].trimRight().endsWith(";")) {
          isFunctionDeclaration = true;
        }

        _location = Location(_location,
            _file.language == "java" ? "method" : "function", match[2],
            signature: match[3], isFunctionDeclaration: isFunctionDeclaration);
        return;
      }
    }

    match = _constructorPattern.firstMatch(line);
    if (match != null) {
      _location = Location(_location, "constructor", match[1]);
      return;
    }

    match = _typePattern.firstMatch(line);
    if (match != null) {
      // Hack. Don't get caught by comments or string literals.
      if (!line.contains("//") && !line.contains('"')) {
        var kind = match[3];
        var name = match[4];
        _location = Location(_location, kind, name);
      }
      return;
    }

    match = _structPattern.firstMatch(line);
    if (match != null) {
      _location = Location(_location, "struct", match[1]);
      return;
    }

    match = _namedTypedefPattern.firstMatch(line);
    if (match != null) {
      _location = Location(_location, match[1], match[2]);
      return;
    }

    match = _unnamedTypedefPattern.firstMatch(line);
    if (match != null) {
      // We don't know the name of the typedef yet.
      _location = Location(_location, match[1], null);
      _unnamedTypedef = _location;
      return;
    }

    match = _variablePattern.firstMatch(line);
    if (match != null) {
      _location = Location(_location, "variable", match[1]);
      return;
    }
  }

  /// Leaves any location that [line] closes. Called after [line] has been
  /// recorded so the closing line still belongs to the declaration it ends.
  void _updateLocationAfter(String line) {
    var match = _typedefNamePattern.firstMatch(line);
    if (match != null) {
      // Now we know the typedef name.
      _unnamedTypedef?.name = match[1];
      _unnamedTypedef = null;
      _location = _location.parent;
    }

    // Use "startsWith" to include lines like "} [aside-marker]".
    if (line.startsWith("}")) {
      _location = _location.popToDepth(0);
    } else if (line.startsWith("  }")) {
      _location = _location.popToDepth(1);
    } else if (line.startsWith("    }")) {
      _location = _location.popToDepth(2);
    }

    // If we reached a function declaration, not a definition, then it's done
    // after one line.
    if (_location.isFunctionDeclaration) {
      _location = _location.parent;
    }

    // Module variables are only a single line.
    if (_location.kind == "variable") {
      _location = _location.parent;
    }

    // Hack. There is a one-line class in Parser.java.
    if (line.contains("class ParseError")) {
      _location = _location.parent;
    }
  }

  /// Processes any [line] that changes what snippet the parser is currently in.
  ///
  /// Returns `true` if the line contained a snippet annotation.
  bool _updateState(String line) {
    var match = _blockPattern.firstMatch(line);
    if (match != null) {
      _push(
          startChapter: _book.findChapter(match[1]),
          startName: match[2],
          endChapter: _book.findChapter(match[3]),
          endName: match[4]);
      _locationBeforeBlock = _location;
      return true;
    }

    match = _blockSnippetPattern.firstMatch(line);
    if (match != null) {
      _push(endChapter: _currentState.start.chapter, endName: match[1]);
      _locationBeforeBlock = _location;
      return true;
    }

    // A "*/" only closes a state if that state was opened by a block
    // annotation, i.e. it has an end tag.
    if (line.trim() == "*/" && _currentState.end != null) {
      _location = _locationBeforeBlock;
      _pop();
      return true;
    }

    match = _beginSnippetPattern.firstMatch(line);
    if (match != null) {
      var name = match[1];
//        var tag = source_code.find_snippet_tag(state.start.chapter, name);
//        if tag < state.start:
//          error("Can't push earlier snippet {} from {}.".format(name, state.start.name))
//        elif tag == state.start:
//          error("Can't push to same snippet {}.".format(name))
      _push(startName: name);
      return true;
    }

    match = _endSnippetPattern.firstMatch(line);
    if (match != null) {
//      var name = match[1];
//        if name != state.start.name:
//          error("Expecting to pop {} but got {}.".format(state.start.name, name))
//        if state.parent.start.chapter == None:
//          error('Cannot pop last state {}.'.format(state.start))
      _pop();
      return true;
    }

    match = _beginChapterPattern.firstMatch(line);
    if (match != null) {
      var chapter = _book.findChapter(match[1]);
      var name = match[2];

//        if state.start != None:
//          old_chapter = book.chapter_number(state.start.chapter)
//          new_chapter = book.chapter_number(chapter)
//
//          if chapter == state.start.chapter and name == state.start.name:
//            error('Pushing same snippet "{} {}"'.format(chapter, name))
//          if chapter == state.start.chapter:
//            error('Pushing same chapter, just use "//>> {}"'.format(name))
//          if new_chapter < old_chapter:
//            error('Can\'t push earlier chapter "{}" from "{}".'.format(
//                chapter, state.start.chapter))
      _push(startChapter: chapter, startName: name);
      return true;
    }

    match = _endChapterPattern.firstMatch(line);
    if (match != null) {
//      var chapter = match[1];
//      var name = match[2];
//        if chapter != state.start.chapter or name != state.start.name:
//          error('Expecting to pop "{} {}" but got "{} {}".'.format(
//              state.start.chapter, state.start.name, chapter, name))
//        if state.start.chapter == None:
//          error('Cannot pop last state "{}".'.format(state.start))
      _pop();
      return true;
    }

    return false;
  }

  /// The innermost snippet state. Throws if no state has been pushed yet.
  _ParseState get _currentState => _states.last;

  /// Pushes a new state.
  ///
  /// A missing [startChapter] defaults to the current state's chapter, and a
  /// missing [startName] reuses the current state's start tag. The state only
  /// gets an end tag when [endChapter] is given.
  void _push(
      {Page startChapter, String startName, Page endChapter, String endName}) {
    startChapter ??= _currentState.start.chapter;

    CodeTag start;
    if (startName != null) {
      start = startChapter.findCodeTag(startName);
    } else {
      start = _currentState.start;
    }

    CodeTag end;
    if (endChapter != null) {
      end = endChapter.findCodeTag(endName);
    }

    _states.add(_ParseState(start, end));
  }

  /// Discards the innermost state.
  void _pop() {
    _states.removeLast();
  }
}

/// One entry on the parser's snippet stack: a [start] tag and an optional
/// [end] tag.
class _ParseState {
  final CodeTag start;
  final CodeTag end;

  _ParseState(this.start, [this.end]);

  /// Shows the start tag alone, or "start > end" when an end tag is set.
  String toString() =>
      end == null ? "_ParseState($start)" : "_ParseState($start > $end)";
}


================================================
FILE: tool/lib/src/split_chapter.dart
================================================
import 'dart:io';

import 'package:glob/glob.dart';
import 'package:path/path.dart' as p;
import 'package:pool/pool.dart';

import 'package:tool/src/book.dart';
import 'package:tool/src/code_tag.dart';
import 'package:tool/src/page.dart';
import 'package:tool/src/source_file_parser.dart';

/// Don't do too many file operations at once or we risk running out of file
/// descriptors.
///
/// Each source file being split requests a resource from this pool before it
/// touches the output file and releases it when done.
var _filePool = Pool(200);

/// Splits every `.c`, `.h`, and `.java` file found under the directory named
/// by [chapter]'s language, then completes once all of them are done.
///
/// [tag] is forwarded unchanged to each per-file split.
Future<void> splitChapter(Book book, Page chapter, [CodeTag tag]) async {
  var sources = Glob("${chapter.language}/**.{c,h,java}").listSync();

  await Future.wait(<Future<void>>[
    for (var source in sources)
      _splitSourceFile(book, chapter, source.path, tag)
  ]);
}

/// Writes the version of [sourcePath] that exists at [tag] (or at the end of
/// [chapter] when [tag] is omitted) into the "gen" directory.
///
/// The output file is left untouched when its contents would not change, and
/// deleted when the generated contents are empty.
Future<void> _splitSourceFile(Book book, Page chapter, String sourcePath,
    [CodeTag tag]) async {
  var relative = p.relative(sourcePath, from: chapter.language);

  // Don't split the generated files.
  const generatedFiles = {
    "com/craftinginterpreters/lox/Expr.java",
    "com/craftinginterpreters/lox/Stmt.java"
  };
  if (generatedFiles.contains(relative)) return;

  // A specific tag gets its own directory under "snippets"; otherwise the
  // output goes directly under the chapter's short name.
  var package = tag == null
      ? chapter.shortName
      : p.join("snippets", chapter.shortName, tag.directory);

  // If we're generating the split for an entire chapter, include all its
  // snippets.
  tag ??= book.lastSnippet(chapter).tag;

  var outputFile = File(p.join("gen", package, relative));

  var resource = await _filePool.request();
  try {
    var output = _generateSourceFile(book, chapter, sourcePath, tag);
    var alreadyExists = await outputFile.exists();

    if (output.isEmpty) {
      // Remove it since it's supposed to be nonexistent.
      if (alreadyExists) await outputFile.delete();
      return;
    }

    // Don't overwrite the file if it didn't change, so the makefile doesn't
    // think it was touched.
    if (alreadyExists && await outputFile.readAsString() == output) return;

    // Write the changed output.
    await Directory(p.dirname(outputFile.path)).create(recursive: true);
    await outputFile.writeAsString(output);
  } finally {
    resource.release();
  }
}

/// Gets the code for [sourcePath] as it appears at [tag] of [chapter].
///
/// Parses the file and concatenates, one per line, the text of every line
/// that is present at [tag].
String _generateSourceFile(
    Book book, Page chapter, String sourcePath, CodeTag tag) {
  var shortPath = p.relative(sourcePath, from: chapter.language);
  var parsed = SourceFileParser(book, sourcePath, shortPath).parse();

  var result = StringBuffer();
  for (var line in parsed.lines) {
    if (!line.isPresent(tag)) continue;

    // Hack. In generate_ast.java, we split up a parameter list among
    // multiple chapters, which leads to hanging commas in some cases.
    // Remove them.
    if (line.text.trim().startsWith(")")) {
      var soFar = result.toString();
      if (soFar.endsWith(",\n")) {
        // Drop the ",\n" and put back just the newline.
        result
          ..clear()
          ..writeln(soFar.substring(0, soFar.length - 2));
      }
    }

    result.writeln(line.text);
  }

  return result.toString();
}


================================================
FILE: tool/lib/src/syntax/grammar.dart
================================================
import 'language.dart';
import 'rule.dart';

/// Every language that can be highlighted, keyed by the name used to request
/// it.
final languages = {
  "c": c,
  "c++": cpp,
  "ebnf": ebnf,
  "java": java,
  "js": js,
  "lisp": lisp,
  "lox": lox,
  // TODO: This is just enough for the one line in "scanning". Do more if
  // needed.
  "lua": Language(rules: _commonRules),
  "python": python,
  "ruby": ruby,
};

/// C: the shared C keywords and rules plus the listed built-in and standard
/// library type names.
final c = Language(
  keywords: _cKeywords,
  types: "bool char double FILE int size_t uint16_t uint32_t uint64_t uint8_t "
      "uintptr_t va_list void",
  rules: _cRules,
);

/// C++: reuses the C keywords and rules; only `vector` and `string` are
/// registered as types.
final cpp = Language(
  keywords: _cKeywords,
  types: "vector string",
  rules: _cRules,
);

/// EBNF grammar notation. No keywords or types, just one extra rule ahead of
/// the common ones.
final ebnf = Language(
  rules: [
    // Color ALL_CAPS terminals like types to make them distinct.
    Rule(r"[A-Z][A-Z0-9_]+", "t"),
    ..._commonRules
  ],
);

/// Java: keywords, primitive types, and rules for import/package lines,
/// annotations, and ALL_CAPS constants ahead of the common rules.
final java = Language(
  keywords: "abstract assert break case catch class const continue default do "
      "else enum extends false final finally for goto if implements import "
      "instanceof interface native new null package private protected public "
      "return static strictfp super switch synchronized this throw throws "
      "transient true try volatile while",
  types: "boolean byte char double float int long short void",
  rules: [
    // Import.
    Rule.capture(r"(import)(\s+)(\w+(?:\.\w+)*)(;)", ["k", "", "i", ""]),
    // Static import. The whole word "static" is required; a trailing "?" here
    // would only make its final "c" optional.
    Rule.capture(r"(import\s+static)(\s+)(\w+(?:\.\w+)*(?:\.\*)?)(;)",
        ["k", "", "i", ""]),
    // Package.
    Rule.capture(r"(package)(\s+)(\w+(?:\.\w+)*)(;)", ["k", "", "i", ""]),
    // Annotation.
    Rule(r"@[a-zA-Z_][a-zA-Z0-9_]*", "a"),

    // ALL_CAPS constant names are colored like normal identifiers. We give
    // them their own rule so that it matches before the capitalized type name
    // rule.
    Rule(r"[A-Z][A-Z0-9_]+\b", "i"),

    ..._commonRules,
    _characterRule,
  ],
);

/// JavaScript: a keyword list on top of the common rules.
final js = Language(
  keywords: "break case catch class const continue debugger default delete do "
      "else export extends finally for function if import in instanceof let "
      "new return super switch this throw try typeof var void while with yield",
  rules: _commonRules,
);

/// Lisp: a single rule that treats runs of letters, digits, "_" and "-" as
/// identifiers. Nothing else is highlighted.
final lisp = Language(
  rules: [
    // TODO: Other punctuation characters.
    Rule(r"[a-zA-Z0-9_-]+", "i"),
  ],
);

/// Lox: a keyword list on top of the common rules.
final lox = Language(
  keywords: "and class else false fun for if nil or print return super this "
      "true var while",
  rules: _commonRules,
);

/// Python: a keyword list on top of the common rules.
final python = Language(
  keywords: "and as assert break class continue def del elif else except "
      "exec finally for from global if import in is lambda not or pass "
      "print raise range return try while with yield",
  rules: _commonRules,
);

/// Ruby: a keyword list on top of the common rules.
final ruby = Language(
  // "__ENCODING__" takes two leading underscores, like "__LINE__" and
  // "__FILE__".
  keywords: "__LINE__ __ENCODING__ __FILE__ BEGIN END alias and begin break "
      "case class def defined? do else elsif end ensure false for if in lambda "
      "module next nil not or redo rescue retry return self super then true "
      "undef unless until when while yield",
  rules: _commonRules,
);

/// Keywords shared by the C and C++ languages.
final _cKeywords =
    "break case const continue default do else enum extern false for goto if "
    "inline return sizeof static struct switch true typedef union while";

/// Rules shared by the C and C++ languages. Rules are tried in list order and
/// the first one that matches wins, so the preprocessor rules come before the
/// common ones.
final _cRules = [
  // Preprocessor with comment.
  Rule.capture(r"(#.*?)(//.*)", ["a", "c"]),

  // Preprocessor.
  Rule(r"#.*", "a"),

  // ALL_CAPS preprocessor macro use.
  Rule(r"[A-Z][A-Z0-9_]+", "a"),

  ..._commonRules,
  _characterRule,
];

/// A character literal: a single character, optionally preceded by a
/// backslash, between single quotes.
// TODO: Multi-character escapes?
final _characterRule = Rule(r"'\\?.'", "s");

/// Rules used by most languages: strings, numbers, line comments, capitalized
/// type names, and identifiers/keywords.
final _commonRules = [
  StringRule(),

  Rule(r"[0-9]+\.[0-9]+f?", "n"), // Float.
  Rule(r"0x[0-9a-fA-F]+", "n"), // Hex integer.
  Rule(r"[0-9]+[Lu]?", "n"), // Integer.

  Rule(r"//.*", "c"), // Line comment.

  // Capitalized type name.
  Rule(r"[A-Z][A-Za-z0-9_]*", "t"),

  // Other identifiers or keywords.
  IdentifierRule(),
];


================================================
FILE: tool/lib/src/syntax/highlighter.dart
================================================
import 'package:charcode/ascii.dart';
import 'package:string_scanner/string_scanner.dart';

import '../format.dart';
import '../term.dart' as term;
import 'grammar.dart' as grammar;
import 'language.dart';

/// The longest a code line may be (after aside markers are stripped) before
/// [checkLineLength] reports it.
const _maxLineLength = 67;

/// Highlights [lines] of source code written in [language] and returns the
/// resulting markup.
///
/// For non-print formats the result is wrapped in a <pre> tag with the given
/// [preClass]. [indent] is the number of leading characters removed from each
/// line before it is scanned.
String formatCode(String language, List<String> lines, Format format,
    {String preClass, int indent = 0}) {
  var highlighter = Highlighter(language, format);
  return highlighter._highlight(lines, preClass, indent);
}

/// Prints [line] with its overflow in red if it is longer than
/// [_maxLineLength] once aside markers have been removed.
void checkLineLength(String line) {
  // Strip a trailing "// [name]" marker first, then a marker that follows
  // other comment text ("// text [name]").
  var measured = line
      .replaceAll(RegExp(r' +// \[([-a-z0-9]+)\]'), '')
      .replaceAll(RegExp(r' +// (.+) \[([-a-z0-9]+)\]'), '');

  if (measured.length > _maxLineLength) {
    var fits = measured.substring(0, _maxLineLength);
    var overflow = measured.substring(_maxLineLength);
    print(fits + term.red(overflow));
  }
}

/// Scans lines of source code in a single [language] and accumulates the
/// highlighted markup for them.
class Highlighter {
  /// The only token types that get their own element in print output.
  static const _printTags = {"k": "keyword", "c": "comment"};

  final Format _format;
  final StringBuffer _buffer = StringBuffer();

  /// Scanner over the line currently being highlighted. Rules read from and
  /// advance it.
  StringScanner scanner;

  final Language language;

  /// Whether we are in a multi-line macro started on a previous line.
  bool _inMacro = false;

  /// Looks up [language] by name, throwing if it is not a known language.
  Highlighter(String language, this._format)
      : language = grammar.languages[language] ??
            (throw "Unknown language '$language'.");

  /// Highlights every line and returns the accumulated markup, wrapped in a
  /// <pre> element (with class [preClass], if given) unless the format is
  /// print.
  String _highlight(List<String> lines, String preClass, int indent) {
    if (!_format.isPrint) {
      _buffer.write("<pre");
      if (preClass != null) _buffer.write(' class="$preClass"');
      _buffer.write(">");

      // The HTML spec mandates that a leading newline after '<pre>' is ignored.
      // https://html.spec.whatwg.org/#element-restrictions
      // Some snippets deliberately start with a newline which needs to be
      // preserved, so output an extra (discarded) newline in that case.
      // An empty snippet has no first line to inspect.
      if (_format.isWeb && lines.isNotEmpty && lines.first.isEmpty) {
        _buffer.writeln();
      }
    }

    for (var line in lines) {
      _scanLine(line, indent);
    }

    if (!_format.isPrint) _buffer.write("</pre>");

    return _buffer.toString();
  }

  /// Highlights a single [line] and appends it, followed by a newline, to the
  /// buffer.
  void _scanLine(String line, int indent) {
    if (line.trim().isEmpty) {
      _buffer.writeln();
      return;
    }

    // If the entire code block is indented, remove that indentation from the
    // code lines.
    if (line.length > indent) line = line.substring(indent);

    checkLineLength(line);

    // Hackish. If the line ends with `\`, then it is a multi-line macro
    // definition and we want to highlight subsequent lines like preprocessor
    // code too.
    if (language == grammar.c && line.endsWith("\\")) _inMacro = true;

    if (_inMacro) {
      writeToken("a", line);
    } else {
      scanner = StringScanner(line);
      while (!scanner.isDone) {
        // Rules are tried in order; the first one that matches consumes input.
        var found = false;
        for (var rule in language.rules) {
          if (rule.apply(this)) {
            found = true;
            break;
          }
        }

        // Nothing matched, so emit one character as plain text and move on.
        if (!found) _writeChar(scanner.readChar());
      }
    }

    // The macro ends on the first line that doesn't continue with `\`.
    if (_inMacro && !line.endsWith("\\")) _inMacro = false;

    _buffer.writeln();
  }

  /// Writes [text] as a token of the given [type]. When [text] is omitted, the
  /// scanner's most recent match is used.
  void writeToken(String type, [String text]) {
    text ??= scanner.lastMatch[0];

    if (_format.isPrint) {
      // Only highlight keywords and comments in XML.
      var tag = _printTags[type];

      if (tag != null) _buffer.write("<$tag>");
      writeText(text);
      if (tag != null) _buffer.write("</$tag>");
    } else {
      _buffer.write('<span class="$type">');
      writeText(text);
      _buffer.write('</span>');
    }
  }

  /// Writes [string] with its markup-significant characters escaped.
  void writeText(String string) {
    for (var i = 0; i < string.length; i++) {
      _writeChar(string.codeUnitAt(i));
    }
  }

  /// Writes a single code unit, replacing `<`, `>`, `'`, `"`, and `&` with
  /// their character entities.
  void _writeChar(int char) {
    switch (char) {
      case $less_than:
        _buffer.write("&lt;");
        break;
      case $greater_than:
        _buffer.write("&gt;");
        break;
      case $single_quote:
        _buffer.write("&#39;");
        break;
      case $double_quote:
        _buffer.write("&quot;");
        break;
      case $ampersand:
        _buffer.write("&amp;");
        break;
      default:
        _buffer.writeCharCode(char);
    }
  }
}


================================================
FILE: tool/lib/src/syntax/language.dart
================================================
import 'rule.dart';

/// The highlighting definition for one programming language: an ordered list
/// of [rules] and a table of reserved [words].
class Language {
  /// Maps each reserved word to its token type: "k" for keywords and "t" for
  /// types.
  final Map<String, String> words = {};

  final List<Rule> rules;

  /// [keywords] and [types] are space-separated word lists; either may be
  /// omitted.
  Language({String keywords, String types, this.rules}) {
    _register(keywords, "k");
    _register(types, "t");
  }

  /// Records every word in the space-separated [wordList] under [type].
  void _register(String wordList, String type) {
    if (wordList == null) return;
    wordList.split(" ").forEach((word) => words[word] = type);
  }
}


================================================
FILE: tool/lib/src/syntax/rule.dart
================================================
import 'package:charcode/ascii.dart';

import 'highlighter.dart';

/// A single highlighting rule: a [pattern] to scan for and an action to take
/// when it matches.
abstract class Rule {
  final RegExp pattern;

  /// Creates a rule that emits the whole match as one [tokenType] token.
  factory Rule(String pattern, String tokenType) {
    return SimpleRule(pattern, tokenType);
  }

  /// Creates a rule that emits each capture group with its own token type.
  factory Rule.capture(String pattern, List<String> tokenTypes) {
    return CaptureRule(pattern, tokenTypes);
  }

  Rule._(String pattern) : pattern = RegExp(pattern);

  /// Tries to scan [pattern] at the highlighter's current position. On a
  /// match, runs [applyRule] and returns `true`; otherwise returns `false`.
  bool apply(Highlighter highlighter) {
    var matched = highlighter.scanner.scan(pattern);
    if (matched) applyRule(highlighter);
    return matched;
  }

  /// Emits output for the text that [pattern] just matched.
  void applyRule(Highlighter highlighter);
}

/// Matches one regex and writes everything it matched as a single token of
/// type [tokenType].
class SimpleRule extends Rule {
  final String tokenType;

  SimpleRule(String pattern, this.tokenType) : super._(pattern);

  void applyRule(Highlighter highlighter) => highlighter.writeToken(tokenType);
}

/// Matches one regex whose capture groups line up, in order, with
/// [tokenTypes]. A group whose type is `""` is written as plain text; every
/// other group is written as a token of its type.
class CaptureRule extends Rule {
  final List<String> tokenTypes;

  CaptureRule(String pattern, this.tokenTypes) : super._(pattern);

  void applyRule(Highlighter highlighter) {
    var match = highlighter.scanner.lastMatch;

    // Capture groups are numbered from 1; group 0 is the whole match.
    var group = 1;
    for (var type in tokenTypes) {
      var text = match[group++];
      if (type.isEmpty) {
        highlighter.writeText(text);
      } else {
        highlighter.writeToken(type, text);
      }
    }
  }
}

/// Parses string literals and the escape codes inside them.
///
/// The rule's pattern matches only the opening quote. [applyRule] then scans
/// forward, writing runs of ordinary characters as "s" tokens and each escape
/// sequence as an "e" token, until the closing quote. A string with no closing
/// quote on the line is written as an "err" token.
class StringRule extends Rule {
  /// A backslash followed by any single character.
  static final _escapePattern = RegExp(r"\\.");

  StringRule() : super._('"');

  void applyRule(Highlighter highlighter) {
    var scanner = highlighter.scanner;

    // Start of the pending run of string text. Begins at the opening quote,
    // which the rule's pattern has already consumed.
    var start = scanner.position - 1;

    while (!scanner.isDone) {
      if (scanner.scan(_escapePattern)) {
        // The escape is the last two characters scanned. Only write the text
        // before it if there is any, so back-to-back escapes don't produce an
        // empty token.
        var escapeStart = scanner.position - 2;
        if (escapeStart > start) {
          highlighter.writeToken("s", scanner.substring(start, escapeStart));
        }
        highlighter.writeToken("e");
        start = scanner.position;
      } else if (scanner.scanChar($double_quote)) {
        highlighter.writeToken("s", scanner.substring(start, scanner.position));
        return;
      } else {
        scanner.position++;
      }
    }

    // Error: Unterminated string.
    highlighter.writeToken("err", scanner.substring(start, scanner.position));
  }
}

/// Matches an identifier and writes it with the token type registered for it
/// in the language's word table, or "i" if it isn't a reserved word.
class IdentifierRule extends Rule {
  IdentifierRule() : super._(r"[a-zA-Z_][a-zA-Z0-9_]*");

  void applyRule(Highlighter highlighter) {
    var words = highlighter.language.words;
    var name = highlighter.scanner.lastMatch[0];
    highlighter.writeToken(words[name] ?? "i");
  }
}


================================================
FILE: tool/lib/src/term.dart
================================================
/// Utilities for printing to the terminal.
import 'dart:io';

// ANSI escape sequences. Each one is the empty string when ANSI output is not
// allowed (see `_ansi`).

// Foreground colors.
final _cyan = _ansi('\u001b[36m');
final _gray = _ansi('\u001b[1;30m'); // Bold black.
final _green = _ansi('\u001b[32m');
final _magenta = _ansi('\u001b[35m');
final _pink = _ansi('\u001b[91m'); // Bright red.
final _red = _ansi('\u001b[31m');
final _yellow = _ansi('\u001b[33m');

// Resets every attribute, including bold.
final _none = _ansi('\u001b[0m');

// Resets only the foreground color.
final _resetColor = _ansi('\u001b[39m');

/// Surrounds the string form of [message] with the escape codes [start] and
/// [end].
String _colorize(String start, Object message, String end) =>
    "$start$message$end";

// [cyan] and [gray] end with a full attribute reset; the other colors only
// reset the foreground color.
String cyan(Object message) => _colorize(_cyan, message, _none);
String gray(Object message) => _colorize(_gray, message, _none);
String green(Object message) => _colorize(_green, message, _resetColor);
String magenta(Object message) => _colorize(_magenta, message, _resetColor);
String pink(Object message) => _colorize(_pink, message, _resetColor);
String red(Object message) => _colorize(_red, message, _resetColor);
String yellow(Object message) => _colorize(_yellow, message, _resetColor);

/// Erases the current terminal line and returns the cursor to its start. When
/// ANSI output isn't allowed, prints an empty line instead.
void clearLine() {
  if (!_allowAnsi) {
    print("");
    return;
  }

  stdout.write("\u001b[2K\r");
}

/// Clears the current line and then, if [line] is given, writes it without a
/// trailing newline.
void writeLine([String line]) {
  clearLine();
  if (line == null) return;
  stdout.write(line);
}

/// Whether escape codes may be written: never on Windows, and elsewhere only
/// when stdout is a terminal.
bool get _allowAnsi {
  if (Platform.isWindows) return false;
  return stdioType(stdout) == StdioType.terminal;
}

/// Returns [special] when ANSI output is allowed and [fallback] otherwise.
String _ansi(String special, [String fallback = '']) {
  if (_allowAnsi) return special;
  return fallback;
}


================================================
FILE: tool/lib/src/text.dart
================================================
import 'dart:convert';
import 'dart:math' as math;

/// Punctuation characters removed from file names and anchors.
///
/// The pattern is assembled from three adjacent string literals so that it can
/// contain both kinds of quote character.
final _punctuation = RegExp(r'[,.?!:' "'" '/"()]');

/// One or more consecutive whitespace characters.
final _whitespace = RegExp(r"\s+");

/// Converts [text] to a string suitable for use as a file or anchor name.
///
/// Three titles are mapped to fixed names. Anything else is lowercased, has
/// its spaces turned into hyphens, and has its punctuation removed.
String toFileName(String text) {
  switch (text) {
    case "Crafting Interpreters":
      return "index";
    case "Table of Contents":
      return "contents";
    case "Challenges":
      // Hack. The introduction has a *subheader* named "Challenges" distinct
      // from the challenges section. This function here is also used to
      // generate the anchor names for the links, so handle that one specially
      // so it doesn't collide with the real "Challenges" section.
      return "challenges_";
  }

  var hyphenated = text.toLowerCase().replaceAll(" ", "-");
  return hyphenated.replaceAll(_punctuation, "");
}

/// Returns the greater of [longest] and the length of the longest string in
/// [lines].
int longestLine(int longest, Iterable<String> lines) =>
    lines.fold(longest, (widest, line) => math.max(widest, line.length));

/// Returns the suffix that pluralizes a noun counting [sequence]: empty when
/// there is exactly one element, "s" otherwise (including for none).
String pluralize<T>(Iterable<T> sequence) => sequence.length == 1 ? "" : "s";

extension IntExtensions on int {
  /// Convert n to roman numerals.
  ///
  /// Supports values below 10. Zero and negative values produce an empty
  /// string. Throws an [ArgumentError] for 10 and above.
  String get roman {
    if (this <= 3) return "I" * this;
    if (this == 4) return "IV";
    if (this < 9) return "V" + "I" * (this - 5);
    // Nine uses subtractive notation, like four.
    if (this == 9) return "IX";

    throw ArgumentError("Can't convert $this to Roman.");
  }

  /// Make a nicely formatted string with a comma between each group of three
  /// digits, e.g. `1005` becomes "1,005" and `1234567` becomes "1,234,567".
  String get withCommas {
    var text = toString();
    var sign = text.startsWith("-") ? "-" : "";
    var digits = text.substring(sign.length);

    var buffer = StringBuffer(sign);
    for (var i = 0; i < digits.length; i++) {
      // A comma goes before every digit that starts a full group of three,
      // counting from the right, except at the very front.
      if (i > 0 && (digits.length - i) % 3 == 0) buffer.write(",");
      buffer.write(digits[i]);
    }

    return buffer.toString();
  }
}

extension StringExtensions on String {
  /// Use nicer HTML entities and special characters.
  ///
  /// Replaces the accented letters "à", "ï", "ø", and "æ" with their named
  /// HTML entities.
  String get pretty {
    return this
        .replaceAll("à", "&agrave;")
        .replaceAll("ï", "&iuml;")
        .replaceAll("ø", "&oslash;")
        .replaceAll("æ", "&aelig;");
  }

  /// Escapes the string for use inside an HTML attribute value.
  String get escapeHtml =>
      const HtmlEscape(HtmlEscapeMode.attribute).convert(this);

  /// The number of whitespace-separated words in the string.
  ///
  /// Splitting can yield empty pieces (for an empty string, or at leading and
  /// trailing whitespace); those are not counted as words.
  int get wordCount =>
      split(_whitespace).where((word) => word.isNotEmpty).length;

  /// Removes a single newline from the end of the string.
  String trimTrailingNewline() {
    if (endsWith("\n")) return substring(0, length - 1);
    return this;
  }
}


================================================
FILE: tool/pubspec.yaml
================================================
# Package definition for the Dart build tooling in this directory.
name: tool
# Prevents this package from being published.
publish_to: none
environment:
  # The upper bound excludes Dart 3.
  sdk: '>2.11.0 <3.0.0'
dependencies:
  args: ^1.6.0
  charcode: ^1.1.3
  glob: ^1.2.0
  image: ^2.1.19
  markdown: ^2.1.3
  mime_type: ^0.3.0
  mustache_template: ^1.0.0
  path: ^1.7.0
  pool: ^1.4.0
  sass: ^1.26.5
  shelf: ^0.7.5
  string_scanner: ^1.0.5


================================================
FILE: util/c.make
================================================
# Makefile for building a single configuration of the C interpreter. It expects
# variables to be passed in for:
#
# MODE         "debug" or "release". Anything other than "debug" is built as
#              a release.
# NAME         Name of the output executable (and object file directory).
# SOURCE_DIR   Directory where source files and headers are found.
#
# It also understands two optional variables:
#
# CPP          Set to "true" to compile the sources as C++ instead of C.
# SNIPPET      Set to "true" to tolerate functions that aren't used yet.

# Remove a target whose recipe failed so a partial file is never mistaken for
# an up-to-date one.
.DELETE_ON_ERROR:

# Conditional bodies are indented with spaces, not tabs, so that make can never
# mistake them for recipe lines.
ifeq ($(CPP),true)
  # Ideally, we'd add -pedantic-errors, but the use of designated initializers
  # means clox relies on some GCC/Clang extensions to compile as C++.
  CFLAGS := -std=c++11
  C_LANG := -x c++
else
  CFLAGS := -std=c99
  # Defined (as empty) so the compile recipe never expands an unset variable.
  C_LANG :=
endif

CFLAGS += -Wall -Wextra -Werror -Wno-unused-parameter

# If we're building at a point in the middle of a chapter, don't fail if there
# are functions that aren't used yet.
ifeq ($(SNIPPET),true)
  CFLAGS += -Wno-unused-function
endif

# Mode configuration.
ifeq ($(MODE),debug)
  CFLAGS += -O0 -DDEBUG -g
  BUILD_DIR := build/debug
else
  CFLAGS += -O3 -flto
  BUILD_DIR := build/release
endif

# Files.
HEADERS := $(wildcard $(SOURCE_DIR)/*.h)
SOURCES := $(wildcard $(SOURCE_DIR)/*.c)
OBJECTS := $(addprefix $(BUILD_DIR)/$(NAME)/, $(notdir $(SOURCES:.c=.o)))

# Targets ---------------------------------------------------------------------

# Default goal: build the interpreter executable.
default: build/$(NAME)

# Link the interpreter.
build/$(NAME): $(OBJECTS)
	@ printf "%8s %-40s %s\n" $(CC) $@ "$(CFLAGS)"
	@ mkdir -p $(@D)
	@ $(CC) $(CFLAGS) $^ -o $@

# Compile object files. Every object depends on every header, so touching any
# header rebuilds all of them.
$(BUILD_DIR)/$(NAME)/%.o: $(SOURCE_DIR)/%.c $(HEADERS)
	@ printf "%8s %-40s %s\n" $(CC) $< "$(CFLAGS)"
	@ mkdir -p $(@D)
	@ $(CC) -c $(C_LANG) $(CFLAGS) -o $@ $<

.PHONY: default


================================================
FILE: util/intellij/chap04_read.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap04_framework">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap04_framework" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap05_scanning.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap05_scanning; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap05_scanning">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap05_scanning" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap06_representing.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap06_representing; the JDK and compiler
     output settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap06_representing">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap06_representing" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap07_parsing.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap07_parsing; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap07_parsing">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap07_parsing" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap08_evaluating.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap08_evaluating; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap08_evaluating">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap08_evaluating" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap09_statements.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap09_statements; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap09_statements">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap09_statements" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap10_control.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap10_control; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap10_control">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap10_control" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap11_functions.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap11_functions; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap11_functions">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap11_functions" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap12_resolving.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap12_resolving; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap12_resolving">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap12_resolving" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap13_classes.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap13_classes; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap13_classes">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap13_classes" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/chap14_inheritance.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/chap14_inheritance; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/chap14_inheritance">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/chap14_inheritance" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/intellij.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module rooted at $MODULE_DIR$ itself, with $MODULE_DIR$/src
     as its single non-test source folder; the JDK and compiler output settings
     are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/jlox.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module for the sources under $MODULE_DIR$/../../java, which
     is both the content root and the single non-test source folder. The
     language level is pinned to JDK_1_7; the JDK and compiler output settings
     are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../java">
      <sourceFolder url="file://$MODULE_DIR$/../../java" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/section_test.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ module with no content root or source folder declared, and with
     compiler output inheritance turned off.
     NOTE(review): unlike the sibling module files this one has no module type,
     no content entry and no JDK order entry; it looks like a leftover stub.
     Confirm whether it is still needed. -->
<module version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="false">
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/intellij/snippet_test.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- IntelliJ Java module. Its content root and single non-test source folder
     are $MODULE_DIR$/../../gen/snippet_test; the JDK and compiler output
     settings are inherited. -->
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$/../../gen/snippet_test">
      <sourceFolder url="file://$MODULE_DIR$/../../gen/snippet_test" isTestSource="false" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

================================================
FILE: util/java.make
================================================
# Makefile for building a single directory of Java source files. It requires
# the DIR and PACKAGE variables to be set: sources are read from
# $(DIR)/com/craftinginterpreters/$(PACKAGE)/.

# All output goes under build/, mirroring the path of each source file.
BUILD_DIR := build

# Fail the compile on any javac warning.
JAVA_OPTIONS := -Werror

# The package's Java sources, and the class file expected for each of them.
SOURCES := $(wildcard $(DIR)/com/craftinginterpreters/$(PACKAGE)/*.java)
CLASSES := $(patsubst %.java,$(BUILD_DIR)/%.class,$(SOURCES))

# Entry point: bring every class file up to date. The recipe is a shell no-op
# whose only purpose is to keep make from printing "Nothing to be done" when
# all classes are already built.
default: $(CLASSES)
	@:

# Compile a single .java file to .class.
#
# Class files are rooted at $(BUILD_DIR)/$(DIR) (javac -d), which is why that
# directory, not the target's own directory, is the one created up front.
# Other classes the file refers to are resolved from source under $(DIR)
# (-cp); -implicit:none tells javac not to write class files for those other
# sources, so each class file is produced only by its own run of this rule.
# The progress line comes after javac, so it is printed only for files that
# compiled successfully.
$(BUILD_DIR)/$(DIR)/%.class: $(DIR)/%.java
	@ mkdir -p $(BUILD_DIR)/$(DIR)
	@ javac -cp $(DIR) -d $(BUILD_DIR)/$(DIR) $(JAVA_OPTIONS) -implicit:none $<
	@ printf "%8s %-60s %s\n" javac $< "$(JAVA_OPTIONS)"

# `default` names a command, not a file on disk.
.PHONY: default