Repository: elm-city-craftworks/practicing-ruby-manuscripts
Branch: master
Commit: b675b2cf9eab
Files: 114
Total size: 1.7 MB

Directory structure:
gitextract_ccqrdc7w/

├── .gitignore
├── AUTHORS.md
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── articles/
│   ├── v1/
│   │   ├── 001-method-lookup.md
│   │   ├── 002-method-lookup.md
│   │   ├── 003-configurable.md
│   │   ├── 004-configurable.md
│   │   ├── 005-testing-antipatterns.md
│   │   ├── 006-good-and-bad-code.md
│   │   ├── 007-good-and-bad-code.md
│   │   ├── 008-uses-for-modules.md
│   │   ├── 009-uses-for-modules.md
│   │   ├── 010-uses-for-modules.md
│   │   ├── 010a-uses-for-modules.md
│   │   ├── 011-uses-for-modules.md
│   │   ├── 012-rapid-prototyping.md
│   │   ├── 013-obfuscations.md
│   │   ├── 014-duck-typing.md
│   │   ├── 015-duck-typing.md
│   │   ├── 016-interesting-ruby-hackers.md
│   │   ├── 017-interesting-ruby-writers.md
│   │   ├── 018-testing-dogma.md
│   │   ├── 019-thoughts-on-mocking.md
│   │   ├── 020-thoughts-on-mocking.md
│   │   ├── 021-how-to-practice.md
│   │   ├── 022-how-to-practice.md
│   │   ├── 023-solid-design.md
│   │   ├── 024-connascence.md
│   │   ├── 025-creational-design-patterns.md
│   │   ├── 026-structural-design-patterns.md
│   │   └── README.md
│   ├── v2/
│   │   ├── 001-ways-to-load-code.md
│   │   ├── 002-sticky-problems.md
│   │   ├── 003-closure-double-edged-sword.md
│   │   ├── 004-roll-your-own-enumerable-and-enumerator.md
│   │   ├── 005-regression-testing.md
│   │   ├── 006-learning-new-things-step-by-step.md
│   │   ├── 007-unobtrusive-ruby.md
│   │   ├── 008-singleton-pattern.md
│   │   ├── 009-unix-style-command-line-applications.md
│   │   ├── 010-from-requirements-discovery-to-release.md
│   │   ├── 011-domain-specific-api-construction.md
│   │   ├── 012-working-with-binary-file-formats.md
│   │   ├── 014-arguments-and-results-1.md
│   │   ├── 015-arguments-and-results-2.md
│   │   └── README.md
│   ├── v3/
│   │   ├── 001-quality-software.md
│   │   ├── 002-building-excellent-examples.md
│   │   ├── 003-turing-tarpit.md
│   │   ├── 004-climbing-the-spiral-staircase-of-refactoring.md
│   │   ├── 005-framework-design-and-implementation.md
│   │   ├── 006-framework-design-and-implementation.md
│   │   ├── 007-criteria-for-disciplined-inheritance.md
│   │   ├── 008-criteria-for-disciplined-inheritance.md
│   │   ├── 009-using-games-to-practice-domain-modeling.md
│   │   ├── 010-robustness.md
│   │   └── README.md
│   ├── v4/
│   │   ├── 001-testing-costs-benefits.md
│   │   ├── 002-moving-beyond-the-first-release.md
│   │   ├── 004-cheap-counterfeits-jekyll.md
│   │   ├── 005-scoring-predictions-kata.md
│   │   ├── 006-persisting-relations.md
│   │   ├── 007-confident-ruby.md
│   │   ├── 008-implementing-active-record.md
│   │   ├── 009-the-hidden-costs-of-inheritance.md
│   │   ├── 010-implementing-active-record.md
│   │   ├── 011-responsibility-vs-data-driven.md
│   │   ├── 012-tdd-lessons-learned.md
│   │   ├── 012.1-tdd-lessons-learned-lesson-1.md
│   │   ├── 012.2-tdd-lessons-learned-lesson-2.md
│   │   ├── 012.3-tdd-lessons-learned-lesson-3.md
│   │   ├── 012.4-tdd-lessons-learned-lesson-4.md
│   │   └── README.md
│   ├── v5/
│   │   ├── 001-statistical-modeling.md
│   │   ├── 002-rocket-science-and-lod.md
│   │   ├── 003-evented-io.md
│   │   ├── 004-service-apis.md
│   │   ├── 005-process-spawning-patterns.md
│   │   ├── 006-service-quality.md
│   │   ├── 007-dependencies-notifications-and-adjustments.md
│   │   ├── 008-clojure.md
│   │   ├── 009-ants.md
│   │   ├── 010-prototypes.md
│   │   └── README.md
│   ├── v6/
│   │   ├── 001-parsing-json.md
│   │   ├── 002-code-reading.md
│   │   ├── 003-actors.md
│   │   ├── 004-code-reuse.md
│   │   ├── 005-debugging.md
│   │   ├── 006-recipes-method-bloat.md
│   │   ├── 007-demeter.md
│   │   ├── 008-procedural-to-oop.md
│   │   ├── 009-isolate-responsibilities.md
│   │   ├── 010-concept-before-code.md
│   │   └── README.md
│   ├── v7/
│   │   ├── 001-simulating-tiny-worlds.md
│   │   ├── 002-http-server.md
│   │   ├── 003-stats.md
│   │   ├── 004-incremental.md
│   │   ├── 005-low-level.md
│   │   ├── 006-infrastructure.md
│   │   ├── 007-wumpus.md
│   │   ├── 008-language-learning.md
│   │   ├── 008-oss-quality.md
│   │   ├── 009-course1.md
│   │   ├── 010-information.md
│   │   └── README.md
│   └── v8/
│       ├── 001-problem-discovery.md
│       ├── 002-formula-processing.md
│       └── README.md
└── templates/
    └── default.html.erb

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
articles-html


================================================
FILE: AUTHORS.md
================================================
The articles within this repository are copyrighted works, released under a
creative commons license by their respective authors. Contributors to these
articles retain their own copyright, but must agree to release their content
under the same license as the original source materials.

* All articles in Practicing Ruby volume 1 and volume 2 were written 
by Gregory Brown ( gregory@practicingruby.com ).


================================================
FILE: Gemfile
================================================
# Gems are fetched from the public RubyGems index.
source 'https://rubygems.org'

# Runs the tasks defined in the Rakefile.
gem "rake"
# Required by the Rakefile to render the markdown articles as HTML.
gem "kramdown"
# Required by the Rakefile alongside kramdown; presumably used for
# syntax-highlighting code samples -- TODO confirm.
gem "coderay" 


================================================
FILE: LICENSE.txt
================================================
Creative Commons Legal Code

Attribution-ShareAlike 3.0 Unported

    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
    LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN
    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
    REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR
    DAMAGES RESULTING FROM ITS USE.

License

THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE
COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY
COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS
AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED.

BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE
TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY
BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS
CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND
CONDITIONS.

1. Definitions

 a. "Adaptation" means a work based upon the Work, or upon the Work and
    other pre-existing works, such as a translation, adaptation,
    derivative work, arrangement of music or other alterations of a
    literary or artistic work, or phonogram or performance and includes
    cinematographic adaptations or any other form in which the Work may be
    recast, transformed, or adapted including in any form recognizably
    derived from the original, except that a work that constitutes a
    Collection will not be considered an Adaptation for the purpose of
    this License. For the avoidance of doubt, where the Work is a musical
    work, performance or phonogram, the synchronization of the Work in
    timed-relation with a moving image ("synching") will be considered an
    Adaptation for the purpose of this License.
 b. "Collection" means a collection of literary or artistic works, such as
    encyclopedias and anthologies, or performances, phonograms or
    broadcasts, or other works or subject matter other than works listed
    in Section 1(f) below, which, by reason of the selection and
    arrangement of their contents, constitute intellectual creations, in
    which the Work is included in its entirety in unmodified form along
    with one or more other contributions, each constituting separate and
    independent works in themselves, which together are assembled into a
    collective whole. A work that constitutes a Collection will not be
    considered an Adaptation (as defined below) for the purposes of this
    License.
 c. "Creative Commons Compatible License" means a license that is listed
    at http://creativecommons.org/compatiblelicenses that has been
    approved by Creative Commons as being essentially equivalent to this
    License, including, at a minimum, because that license: (i) contains
    terms that have the same purpose, meaning and effect as the License
    Elements of this License; and, (ii) explicitly permits the relicensing
    of adaptations of works made available under that license under this
    License or a Creative Commons jurisdiction license with the same
    License Elements as this License.
 d. "Distribute" means to make available to the public the original and
    copies of the Work or Adaptation, as appropriate, through sale or
    other transfer of ownership.
 e. "License Elements" means the following high-level license attributes
    as selected by Licensor and indicated in the title of this License:
    Attribution, ShareAlike.
 f. "Licensor" means the individual, individuals, entity or entities that
    offer(s) the Work under the terms of this License.
 g. "Original Author" means, in the case of a literary or artistic work,
    the individual, individuals, entity or entities who created the Work
    or if no individual or entity can be identified, the publisher; and in
    addition (i) in the case of a performance the actors, singers,
    musicians, dancers, and other persons who act, sing, deliver, declaim,
    play in, interpret or otherwise perform literary or artistic works or
    expressions of folklore; (ii) in the case of a phonogram the producer
    being the person or legal entity who first fixes the sounds of a
    performance or other sounds; and, (iii) in the case of broadcasts, the
    organization that transmits the broadcast.
 h. "Work" means the literary and/or artistic work offered under the terms
    of this License including without limitation any production in the
    literary, scientific and artistic domain, whatever may be the mode or
    form of its expression including digital form, such as a book,
    pamphlet and other writing; a lecture, address, sermon or other work
    of the same nature; a dramatic or dramatico-musical work; a
    choreographic work or entertainment in dumb show; a musical
    composition with or without words; a cinematographic work to which are
    assimilated works expressed by a process analogous to cinematography;
    a work of drawing, painting, architecture, sculpture, engraving or
    lithography; a photographic work to which are assimilated works
    expressed by a process analogous to photography; a work of applied
    art; an illustration, map, plan, sketch or three-dimensional work
    relative to geography, topography, architecture or science; a
    performance; a broadcast; a phonogram; a compilation of data to the
    extent it is protected as a copyrightable work; or a work performed by
    a variety or circus performer to the extent it is not otherwise
    considered a literary or artistic work.
 i. "You" means an individual or entity exercising rights under this
    License who has not previously violated the terms of this License with
    respect to the Work, or who has received express permission from the
    Licensor to exercise rights under this License despite a previous
    violation.
 j. "Publicly Perform" means to perform public recitations of the Work and
    to communicate to the public those public recitations, by any means or
    process, including by wire or wireless means or public digital
    performances; to make available to the public Works in such a way that
    members of the public may access these Works from a place and at a
    place individually chosen by them; to perform the Work to the public
    by any means or process and the communication to the public of the
    performances of the Work, including by public digital performance; to
    broadcast and rebroadcast the Work by any means including signs,
    sounds or images.
 k. "Reproduce" means to make copies of the Work by any means including
    without limitation by sound or visual recordings and the right of
    fixation and reproducing fixations of the Work, including storage of a
    protected performance or phonogram in digital form or other electronic
    medium.

2. Fair Dealing Rights. Nothing in this License is intended to reduce,
limit, or restrict any uses free from copyright or rights arising from
limitations or exceptions that are provided for in connection with the
copyright protection under copyright law or other applicable laws.

3. License Grant. Subject to the terms and conditions of this License,
Licensor hereby grants You a worldwide, royalty-free, non-exclusive,
perpetual (for the duration of the applicable copyright) license to
exercise the rights in the Work as stated below:

 a. to Reproduce the Work, to incorporate the Work into one or more
    Collections, and to Reproduce the Work as incorporated in the
    Collections;
 b. to create and Reproduce Adaptations provided that any such Adaptation,
    including any translation in any medium, takes reasonable steps to
    clearly label, demarcate or otherwise identify that changes were made
    to the original Work. For example, a translation could be marked "The
    original work was translated from English to Spanish," or a
    modification could indicate "The original work has been modified.";
 c. to Distribute and Publicly Perform the Work including as incorporated
    in Collections; and,
 d. to Distribute and Publicly Perform Adaptations.
 e. For the avoidance of doubt:

     i. Non-waivable Compulsory License Schemes. In those jurisdictions in
        which the right to collect royalties through any statutory or
        compulsory licensing scheme cannot be waived, the Licensor
        reserves the exclusive right to collect such royalties for any
        exercise by You of the rights granted under this License;
    ii. Waivable Compulsory License Schemes. In those jurisdictions in
        which the right to collect royalties through any statutory or
        compulsory licensing scheme can be waived, the Licensor waives the
        exclusive right to collect such royalties for any exercise by You
        of the rights granted under this License; and,
   iii. Voluntary License Schemes. The Licensor waives the right to
        collect royalties, whether individually or, in the event that the
        Licensor is a member of a collecting society that administers
        voluntary licensing schemes, via that society, from any exercise
        by You of the rights granted under this License.

The above rights may be exercised in all media and formats whether now
known or hereafter devised. The above rights include the right to make
such modifications as are technically necessary to exercise the rights in
other media and formats. Subject to Section 8(f), all rights not expressly
granted by Licensor are hereby reserved.

4. Restrictions. The license granted in Section 3 above is expressly made
subject to and limited by the following restrictions:

 a. You may Distribute or Publicly Perform the Work only under the terms
    of this License. You must include a copy of, or the Uniform Resource
    Identifier (URI) for, this License with every copy of the Work You
    Distribute or Publicly Perform. You may not offer or impose any terms
    on the Work that restrict the terms of this License or the ability of
    the recipient of the Work to exercise the rights granted to that
    recipient under the terms of the License. You may not sublicense the
    Work. You must keep intact all notices that refer to this License and
    to the disclaimer of warranties with every copy of the Work You
    Distribute or Publicly Perform. When You Distribute or Publicly
    Perform the Work, You may not impose any effective technological
    measures on the Work that restrict the ability of a recipient of the
    Work from You to exercise the rights granted to that recipient under
    the terms of the License. This Section 4(a) applies to the Work as
    incorporated in a Collection, but this does not require the Collection
    apart from the Work itself to be made subject to the terms of this
    License. If You create a Collection, upon notice from any Licensor You
    must, to the extent practicable, remove from the Collection any credit
    as required by Section 4(c), as requested. If You create an
    Adaptation, upon notice from any Licensor You must, to the extent
    practicable, remove from the Adaptation any credit as required by
    Section 4(c), as requested.
 b. You may Distribute or Publicly Perform an Adaptation only under the
    terms of: (i) this License; (ii) a later version of this License with
    the same License Elements as this License; (iii) a Creative Commons
    jurisdiction license (either this or a later license version) that
    contains the same License Elements as this License (e.g.,
    Attribution-ShareAlike 3.0 US)); (iv) a Creative Commons Compatible
    License. If you license the Adaptation under one of the licenses
    mentioned in (iv), you must comply with the terms of that license. If
    you license the Adaptation under the terms of any of the licenses
    mentioned in (i), (ii) or (iii) (the "Applicable License"), you must
    comply with the terms of the Applicable License generally and the
    following provisions: (I) You must include a copy of, or the URI for,
    the Applicable License with every copy of each Adaptation You
    Distribute or Publicly Perform; (II) You may not offer or impose any
    terms on the Adaptation that restrict the terms of the Applicable
    License or the ability of the recipient of the Adaptation to exercise
    the rights granted to that recipient under the terms of the Applicable
    License; (III) You must keep intact all notices that refer to the
    Applicable License and to the disclaimer of warranties with every copy
    of the Work as included in the Adaptation You Distribute or Publicly
    Perform; (IV) when You Distribute or Publicly Perform the Adaptation,
    You may not impose any effective technological measures on the
    Adaptation that restrict the ability of a recipient of the Adaptation
    from You to exercise the rights granted to that recipient under the
    terms of the Applicable License. This Section 4(b) applies to the
    Adaptation as incorporated in a Collection, but this does not require
    the Collection apart from the Adaptation itself to be made subject to
    the terms of the Applicable License.
 c. If You Distribute, or Publicly Perform the Work or any Adaptations or
    Collections, You must, unless a request has been made pursuant to
    Section 4(a), keep intact all copyright notices for the Work and
    provide, reasonable to the medium or means You are utilizing: (i) the
    name of the Original Author (or pseudonym, if applicable) if supplied,
    and/or if the Original Author and/or Licensor designate another party
    or parties (e.g., a sponsor institute, publishing entity, journal) for
    attribution ("Attribution Parties") in Licensor's copyright notice,
    terms of service or by other reasonable means, the name of such party
    or parties; (ii) the title of the Work if supplied; (iii) to the
    extent reasonably practicable, the URI, if any, that Licensor
    specifies to be associated with the Work, unless such URI does not
    refer to the copyright notice or licensing information for the Work;
    and (iv) , consistent with Ssection 3(b), in the case of an
    Adaptation, a credit identifying the use of the Work in the Adaptation
    (e.g., "French translation of the Work by Original Author," or
    "Screenplay based on original Work by Original Author"). The credit
    required by this Section 4(c) may be implemented in any reasonable
    manner; provided, however, that in the case of a Adaptation or
    Collection, at a minimum such credit will appear, if a credit for all
    contributing authors of the Adaptation or Collection appears, then as
    part of these credits and in a manner at least as prominent as the
    credits for the other contributing authors. For the avoidance of
    doubt, You may only use the credit required by this Section for the
    purpose of attribution in the manner set out above and, by exercising
    Your rights under this License, You may not implicitly or explicitly
    assert or imply any connection with, sponsorship or endorsement by the
    Original Author, Licensor and/or Attribution Parties, as appropriate,
    of You or Your use of the Work, without the separate, express prior
    written permission of the Original Author, Licensor and/or Attribution
    Parties.
 d. Except as otherwise agreed in writing by the Licensor or as may be
    otherwise permitted by applicable law, if You Reproduce, Distribute or
    Publicly Perform the Work either by itself or as part of any
    Adaptations or Collections, You must not distort, mutilate, modify or
    take other derogatory action in relation to the Work which would be
    prejudicial to the Original Author's honor or reputation. Licensor
    agrees that in those jurisdictions (e.g. Japan), in which any exercise
    of the right granted in Section 3(b) of this License (the right to
    make Adaptations) would be deemed to be a distortion, mutilation,
    modification or other derogatory action prejudicial to the Original
    Author's honor and reputation, the Licensor will waive or not assert,
    as appropriate, this Section, to the fullest extent permitted by the
    applicable national law, to enable You to reasonably exercise Your
    right under Section 3(b) of this License (right to make Adaptations)
    but not otherwise.

5. Representations, Warranties and Disclaimer

UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING, LICENSOR
OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY
KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE,
INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY,
FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF
LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS,
WHETHER OR NOT DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION
OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU.

6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE
LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR
ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES
ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

7. Termination

 a. This License and the rights granted hereunder will terminate
    automatically upon any breach by You of the terms of this License.
    Individuals or entities who have received Adaptations or Collections
    from You under this License, however, will not have their licenses
    terminated provided such individuals or entities remain in full
    compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will
    survive any termination of this License.
 b. Subject to the above terms and conditions, the license granted here is
    perpetual (for the duration of the applicable copyright in the Work).
    Notwithstanding the above, Licensor reserves the right to release the
    Work under different license terms or to stop distributing the Work at
    any time; provided, however that any such election will not serve to
    withdraw this License (or any other license that has been, or is
    required to be, granted under the terms of this License), and this
    License will continue in full force and effect unless terminated as
    stated above.

8. Miscellaneous

 a. Each time You Distribute or Publicly Perform the Work or a Collection,
    the Licensor offers to the recipient a license to the Work on the same
    terms and conditions as the license granted to You under this License.
 b. Each time You Distribute or Publicly Perform an Adaptation, Licensor
    offers to the recipient a license to the original Work on the same
    terms and conditions as the license granted to You under this License.
 c. If any provision of this License is invalid or unenforceable under
    applicable law, it shall not affect the validity or enforceability of
    the remainder of the terms of this License, and without further action
    by the parties to this agreement, such provision shall be reformed to
    the minimum extent necessary to make such provision valid and
    enforceable.
 d. No term or provision of this License shall be deemed waived and no
    breach consented to unless such waiver or consent shall be in writing
    and signed by the party to be charged with such waiver or consent.
 e. This License constitutes the entire agreement between the parties with
    respect to the Work licensed here. There are no understandings,
    agreements or representations with respect to the Work not specified
    here. Licensor shall not be bound by any additional provisions that
    may appear in any communication from You. This License may not be
    modified without the mutual written agreement of the Licensor and You.
 f. The rights granted under, and the subject matter referenced, in this
    License were drafted utilizing the terminology of the Berne Convention
    for the Protection of Literary and Artistic Works (as amended on
    September 28, 1979), the Rome Convention of 1961, the WIPO Copyright
    Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996
    and the Universal Copyright Convention (as revised on July 24, 1971).
    These rights and subject matter take effect in the relevant
    jurisdiction in which the License terms are sought to be enforced
    according to the corresponding provisions of the implementation of
    those treaty provisions in the applicable national law. If the
    standard suite of rights granted under applicable copyright law
    includes additional rights not granted under this License, such
    additional rights are deemed to be included in the License; this
    License is not intended to restrict the license of any rights under
    applicable law.


Creative Commons Notice

    Creative Commons is not a party to this License, and makes no warranty
    whatsoever in connection with the Work. Creative Commons will not be
    liable to You or any party on any legal theory for any damages
    whatsoever, including without limitation any general, special,
    incidental or consequential damages arising in connection to this
    license. Notwithstanding the foregoing two (2) sentences, if Creative
    Commons has expressly identified itself as the Licensor hereunder, it
    shall have all rights and obligations of Licensor.

    Except for the limited purpose of indicating to the public that the
    Work is licensed under the CCPL, Creative Commons does not authorize
    the use by either party of the trademark "Creative Commons" or any
    related trademark or logo of Creative Commons without the prior
    written consent of Creative Commons. Any permitted use will be in
    compliance with Creative Commons' then-current trademark usage
    guidelines, as may be published on its website or otherwise made
    available upon request from time to time. For the avoidance of doubt,
    this trademark restriction does not form part of the License.

    Creative Commons may be contacted at http://creativecommons.org/.


================================================
FILE: README.md
================================================
## Practicing Ruby: Manuscripts archive

This is a collection of publicly released articles from the [Practicing
Ruby](http://practicingruby.com) journal.

You are welcome to use these materials as you see fit, as long as you 
abide by the terms of the [Creative Commons Attribution-ShareAlike 3.0 
Unported](http://creativecommons.org/licenses/by-sa/3.0/) license.

Make sure to include a clear reference to practicingruby.com anywhere you make 
use of these materials. It'd also be nice for you to link to 
http://github.com/elm-city-craftworks/practicing-ruby-manuscripts so
that others can find these source materials, but that is optional.

Please see the AUTHORS.md file in this repository for copyright information.

For any questions, please contact gregory@practicingruby.com. I am especially
interested in hearing from folks who would like to translate these articles into
other languages (both spoken languages and programming languages!), but I'd love
to hear from anyone who wishes to make use of these materials for any purpose.

## To support my work, please subscribe at practicingruby.com

I believe that it's only right for me to release my articles under a free 
documentation license, but this project is still a TON of work,
and would not exist without my subscribers. You can make it possible for me 
to keep publishing by signing up for a paid account. It's only $8/month, and it's totally worth it.


================================================
FILE: Rakefile
================================================
require "kramdown"
require "coderay"
require "fileutils"

# Paths of every markdown article found under articles/, at any depth,
# rooted at the directory that holds this Rakefile.
MARKDOWN_FILES = Dir[File.join(__dir__, "articles", "**", "*.md")]

# Running bare `rake` generates the HTML files.
task :default => :html_files

desc "Generate HTML files from markdown articles"
# Renders every file in MARKDOWN_FILES to HTML, mirroring the layout of
# articles/ underneath articles-html/ (articles/v1/x.md becomes
# articles-html/v1/x.html).
task :html_files do
  articles_root = File.join(__dir__, "articles")
  html_root     = File.join(__dir__, "articles-html")
  template_path = File.join(__dir__, "templates", "default.html.erb")

  # Anchor the rewrite to the start of the path so that only this
  # repository's own articles/ directory is swapped out. An unanchored
  # substitution would hit the first "/articles/" anywhere in the absolute
  # path, e.g. a parent directory that happens to be named "articles".
  articles_prefix = /\A#{Regexp.escape(articles_root)}(?=\/)/

  MARKDOWN_FILES.each do |markdown_file|
    # Block form of sub: the replacement is inserted literally, without
    # backslash sequences in the path being treated as back-references.
    html_path = markdown_file.sub(articles_prefix) { html_root }
                             .sub(/\.md\z/, ".html")
    puts "Generating #{html_path}"
    FileUtils.mkdir_p(File.dirname(html_path))

    # Fenced code blocks written with backticks are rewritten to the tilde
    # fence before the text is handed to kramdown.
    markdown = File.read(markdown_file).gsub("```", "~~~")

    # Render first and write afterwards, so a failure while rendering does
    # not leave an empty or truncated HTML file on disk.
    html = Kramdown::Document.new(markdown, template: template_path).to_html
    File.write(html_path, html)
  end
end

desc "Delete all generated HTML files"
# Removes the articles-html/ output directory and everything beneath it.
task :clean do
  generated_dir = File.join(__dir__, "articles-html")
  FileUtils.rm_rf(generated_dir)
end


================================================
FILE: articles/v1/001-method-lookup.md
================================================
I decided to start off this newsletter with one of the most basic but essential pieces of knowledge you can have about Ruby's object model: the way it looks up methods. Let's do a little exploration by working through a few examples.

Below we have a simple report class tasked with performing some basic data manipulations and then producing some text output.

```ruby
# Plain-text account summary built from a ledger of [description, amount]
# pairs. Amounts greater than zero are listed as credits; all other
# entries are listed as debits.
class Report
  attr_reader :credits, :debits, :balance

  # ledger - a collection of [description, amount] pairs.
  def initialize(ledger)
    @credits, @debits = ledger.partition { |_, amount| amount > 0 }
    @balance          = ledger.inject(0) { |total, (_, amount)| total + amount }
  end

  # Returns the whole report: the balance, then the credits section, then
  # the debits section, each separated by a blank line.
  def formatted_output
    sections = [
      "Current Balance: #{balance}",
      "Credits:\n\n#{formatted_line_items(credits)}",
      "Debits:\n\n#{formatted_line_items(debits)}"
    ]

    sections.join("\n\n")
  end

  # Returns one "description: amount" line per item, with the amount shown
  # as an absolute value rounded to two decimal places.
  def formatted_line_items(items)
    lines = items.map do |description, amount|
      "#{description}: #{format('%.2f', amount.abs)}"
    end

    lines.join("\n")
  end
end
```

The following example demonstrates how we'd make use of this class.

```ruby
# Each ledger entry is a [description, amount] pair; Report lists positive
# amounts as credits and everything else as debits.
ledger = [ ["Deposit Check #123", 500.15],
           ["Fancy Shoes",       -200.25],
           ["Fancy Hat",          -54.40],
           ["ATM Deposit",       1200.00],
           ["Kitteh Litteh",       -5.00] ]

# Build the report and print its text to standard output.
report = Report.new(ledger)
puts report.formatted_output
```

And for those who don't want to take the time to copy and paste this code and run it locally, the actual output is shown below.

```
Current Balance: 1440.5

Credits:

Deposit Check #123: 500.15
ATM Deposit: 1200.00

Debits:

Fancy Shoes: 200.25
Fancy Hat: 54.40
Kitteh Litteh: 5.00
```

While not particularly pretty, this report is mostly what we'd expect to see. You can probably imagine how this information might be embedded within another report, such as an email-based form letter with some header and footer information. One possible way to do this would be through class inheritance, as in the example below.

```ruby
require "date"

# A Report wrapped in form-letter dressing: a greeting and date line above,
# a sign-off below, and a dotted banner on either side of the report body.
class EmailReport < Report
  # Delegates to Report#formatted_output (via super) for the body and
  # surrounds it with the header, banners and footer.
  def formatted_output
    [header, banner, super, banner, footer].join
  end

  def header
    greeting = "Dear Valued Customer,\n\n"
    summary  = "This report shows your account activity as of #{Date.today}\n"

    greeting + summary
  end

  def footer
    "\nWith Much Love,\nYour Faceless Banking Institution"
  end

  # A dotted rule with a newline on each side.
  def banner
    "\n............................................................\n"
  end
end
```

We only need to make a minor change to our calling code to make use of this new class.

```ruby
# Same ledger as in the earlier Report example.
ledger = [ ["Deposit Check #123", 500.15],
           ["Fancy Shoes",       -200.25],
           ["Fancy Hat",          -54.40],
           ["ATM Deposit",       1200.00],
           ["Kitteh Litteh",       -5.00] ]

# The only difference from the earlier calling code: EmailReport is
# instantiated instead of Report.
report = EmailReport.new(ledger)
puts report.formatted_output
```

Below you can see what the new output ends up looking like.

```
Dear Valued Customer,

This report shows your account activity as of 2010-11-09

............................................................
Current Balance: 1440.5

Credits:

Deposit Check #123: 500.15
ATM Deposit: 1200.00

Debits:

Fancy Shoes: 200.25
Fancy Hat: 54.40
Kitteh Litteh: 5.00
............................................................

With Much Love,
Your Faceless Banking Institution
```

Looking back at the `EmailReport` code, it's easy to see what we've done to produce this new output. We've defined a new `formatted_output` method which adds the headers and footers, and combined this new behavior with the original behavior of our `Report` class by calling `super`. This is the same extension by inheritance pattern that you'll learn in any basic computer science course or encounter in any of the reasonably traditional object oriented languages out there.

But before you go asking for a refund and start telling your friends that this newsletter is painfully dull, consider this: While many languages have a method lookup path which is based on inheritance alone, that isn't even close to being true about Ruby.

Because Ruby allows for module mixins and per-object behavior, the `super` keyword takes on a whole new life in which an object's superclass is the last stop on a five part journey through Ruby's object model. The following example proves the point by composing a simple string which demonstrates the order in which methods are resolved in Ruby.

```ruby
# Every `foo` below contributes one line naming where it was defined and then
# hands off to `super`, so the final string records the order in which Ruby
# visits each definition.

module W
  def foo
    "- Mixed in method defined by W\n#{super}"
  end
end

module X
  def foo
    "- Mixed in method defined by X\n#{super}"
  end
end

module Y
  def foo
    "- Mixed in method defined by Y\n#{super}"
  end
end

module Z
  def foo
    "- Mixed in method defined by Z\n#{super}"
  end
end

# End of the chain: A does not call super.
class A
  def foo
    "- Instance method defined by A\n"
  end
end

class B < A
  def foo
    "- Instance method defined by B\n#{super}"
  end

  # Two separate include calls, W first and X second, so X is consulted
  # before W during lookup.
  include W
  include X
end

# Y is extended first and Z second, so Z is consulted before Y.
object = B.new.extend(Y).extend(Z)

# A method defined on this one object only.
def object.foo
  "- Method defined directly on an instance of B\n#{super}"
end

puts object.foo
```

When we run this code, we see the following output, which traces the `super` calls all the way up from the method defined directly on our object to its superclass.

```
- Method defined directly on an instance of B
- Mixed in method defined by Z
- Mixed in method defined by Y
- Instance method defined by B
- Mixed in method defined by X
- Mixed in method defined by W
- Instance method defined by A
```

As promised, it's a five step journey. Particularly, the above is a demonstration that Ruby methods are looked up in the following order:

1. Methods defined in the object's singleton class (i.e. the object itself)
1. Modules mixed into the singleton class in reverse order of inclusion
1. Methods defined by the object's class
1. Modules included into the object's class in reverse order of inclusion
1. Methods defined by the object's superclass.

This process is then repeated all the way up the inheritance chain until `BasicObject` is reached. Now that we know the basic order, we should stop and consider a few questions about what we've discussed so far.

### Open Questions / Things To Explore

* Why would we want or need five distinct places to define methods? Do these other options really gain us anything over ordinary inheritance?

* Does this change the way that classic object oriented design principles apply to Ruby? For example, how well do you think direct translations of design patterns map to Ruby?

* Think of each place you can define a method in Ruby, and consider which ones are important for every day use, and which ones are edge cases. Is per-object behavior really that useful?

* It is rare to use all of these options at once, and the only reason it was done in this exercise was for demonstration purposes. But taken individually, can you think of a practical use for each way of defining Ruby methods?

* What are some disadvantages for each technique shown here?

I will address these points and also go over some practical applications in the next issue, but please share your own thoughts in the comments section below.

> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/030-issue-1-method-lookup.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/002-method-lookup.md
================================================
In [Issue #1](http://practicingruby.com/articles/29) we discussed Ruby's lookup
path and proved by example that class inheritance is only a small part of the 
picture. To recap, Ruby methods are looked up in the following order:

1. Methods defined in the object's singleton class (i.e. the object itself)
1. Modules mixed into the singleton class in reverse order of inclusion
1. Methods defined by the object's class
1. Modules included into the object's class in reverse order of inclusion
1. Methods defined by the object's superclass, i.e. inherited methods

The example we looked at in the previous issue just showed the mechanics of how the above process plays out; it didn't really hint at practical use cases. Today, we'll look at a scenario for each of these options and discuss some of the ups and downs that come along with them. Rather than presenting the examples in method lookup order, I'll try to start with the most common ones and work my way out to the more special purpose ones.

> **NOTE:** In a comment on issue #1, <a href="http://twitter.com/jeg2">@JEG2</a> correctly pointed out that this roadmap doesn't account for what happens after the whole class hierarchy is walked. Once `BasicObject` is reached, Ruby starts from the bottom again calling the `method_missing` hook, which is essentially an implicit step 6. I left this detail out for the sake of simplicity, but it's very important to at least be aware of.

### When to use ordinary class definitions

The following code implements a simple timer that can write out a timestamp to a file and read it back later to determine elapsed time. Study it and consider its design.

```ruby
# A stopwatch whose only state is a "timestamp" file inside a directory:
# the file's presence means the timer is running, and its contents record
# when it was started.
class Timer
  # Raised by #timestamp when no timestamp file exists.
  class MissingTimestampError < StandardError; end

  # dir - directory that holds the timestamp file.
  def initialize(dir=Turbine::Application.config_dir)
    @file = "#{dir}/timestamp"
  end

  # True when the timestamp file is present.
  def running?
    File.exist?(@file)
  end

  # Records the current UTC time, replacing any previous timestamp.
  def write_timestamp
    now = Time.now.utc.to_s
    File.open(@file, "w") { |io| io << now }
  end

  # Returns the recorded time converted to local time.
  # Raises MissingTimestampError if the timer is not running.
  def timestamp
    if running?
      Time.parse(File.read(@file)).localtime
    else
      raise MissingTimestampError
    end
  end

  # Time since the recorded timestamp, in hours (seconds / 60 / 60).
  def elapsed_time
    seconds = Time.now.utc - timestamp.utc
    seconds / 60.0 / 60.0
  end

  # Removes the timestamp file; does nothing if it is already gone.
  def clear_timestamp
    FileUtils.rm_f(@file)
  end
end 
```

When deciding if just a plain old class definition will do, I often ask myself several questions.

* Is it likely that I'll need to customize this code later for another purpose?
* Is this code meant to be interacted with and extended by third party code? 
* Are there any common behaviors in this code I'd want to extract and use elsewhere?

Because this `Timer` class definition comes from a real project of mine, I can tell you that the answer to all of the above questions in the context this code is intended to be used is a simple 'no'. What this indicates to me is that while extension might be necessary at some point down the line, there is no immediate need to design for extensibility, and so we go with the most simple option that could possibly work.

Another indicator that a plain class definition might be appropriate here is the fact that most of the functionality in this class is centered around manipulating a particular bit of state, the <i>timestamp</i> file. The problem we are trying to solve is quite a narrow one, and a single-minded class definition reflects that.

The downside to designing code this way is that it does make third-party modification harder. If for example, you wanted to add some behavior around the `timestamp()` method, you have three options, none of them great:

 * You can create a subclass of `Timer`, but your new class won't be used by the application that defined `Timer` without modification.

 * You can create an instance of `Timer` and then add per-object behavior, but this has the same problem as subclassing.

 * You can use `alias_method` to create a monkeypatch to `Timer`, which will inject your code into the original application, but runs risks of naming clashes and other nasty things.

While it ultimately depends on how the calling code uses this `Timer` class, and what features are provided for making extensions, it's not going to be trivial to modify systems built in this fashion. But because we already determined this was a narrow bit of functionality designed to be used internally within a larger application, it isn't a problem that it isn't super extendable.

Many of the rules that apply to defining your own classes also apply to inheritance based designs, so let's investigate that now.

### When Inheritance Makes Sense

For those working with Rails, you already encounter class inheritance on a daily basis, through the ActiveRecord ORM. Despite the terrible choice of name, `ActiveRecord::Base` is a reasonable example of when class inheritance is a decent option.

Consider the typical ActiveRecord model, which is often extremely simple:

```ruby
# A minimal model: the class body consists solely of two class-level
# declarations inherited from ActiveRecord::Base and defines no state or
# methods of its own.
class User < ActiveRecord::Base 
  # Association declaration (a user has many comments).
  has_many :comments

  # Validation declaration covering the name and email attributes.
  validates_presence_of :name, :email
end
```

While it's true that in most interesting applications, models do become more complex, implementing intricate business logic, the amount of functionality added by the user is dwarfed by what ActiveRecord provides.

One key thing to notice about a subclass of `ActiveRecord::Base` is that by design, there is really no incentive to manage your own state. All state manipulation is passed upwards to the parent class to handle, which typically involves using a pre-configured database connection also managed by the parent class to persist whatever state is required.

Inheritance makes sense in situations where complex state manipulations are handled by the parent class. This is especially true if the parent class provides a boat-load of functionality which dwarfs the customization needs of the child class. Since both things are true about a typical ActiveRecord model, the design is certainly a reasonable choice.

However, before you start modeling your own projects after this pattern, you should take a look at the great pains that go into designing an extensible parent class. It's outside the scope of this article, but I'd recommend reading what <a href="http://is.gd/gW558">Yehuda Katz has to say about ActiveModel</a>, which provides the bulk of ActiveRecord's functionality under the hood.

Before we move on to other topics, I'd like to offer another example outside of the Rails world, just to help further illuminate the pattern.

The PDF generation library <a href="http://prawn.majesticseacreature.com">Prawn</a> provides a class that's designed to be inherited from, `Prawn::Document`. I made use of this functionality recently to build a small typesetting library for formatting technical articles. While I won't go into much detail here, you can check out the <a href="http://github.com/madriska/jambalaya">implementation and example code</a>.

What you'll find in <a href="https://github.com/madriska/jambalaya/blob/master/lib/jambalaya.rb">lib/jambalaya.rb</a> is that except for a custom factory method for generating the document, Jambalaya introduces no new state, relying on calls to `Prawn::Document` to do all the heavy lifting. You can also see that <a href="https://github.com/madriska/jambalaya/blob/master/example/rbp_ch1.rb">examples/rbp_ch1.rb</a> gives the illusion of a new special purpose DSL, but that in truth, almost all the work is being done by Prawn under the hood.

Unfortunately, the disadvantages of class inheritance become clear the farther away you get from these scenarios in which the subclass truly is analogous to its parent class. You get only one parent class, and chaining to it is a commitment that you must be willing to respect all the way up the hierarchy. For the scenarios we've shown, the benefits outweigh the drawbacks, but for many others, they do not.

In Issue #1, I asked the question of which techniques are special cases, and which are meant to be used commonly. While not rare by any means, inheritance falls closer to being a special case than it does to being the first tool you should reach for. If this comes as a surprise to you, it's about time for us to talk about modules.

### Mixing modules into a class

If you want to see the power of mixins, you need to look no farther than Ruby's
`Enumerable` module. Rather than relying on a common base class to provide
iterators for collections, Ruby mixes the `Enumerable` module into its core structures, 
including `Array` and `Hash`. This is where a whole host of useful methods come from, 
including `map`, `select`, and `inject`.

The beauty of this design is that it imposes a much lighter contract than the rigid is-a relationship enforced by class inheritance. Instead, mixins focus on what you can do with an object rather than what that object is. It makes perfect sense to say both `Hash` and `Array` objects have elements that can be enumerated over. As far as Ruby is concerned, the same can be true about any object which defines an `each()` method.

Let's take a look at a custom Ruby class which implements each and mixes in
`Enumerable`. It is a simple file-backed numerical queue, from the same project 
our `Timer` came from.

```ruby
# A file-backed queue of numbers, one per line of the backing file. Mixing in
# Enumerable on top of #each supplies map, select, inject and friends.
#
# NOTE(review): recent Rubies ship a core ::Queue class; as written this
# definition would reopen it rather than create a new class -- confirm the
# intended namespace before reusing this outside the article.
class Queue 
  include Enumerable

  # file - path of the text file holding the queued numbers.
  def initialize(file)
    @file = file
  end

  # Returns the queued numbers as an Array of Floats, in file order.
  # Returns [] when the queue reports itself empty (#empty? is one of the
  # methods omitted from this listing).
  def entries
    return [] if empty?

    File.read(@file).split("\n").map { |e| e.to_f }
  end

  # Yields each entry in turn. When called without a block, returns an
  # Enumerator instead of failing with LocalJumpError, matching the
  # convention followed by Ruby's own collections.
  def each(&block)
    return enum_for(:each) unless block

    entries.each(&block)
  end

  # additional unrelated methods omitted
end
```

The data file this queue wraps looked something similar to the data shown below.

```
125.75
100.25
300.50
700
```

Given a properly formatted input file, it's possible to interact with the `Queue` like any other `Enumerable` object.

```ruby
# Queue defines only #each; map and inject come from the Enumerable mixin.
queue = Queue.new("queue.txt")
p queue.map { |x| "Amount: #{x}" }
p queue.inject { |x,y| x + y }
```

If you go ahead and try this yourself, you'll find that it will work identically if you simply replace the first line with an array, as shown below.

```ruby
# The same two calls against a plain Array. Note the last element is the
# Integer 700, whereas Queue#entries converts every line with to_f.
queue = [125.75, 100.25, 300.50, 700]
p queue.map { |x| "Amount: #{x}" }
p queue.inject { |x,y| x + y }
```

This simple example hints at the real beauty of `Enumerable` in particular, and the mixin technique in general. In reality, my `Queue` object and Ruby's `Array` class have very little in common. But in the context of how you can iterate over the two objects, they can share a matching interface for the things they do have in common.

This is where modules shine. They allow some of the benefits of inheritance in that they allow implementation sharing, but without the requirement of organizing things into a rigid class hierarchy. Things get even more interesting when you remember to tie your understanding of how modules work back to the way Ruby looks up methods.

### Exploiting the lookup order of mixins

Methods are looked up in mixins in reverse order of their inclusion, giving the last module you mixed in a priority spot in the lookup path. A pleasant effect that arises naturally from this rule is that it provides an elegant technique for monkey patching that does not rely on method aliasing. Let's look at a patch that uses method aliasing, and how it could be written differently.

Below is the code that Rubygems uses to patch `require` to add in gem loading functionality. Since `require` is just a method in Ruby, and not a keyword, the patch is relatively straightforward in pure Ruby.

```ruby
# Reopens Kernel to wrap `require` with a fallback that tries gem activation.
module Kernel
  # Keep the previous implementation reachable under a second name so the
  # wrapper below can delegate to it.
  alias gem_original_require require

  # Attempts the original require first. If that raises LoadError and the
  # message ends with the requested path, asks Gem.try_activate about the
  # path and, when it returns a truthy value, retries the original require.
  # In every other case the original LoadError is re-raised.
  #
  # NOTE(review): a `def` in a reopened module body is public by default,
  # so as listed this wrapper is public even if the method it replaces was
  # private -- confirm against the upstream source.
  def require(path) # :doc:
    gem_original_require path
  rescue LoadError => load_error
    # Only react to failures whose message ends with the path we asked for.
    if load_error.message.end_with?(path)
      if Gem.try_activate(path)
        return gem_original_require(path)
      end
    end

    raise load_error
  end
end 
```

At the time this code was written, using method aliasing was the standard way of changing the behavior of an existing method. Aliases are used to make a copy of an existing method before modifying it, which allows customized code to delegate to the original method. This permits re-using the parts of the original method that are needed while (hopefully) preventing issues with backwards compatibility. The general approach works well, but it increases the chances that the copied methods will clash with each other as the chain gets longer, and also adds a number of superfluous methods to objects that are really just implementation details.

Taking advantage of Ruby's method lookup order in modules, we can get around the issues with aliasing by writing a patch similar to the one shown below.

```ruby
# Gem-aware `require`, layered on top of the existing implementation through
# the method lookup path rather than through aliasing.
module GemCustomRequire
  # Kernel#require is a private method. Keeping the override private
  # prevents it from showing up as a public method on every object
  # (e.g. `"string".require "set"`).
  private

  # Tries the next `require` in the lookup path first. If that raises
  # LoadError and the message ends with the requested path, asks
  # Gem.try_activate about the path and, when it returns a truthy value,
  # retries via super. In every other case the original LoadError is
  # re-raised.
  def require(path) # :doc:
    super
  rescue LoadError => load_error
    if load_error.message.end_with?(path)
      if Gem.try_activate(path)
        return super
      end
    end

    raise load_error
  end   
end 

# Object already includes Kernel, so a module included afterwards sits
# ahead of Kernel in the lookup path and its `super` reaches Kernel#require.
class Object
  include GemCustomRequire
end
```

Because the original `require()` method is defined within the `Kernel` module and not on `Object` itself, we can include our `GemCustomRequire` module and then use `super` to call the original require. The result is code that looks more natural and ordinary, reducing the amount of magic you need to know in order to understand it. It also completely avoids the possible issue of copied methods clashing with one another.

This ability to do safe monkeypatching that modules afford us has been picking up steam within popular Ruby projects. Rails 3 was to a large extent designed to afford this sort of modularity, for the express purpose of making it easier for third party plugins to hook into the system in a more graceful way than method aliasing. Other projects that require a high degree of extensibility are quickly following in its footsteps, which is a good thing.

While you're less likely to run into this question in application code than you are in library or framework code, knowing what mixins can gain you in terms of extensibility can really come in handy. There are tons of other good things to say about modules, but we'll need to save those for another day.

### Per Object Behavior

I was originally going to go into detail about mixing modules into individual objects as well as defining singleton methods. However, I think that can be a topic all of its own, and I want to give it a proper treatment rather than tacking it on to the end of an already lengthy newsletter.

I promise we'll revisit it soon, but for those who absolutely want to explore potential uses of these techniques right away, I offer two small challenges.

1) Rather than using a framework for testing stubs, experiment with something like the code below next time you're writing tests.

```ruby
# A bare object to hang the stub on.
obj = Object.new

# Opens obj's singleton class, so the method below exists on this one
# object only.
class << obj
  # Placeholder: the body is intentionally empty, so the method returns nil.
  def my_stubbed_method

  end
end
```

2) Rather than re-opening a class to add some extra behavior, experiment with mixing modules into individual objects to get the extra features you need.

```ruby
# Numeric helpers for any object that provides `inject` and `length`.
# Intended to be mixed into individual objects with `extend`.
module MathHelpers
  # Adds the elements together with `+`; nil when there are no elements.
  def sum
    reduce { |total, value| total + value }
  end

  # Mean of the elements, always computed as a Float.
  def average
    total = sum
    total.to_f / length
  end
end

# Only this one array gains the helpers; other arrays are unaffected.
array = [1,2,3].extend(MathHelpers)
p array.average
```

If you try these ideas out, you'll almost certainly find uses for them in other
contexts, too.

### Reflections

Hopefully you've learned something new about Ruby's method lookup rules, or at least been given some new things to think about and explore. If you've come from a background in which inheritance has been your only tool, you will likely have to retrain yourself a bit to make full use of what Ruby has to offer.

Whenever you compare one of these options to the other, consider your context
and how much the advantages and disadvantages of each technique affect your
particular situation. The correct approach always depends on that context, and
if in doubt, experiment and see what works best.

More discussion on this topic is welcome in the comments section below. While I wrote this article a while ago, I am happy to jump back into the topic as long as folks have interesting ideas and questions to share.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/031-issue-2-method-lookup.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/003-configurable.md
================================================
Ruby developers tend to prefer convention over configuration, but that doesn't mean our applications are configuration-free.  If you're doing serious software development, it's likely that at least some of your projects depend on some sort of configuration data. Whether you simply need to store database credentials, an API key, or something much more complicated, it's important to know how to do so in a way that is flexible without introducing too much administrative overhead.

In this two part article series, we'll be talking about the many options Ruby provides us for working with configuration data, and what techniques work best in various common scenarios. We'll start by showing a single example of a problem and one way to solve it, and then go on to discuss various other options in Issue #4.

### Configuration Done Wrong

The worst way to work with configuration data is to embed it directly within your application. The simple Sinatra application shown below is a nice example of what *not* to do.

```ruby
require "rubygems"
require "sinatra"
require "active_record"

# Model with no behaviour of its own; everything comes from ActiveRecord::Base.
class User < ActiveRecord::Base; end

# Establishes the database connection inside Sinatra's configure block.
configure do
  # The connection settings -- including the password -- are literal values
  # in application code. This is the problem the example exists to show.
  ActiveRecord::Base.establish_connection(
    :adapter  => "mysql",
    :host     => "myhost",
    :username => "myuser",
    :password => "mypass",
    :database => "somedatabase"
  )
end

# GET /users: load every user and render the :user_index Haml template.
get "/users" do
  @users = User.all
  haml :user_index
end
```

The code above establishes a connection to the database on application startup and then proceeds to implement a rather simple call to get a full user listing and then render a Haml template. With an application this simple, the configuration data seems a bit harmless. But with just a moment's thought, it is easy to see numerous flaws with this sort of design.

The first and most obvious issue with this sort of code is security: everyone who looks at its source needs to be trusted, as the credentials for the database connection are embedded directly within it. Now, this may or may not be a concern depending on who is involved with the project, and what other systems are in place to restrict access to production systems, but it is important to think about nonetheless.

In a field in which revision control is a key part of our practices, it's not as simple as removing this sensitive information when you decide you no longer want to share it with others. Rewriting the history of a repository is straightforward on its own, but mixing application and configuration code makes it tricky to do this without jumping through a bunch of hoops. This is where the security concerns overlap with maintenance issues.

Suppose you want to share this trivial sinatra application with a friend, or even use it on another machine. The in-application configuration forces everyone to set up an identical database environment, even if the needs of the application may not really call for that. Any change to this configuration information would lead to merge conflicts when you try to pull in changes across machines, which could become annoying quite fast.

Fortunately, Ruby makes writing proper configuration systems easy enough that the only valid reason for writing code this way is if you're doing a throwaway spike. Let's see how easily we can emulate the way Rails solves this problem in its own framework.

### YAML Based Configurations

With slight modifications, we can move our configuration out of our application and into a YAML file. We'd like to end up with a database.yml file looking quite similar to a standard Rails configuration file, such as the one below:

```
development:
  adapter: mysql
  database: mydatabase
  username: myuser
  password: mypass
  host: myhost
```

Through the standard YAML library, we can easily access this data by parsing it into a nested hash, as shown in the irb session below.

```
>> require "yaml"
=> true
>> YAML.load_file("config/database.yml")
=> {"development"=>{"username"=>"myuser", "adapter"=>"mysql", 
   "database"=>"mydatabase", "host"=>"myhost", "password"=>"mypass"}}
```

If we compare this output to our original example of calling `establish_connection()` directly with an explicit configuration hash, the following code should be very easy to follow.

```ruby
require "rubygems"
require "yaml"
require "sinatra"
require "active_record"

# Model with no behaviour of its own; everything comes from ActiveRecord::Base.
class User < ActiveRecord::Base; end

# Reads the connection settings from config/database.yml at startup.
configure do
  # database.yml is keyed by environment name (e.g. "development"), so the
  # settings for the current environment must be picked out before they are
  # handed to establish_connection; the top-level hash has no adapter key.
  # RACK_ENV is used to choose the section, falling back to "development".
  # NOTE(review): assumes RACK_ENV is how the environment is selected in
  # this deployment -- confirm.
  env_name        = ENV["RACK_ENV"] || "development"
  database_config = YAML.load_file("config/database.yml").fetch(env_name)

  ActiveRecord::Base.establish_connection(database_config)
end

# GET /users: load every user and render the :user_index Haml template.
get "/users" do
  @users = User.all
  haml :user_index
end
```

By removing the configuration data from the application code, we have made it so that the application code no longer needs to be modified everywhere it runs, provided the configuration data is properly set up. We can now safely tell our revision control system to ignore the configuration file without it causing many problems.

Now that we've seen a simple problem and a reasonable fix for it, let's ponder a few questions so that we can hit some more subtle topics in Issue #4.

### Questions and Discussion Points

* YAML is a nice readable data format with good Ruby support, but it can only represent data, which does not allow you to make dynamic configuration systems with it. Rails runs its YAML files through ERB to address this issue, but what other ways could this problem be solved?

* How would you handle configuration for something like a command line application which may be run anywhere on your system? How might you build per-user and per-project configuration systems?

* Suppose you have a project that is mirrored to both Github and Heroku, and that you want to run directly from your public sources while providing some configuration options in your production environment. How should you handle this?

* What are some important practices to follow when implementing configuration systems, regardless of the underlying context and what approach you choose?

Please feel free to include your answers to these questions in the comments section below, along with any other thoughts or questions you might wish to share. I promise to reply personally to anyone who leaves a comment!
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/032-issue-3-configurable.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/004-configurable.md
================================================
In [Issue #3](http://practicingruby.com/articles/31), we looked at the downsides
of mixing configuration code with application code. We discussed how storing
configuration data in YAML files can solve many of those issues, but not
all of them. In this article, we will explore the limitations of the YAML 
format, and then consider the tradeoffs involved in using various 
alternative solutions.

### Dynamic Configuration

In response to the questions posed by Issue #3, Franklin Webber demonstrated
how YAML's aliasing functionality can be used to reduce duplication in
a configuration file:

```
default: &DEFAULT
  host:
    name: testsystem
    http_port: '8080'
    username: defaultuser
  database:
    host: db01/db01
    username:
    password:
  test:
    browser: FIREFOX

windows_default: &WIN_DEFAULT
  <<: *DEFAULT
  test:
    browser: IE
```

In this example, the `default` and `windows_default` configurations share almost
the same attributes, except that browsers differ in test mode. Franklin uses
aliasing to merge the `DEFAULT` data into the `WIN_DEFAULT` entry, solving his
duplication problem. This is a neat way to keep your YAML configurations well
organized.

While Franklin shared this example of aliasing to illustrate that some dynamic
functionality does exist within YAML, he acknowledged that the format was still
mostly suited for static data. Even though it is possible to reference
various entries within the data structure, they cannot be manipulated. 
That means that the following concatenation example cannot be done in pure 
YAML, and would require some additional processing:

```
host:
  name: localhost
  port: 3000
web:
  login_url: #{name}:#{port}/login 
```

This is where we cross the line from problems solved by a data format to those
solved by programming languages. Franklin suggests that running the YAML data
through Ruby's `eval` method is an option, which is similar to how Rails
passes its YAML files through `ERB`. This approach would work, but once we 
start going down that road, we need to ask what it would take to implement 
the entire configuration in pure Ruby. As you can see in the following example, 
the answer is 'not much':

```ruby
module MyApp
  # Application settings expressed as plain Ruby constants, so one value
  # can be derived from another.
  module Config
    HOST = { :name => 'localhost', :port => 3000 }

    # The login URL is built from the host settings above.
    name, port = HOST.values_at(:name, :port)
    WEB = { :login_url => "#{name}:#{port}/login" }
  end
end
```

If we drop this snippet into our application code, we run into the same problems
that we saw in the first example in Issue #3. But by defining this module
in its own file and requiring that file, those issues are avoided:

```ruby
require "config/my_app_config"
require "rest_client"

module MyApp
  # HTTP client helpers, callable directly on the module via `extend self`.
  module Client
    extend self

    # POSTs the given credentials to the login URL taken from MyApp::Config.
    def authenticate(user, password)
      login_url   = MyApp::Config::WEB[:login_url]
      credentials = { :user => user, :password => password }

      RestClient.post(login_url, credentials)
    end
  end
end

MyApp::Client.authenticate('my_user', 'seekrit')
```

Using ordinary Ruby constants is no more complicated than referring to data
stored in a YAML file, but gives you the full power of Ruby in your
configuration scripts. In more complex configurations, you may even build
a mini-DSL, as shown in the following example:

```ruby
# Declares four roles. Each role lists only the permissions it adds; a role
# with a :parent also receives everything its parent allows, so the roles
# below form a chain: basic -> premium -> manager -> owner.
AccessControl.configure do
  role "basic", 
    :permissions => [:read_answers, :answer_questions]
  
  # premium = its own permission + basic's
  role "premium", 
    :parent      => "basic",
    :permissions => [:hide_advertisements]

  # manager = its own permissions + premium's
  role "manager", 
    :parent      => "premium",
    :permissions => [:create_quizzes, :edit_quizzes]

  # owner = its own permissions + manager's
  role "owner",
    :parent      => "manager",
    :permissions => [:edit_users, :deactivate_users]
end
```

While this looks like vanilla configuration code on the surface, we can see that what we're working with are full blown Ruby objects. Here are some examples of how this system is used:

```ruby
>> AccessControl.roles_with_permission(:create_quizzes)
=> ["manager", "owner"]
>> AccessControl["premium"].permissions
=> [:hide_advertisements, :read_answers, :answer_questions]
>> AccessControl["owner"].allows?(:edit_users)
=> true
>> AccessControl["basic"].allows?(:edit_users)
=> false
```

This is an advanced configuration system that not only encapsulates some configuration data, but also makes it possible to query that data in useful ways. The following implementation code illustrates how little magic is involved in building such a system.

```ruby
# A small role-based permission registry that is populated through a
# block-based DSL (see AccessControl.configure).
module AccessControl
  extend self

  # Evaluates the block with this module as the receiver, so bare calls
  # to +role+ inside the block register roles here.
  def configure(&block)
    instance_eval(&block)
  end

  # Maps each role name to its Role object.
  def definitions
    @definitions ||= Hash.new
  end

  # Registers (or replaces) the role called +level+.
  #
  # options - :permissions is a list of permission symbols (defaults to
  #           none); :parent names a role whose permissions are inherited.
  def role(level, options={})
    definitions[level] = Role.new(level, options)
  end

  # Names of every role that allows +permission+, whether directly or
  # through a parent.
  def roles_with_permission(permission)
    definitions.select { |k,v| v.allows?(permission) }.map { |k,_| k }
  end

  # Returns the Role registered as +level+, or nil if there is none.
  def [](level)
    definitions[level]
  end

  class Role
    def initialize(name, options)
      @name        = name
      # A role declared without :permissions grants nothing of its own;
      # storing [] keeps #permissions and #allows? from failing on nil.
      @permissions = options[:permissions] || []
      @parent      = options[:parent]
    end

    attr_reader :parent

    # This role's own permissions followed by everything inherited from
    # its chain of parents.
    #
    # Raises ArgumentError if the named parent role has not been defined.
    def permissions
      return @permissions unless parent

      parent_role = AccessControl[parent]
      unless parent_role
        raise ArgumentError,
          "Role #{@name.inspect} refers to unknown parent #{parent.inspect}"
      end

      @permissions + parent_role.permissions
    end

    # True if +permission+ is granted by this role or by any ancestor.
    def allows?(permission)
      permissions.include?(permission)
    end

    def to_s
      @name
    end
  end
end
```

Because doing configuration in pure Ruby is so easy, I often lean towards it rather than using YAML or some other external file format. I find configuration files written in Ruby to be just as readable as YAML, but far more flexible.

There are some situations in which external data formats make more sense than Ruby based configurations. Using YAML might be a better idea than the approach shown above if any of the following apply to your application:

 * You need to integrate with other programs that will either read or write your configuration files. It is easier for a program written in another language to produce and consume YAML than it is for it to work with arbitrary Ruby code.

 * You don't want users to be able to execute arbitrary code in your application's runtime environment. This can either be for security reasons, or for protecting users from their own stupidity by restricting the range of possible mistakes they can make.

 * You want configuration data that can easily be passed over a network and then executed remotely.

While these are all good reasons to avoid Ruby based configurations, frankly they are not common scenarios. The reason Ruby has had such a widespread adoption of YAML is almost certainly not because of it being the best tool for the job, but instead due to an early design decision made in Rails that people have emulated in their own projects without further thought. While either technique may get the job done, I'd argue that Ruby based configurations are a better default choice due to their inherent flexibility.

But sometimes, neither Ruby nor YAML does what we need them to do. In certain situations, configuration data isn't made available until the application is invoked. For those scenarios, we can take advantage of how well Ruby is integrated with the shell by making use of environment variables.

### Using the Shell Environment for Configuration

Every Ruby application has a fairly primitive but useful configuration system built into it through direct access to shell environment variables. As you can see in the code below, Ruby provides a top-level constant, `ENV`, that exposes the environment variable mappings through a Hash-like object.

```
$ TURBINE_API_KEY="IqxPfasfasasfasfgqNm" ruby -e "puts ENV['TURBINE_API_KEY']"
IqxPfasfasasfasfgqNm
```

The fact that I mention API keys in the above contrived example is no coincidence. The area I first made use of environment variables in my own applications was in a command line application which acted as a client to a web service I needed to interact with. Each distinct user needed to use a different API key, but I didn't want to rely on fragile home directory lookup code to provide per-user configuration. By using environment variables, it was possible to write a line like the following in my <i>.bash_profile</i> which would ensure that this information was available whenever my command line program ran.

```
export TURBINE_API_KEY="IqxPfasfasasfasfgqNm"
```

Since most modern shell implementations support environment variables, they're a good choice for this sort of semi-global configuration data. You'll also find environment variables used in places where you don't have much control over the system where your application is destined to run. The Ruby web application deployment service Heroku is a good example of that sort of environment.

On Heroku, you aren't given direct shell access and aren't even given any guarantees about where on the filesystem your application is destined to run. On top of that, if you want to run an open source application on Heroku while actively mirroring your changes to Github or some other public git host, you can't simply check in configuration files which may contain sensitive information, whether written in Ruby, YAML, or anything else.

The way Heroku solves these problems is with a configuration system based on, you guessed it, environment variables. The following example from the Heroku website shows how these are set via the heroku command line app.

```
$ cd myapp
$ heroku config:add S3_KEY=8N029N81 S3_SECRET=9s83109d3+583493190
Adding config vars:
  S3_KEY    => 8N029N81
  S3_SECRET => 9s83109d3+583493190
Restarting app...done.
```

In the application, these variables are accessed in a similar fashion to our
previous example:

```ruby
# The credentials are read from the environment rather than from a file
# checked into the repository.
s3_credentials = {
  :access_key_id     => ENV['S3_KEY'],
  :secret_access_key => ENV['S3_SECRET']
}

AWS::S3::Base.establish_connection!(s3_credentials)
```

While hardly the first tool you should reach for, environment variables make sense in situations in which you do not want to store sensitive information within your application. They also come in handy when you don't want to assume anything about your user's file system in order to locate user-wide configuration settings.

Before we wrap up with some general tips that are relevant to all configurable applications, I'd like to quickly visit one more trick that involves project-wide configurations.

### Per-project configurations for command line apps

Some command line applications need to be context aware in order to do their jobs. Two such examples are rake and git. Both tools know how to locate their own configuration information so that they do the right thing when running their commands.

For example, git knows which repository to interact with because it knows how to work backwards to find the <i>.git/</i> configuration folder at the project root. Likewise, running `rake test` from anywhere within your project causes rake to look backwards recursively until it finds the nearest <i>Rakefile</i> to run. This general pattern can be seen in many other applications, and is worth knowing about in case you ever need to make use of it yourself.

While I don't want to go into much detail about this topic, I will say that it seemed a bit magical to me until I needed to implement this sort of functionality in my own projects. The basic idea is no more complicated than working backwards from your current directory until you find the file or folder that you need to interact with, which is something Ruby's pathname library can make quick work of.

Here's an example pulled directly out of a project of mine which illustrates a reverse search from the current working directory back to the filesystem's root directory.

```ruby
require 'pathname'

# Searches for a ".myappconfigfolder" entry, starting in +dir+ (the
# current working directory by default) and moving up one parent at a
# time.
#
# Returns the expanded Pathname of the first match, or nil once the
# filesystem root has been checked without finding one.
def config_dir(dir = Pathname.new("."))
  loop do
    candidate = dir + ".myappconfigfolder"

    # Ask about the one entry we care about instead of listing the whole
    # directory; this also keeps the search going when a directory on the
    # way up cannot be listed.
    return candidate.expand_path if candidate.exist?
    return nil if dir.expand_path.root?

    dir = dir.parent
  end
end
```

A bit of code like this combined with ordinary `require` calls for Ruby configurations or `YAML.load_file` calls for YAML configurations can be used to implement exactly the sort of context sensitive behavior you find in rake and git. I'll leave the exact methods of doing that as something for you to explore on your own, but hopefully this bit of code will come in handy if you ever run into that sort of situation.

This article turned out to be longer than I expected it to be, but hopefully was still quite useful to you. Before we part, let's review a few key points to keep in mind when building any sort of configuration system.

### Configuration Best Practices 

* Convention often is better than configuration. Always provide sensible defaults where possible. For example, if you're interacting with a service that has a common default port, don't force the user to define a port to use unless they wish to deviate from the default.

* Don't put your real configuration files into your application's code repository, since this can expose sensitive data and also makes it hard for others to submit patches without merge conflicts on configuration settings.

* Include a sample configuration file filled with reasonable defaults with your application. For example, in Rails, people often check in a <i>config/database.yml.example</i> for this purpose. The goal should be to make it as easy as possible for your user to make a copy of the sample file and then customize it as needed to get their systems up and running.

* Raise an appropriate error message when a config file is missing. You can do this by doing a `File.exist?` check before loading your configuration file, or by rescuing the error a failed load causes and then re-raising a more specific error that instructs the user on where to set up their configuration file.

* Make it very easy for users to override defaults by merging their overrides rather than forcing them to replace whole configuration structures in order to make a small change.

### Reflections 

What do you think of what we've covered here? Feel free to leave your questions, comments and suggestions in the comments section below.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/033-issue-4-configurable.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/005-testing-antipatterns.md
================================================
If you are using `send` to test private methods in your tests, you are almost certainly doing it wrong. Most private methods tend to fall into one of the following categories, none of which require `send` to test:

* A method that does not have behavior of its own (a helper function) 
* A method that actually deserves to be public on the current object 
* A method that is only private to hide a design flaw

Take a look at the three objects below and try to match them to the patterns listed above.

```ruby
# A book offered for sale, identified by its name.
class Book
  def initialize(name)
    @name = name
  end

  # True while Inventory reports more than zero copies of this book.
  def available_for_purchase?
    0 < copies_remaining
  end

  # Number of copies of this book that Inventory currently counts.
  def copies_remaining
    Inventory.count(:book, @name)
  end
  private :copies_remaining
end

# Entry point for stock bookkeeping. Callers name an item type (:book or
# :video) instead of referring to the backing classes directly.
module Inventory
  extend self

  # Returns the quantity recorded for the +item_type+ item called +name+.
  def count(item_type, name)
    item_class(item_type).find_by_name(name).quantity
  end

  # Records +quantity+ of +name+ as stock of the given +item_type+.
  def receive(item_type, name, quantity)
    item_class(item_type).create(name, quantity)
  end

  private

  # Maps an item type symbol to the class that stores that kind of item.
  #
  # Raises ArgumentError for a type with no backing class, instead of
  # returning nil and letting the caller fail with an unrelated
  # NoMethodError.
  def item_class(item_type)
    case item_type
    when :book
      InStockBook
    when :video
      InStockVideo
    else
      raise ArgumentError, "Unknown item type: #{item_type.inspect}"
    end
  end
end

# An in-memory stock record for a book. Records are kept in a class-level
# hash keyed by name.
class InStockBook
  # Lazily created Hash holding every record made through .create.
  def self.titles
    @titles ||= {}
  end
  
  # Returns the record stored under +name+, or nil when there is none.
  def self.find_by_name(name)
    titles[name]
  end

  # Builds a record and stores it under +name+, replacing any existing one.
  def self.create(name, quantity)
    titles[name] = new(name, quantity)
  end

  def initialize(name, quantity)
    @title     = name
    @quantity  = quantity
  end

  attr_reader :title, :quantity

  # The book's ISBN. The service is consulted only while @isbn is unset;
  # afterwards the memoized value is returned.
  def isbn
    @isbn ||= isbn_from_service
  end

  private

  # Connects to the ISBN service, looks up the ISBN for @title, and
  # disconnects again. Note that the disconnect is skipped if the lookup
  # raises.
  def isbn_from_service
    isbn_service_connect

    isbn = @isbn_service.find_isbn_for(@title)

    isbn_service_disconnect

    return isbn
  end

  # Creates a fresh IsbnService, keeps it in @isbn_service, and connects it.
  def isbn_service_connect
    @isbn_service = IsbnService.new
    @isbn_service.connect
  end

  # Disconnects the service instance held in @isbn_service.
  def isbn_service_disconnect
    @isbn_service.disconnect
  end
end
```

If you guessed that `Inventory` was the object which demonstrated a private method that doesn't implement an external behavior, you guessed right. The sole purpose of `Inventory#item_class` is just to make the code in `Inventory#count` and `Inventory#receive` a bit cleaner to read. Therefore, it'd be wasteful to write an explicit test such as the one below.

```ruby
# Goes around the public interface with `send` to call the private helper.
def test_item_class
  looked_up = Inventory.send(:item_class, :book)

  assert_equal InStockBook, looked_up
end
```

The following tests implicitly cover the functionality of `Inventory#item_class` while focusing on actual interactions through the public interface.

```ruby
# Receiving stock and counting it again exercises the item type lookup
# without ever naming it.
def test_stocking_a_book
  title = "Ruby Best Practices"
  Inventory.receive(:book, title, 100)

  assert_equal 100, Inventory.count(:book, title)
end
```

Because indirectly testing a private method will result in the same code coverage results as testing the method directly, you won't silently miss out on a failure if `Inventory#item_class` does not work as expected. However, by writing your tests this way, you focus primarily on what can be done to the object via its external interface. This leads to clearer, more maintainable tests. If a user is expected to add books through `Inventory#receive`, they should not need to know about `InStockBook`, so it can be regarded as an implementation detail. Changing the definition of `Inventory#item_class` or even removing it entirely will not require a change to these tests as long as you maintain the signature of the object's public API.

Now that we've identified the approach for testing `Inventory`, we are left with `Book` and `InStockBook` to discuss. Of the two, the problem with `Book` is a little more obvious, so we'll tackle it first.

Book implements a method called `available_for_purchase?`, which relies on a private method called `copies_remaining` to operate. The following code demonstrates a poorly implemented test.
 
```ruby
# Checks the private stock lookup by going around the public interface
# with `send`.
def test_copies_remaining
  book = Book.new("Ruby Best Practices")
  # Inventory.receive takes (item_type, name, quantity), and Book exposes
  # no name reader, so the title is passed explicitly.
  Inventory.receive(:book, "Ruby Best Practices", 10)

  # Expected value first, so a failure message reads the right way round.
  assert_equal 10, book.send(:copies_remaining)
end
```

The reason why this is poor is because once again, we are relying on `send` to call a private method in our tests. Our theory from the previous example is that private methods do not need to be tested because they don't actually implement behavior. However, `Book#copies_remaining` seems like something you might want to actually make use of. If you imagine a web front-end for an e-commerce site, it's easy to visualize both an indicator of whether an item is in stock, as well as how many of that item are still available.

The rule of thumb here is that if a method provides a sensible behavior that fits the context of your object, it's better off to just make it public. The following test seems very natural to me.

```ruby
# Checks the stock lookup through the (now public) copies_remaining method.
def test_copies_remaining
  book = Book.new("Ruby Best Practices")
  # Inventory.receive takes (item_type, name, quantity), and Book exposes
  # no name reader, so the title is passed explicitly.
  Inventory.receive(:book, "Ruby Best Practices", 10)

  # Expected value first, so a failure message reads the right way round.
  assert_equal 10, book.copies_remaining
end
```

So far we've seen two extremes: Private methods that are rightfully private and do not need to be tested explicitly, and private methods that ought to be public so that they can be tested explicitly. We will now examine the space between these two opposite ends of the spectrum.  

Let's think a bit about how we could test the `InStockBook#isbn` shown below.

```ruby
class InStockBook

  # .. other features omitted

  def isbn
    @isbn ||= isbn_from_service
  end

end
```

One way to do it would be to mock out the call to `isbn_from_service` as we do in the following tests.

```ruby
def test_retreive_isbn
  book = InStockBook.new("Ruby Best Practices", 10)
  book.expects(:isbn_from_service).once.returns("978-0-596-52300-8")

  # Verify caching by calling isbn twice but expecting only one service
  # call to be made. The assertion has to go through the local `book`
  # that carries the expectation; an unset @book would be nil.
  2.times { assert_equal "978-0-596-52300-8", book.isbn }
end
```

The downside of this approach is that by mocking out the call to `isbn_from_service`, we're bypassing all of the following code, leaving it untested.

```ruby
def isbn_from_service
  isbn_service_connect

  isbn = @isbn_service.find_isbn_for(@title)

  isbn_service_disconnect

  return isbn
end

def isbn_service_connect
  @isbn_service = IsbnService.new
  @isbn_service.connect
end

def isbn_service_disconnect
  @isbn_service.disconnect
end
```

Making these methods public on `InStockBook` doesn't make much sense, but we also can't say that these are mere implementation details that can be ignored. In these situations, typically some redesign is necessary, and in this case, a simple shift of this functionality upstream to the `IsbnService` class makes the most sense.

```ruby 
class IsbnService

  # Looks up the ISBN for +title+ using a short-lived service instance.
  #
  # Returns whatever the instance-level find_isbn_for returns. The
  # connection is closed even when the lookup raises; the error itself
  # still propagates to the caller.
  def self.find_isbn_for(title)
    service = new
    service.connect

    begin
      service.find_isbn_for(title) # delegate to instance
    ensure
      service.disconnect
    end
  end

  # .. other functionality

end
```

This functionality can now easily be tested as a public behavior of the `IsbnService` class, where it won't get jumbled up with `InStockBook`'s logic. All that's left to do is rewrite our `InStockBook#isbn` method so that it delegates to this new class.

```ruby
class InStockBook

  # .. other features omitted

  def isbn
    @isbn ||= IsbnService.find_isbn_for(@title)
  end

end
```

Our updated `isbn` tests only need to change slightly to accommodate this
change:

```ruby
def test_retreive_isbn
  book = InStockBook.new("Ruby Best Practices", 10)
  IsbnService.expects(:find_isbn_for).with(book.title).once.
              returns("978-0-596-52300-8")

  # Verify caching by calling isbn twice but expecting only one service
  # call to be made. The assertion has to go through the local `book`
  # created above; an unset @book would be nil.
  2.times { assert_equal "978-0-596-52300-8", book.isbn }
end
```

Now, when reading the tests for `InStockBook`, the developer can safely gloss
over `IsbnService`'s implementation until its contract changes. With this
dilemma solved, we've now comprehensively categorized the strategies that allow
you to avoid testing private methods without sacrificing the clarity and
coverage of your test suite.

### Reflections

We've now seen examples of how to deal with all of the following situations that might tempt us to use `send` in our tests unnecessarily:

1. A method that does not have behavior of its own (a helper function) 
1. A method that actually deserves to be public on the current object 
1. A method that is only private to hide a design flaw

Can you think of a situation where none of these approaches seem to work? Please feel free to share them in the comments section below.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/034-issue-5-testing-antipatterns.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/006-good-and-bad-code.md
================================================
In this issue and the next one, I'd like to demonstrate one of my favorite learning exercises while inviting you to follow along at home. It's something I usually do while teaching in a one-on-one setting, but I think we can adapt it for a broader audience and still get a lot out of it.

In this exercise, the goal is to first produce some bad code, and then steadily improve it while explaining why each change is an improvement. I usually start with a very simple problem but then add some twists about how to implement it to make sure it comes out pretty bad.

One surefire way of writing bad code without resorting to intentionally writing things worse than they should be is to eliminate a few of Ruby's key organizational tools. In particular, if you want to write ugly code without it seeming fake, it is easy to do so if you never write any user defined functions, classes, or modules. So we'll do exactly that!

### Implementing Tic-Tac-Toe as a single procedure.

I've chosen the game [Tic-Tac-Toe](http://en.wikipedia.org/wiki/Tic-Tac-Toe) as the problem to focus on, because it only involves a few simple rules and can be implemented by anyone who has basic programming skills.

In fact, if you ignore end game conditions and error handling, you can get a simple prompt for a two player game with just a few lines of Ruby.

```ruby
# 3x3 grid of cells; nil marks an empty cell.
board = [[nil,nil,nil],
         [nil,nil,nil],
         [nil,nil,nil]]

# Endless enumerator that alternates :X, :O, :X, ...
players = [:X, :O].cycle

loop do
  current_player = players.next
  # Draw the board, one row per line, with empty cells shown as spaces.
  puts board.map { |row| row.map { |e| e || " " }.join("|") }.join("\n")
  print "\n>> "
  # Read a move as two whitespace-separated integers: row, then column.
  row, col = gets.split.map { |e| e.to_i }
  puts
  board[row][col] = current_player
end
```

But of course, the devil is in the details. To get a fully playable game, you need some basic error checking to ensure that you can't play out of bounds or on top of another player's marker. You also need to figure out when a player has won, and when the game has ended in a draw. While this doesn't sound like a lot of work, you'll see in the code below how much complexity these simple changes add.

```ruby
# 3x3 grid of cells; nil marks an empty cell.
board   = [[nil,nil,nil],
           [nil,nil,nil],
           [nil,nil,nil]]

# [row, col] coordinates of the cells on each diagonal.
left_diagonal  = [[0,0],[1,1],[2,2]]
right_diagonal = [[2,0],[1,1],[0,2]]

# Endless enumerator that alternates :X, :O, :X, ...
players = [:X, :O].cycle

# Fetched outside the loop so that a rejected move (the `next` calls
# below) leaves the turn with the same player.
current_player = players.next

loop do
  # Draw the board, one row per line, with empty cells shown as spaces.
  puts board.map { |row| row.map { |e| e || " " }.join("|") }.join("\n")
  print "\n>> "
  # Read a move as two whitespace-separated integers: row, then column.
  row, col = gets.split.map { |e| e.to_i }
  puts

  # fetch raises IndexError for an index outside the array, where []
  # would quietly return nil.
  begin
    cell_contents = board.fetch(row).fetch(col)
  rescue IndexError
    puts "Out of bounds, try another position"
    next
  end

  if cell_contents
    puts "Cell occupied, try another position"
    next
  end

  board[row][col] = current_player

  # Collect every line that passes through the cell just played:
  # whichever diagonals contain it, plus its row and its column.
  lines = []

  [left_diagonal, right_diagonal].each do |line|
    lines << line if line.include?([row,col])
  end

  lines << (0..2).map { |c1| [row, c1] }
  lines << (0..2).map { |r1| [r1, col] }

  # The move wins if the current player holds every cell of any of those
  # lines. Note that the inner block parameters reuse the names row and col.
  win = lines.any? do |line|
    line.all? { |row,col| board[row][col] == current_player }
  end

  if win
    puts "#{current_player} wins!"
    exit
  end

  # All nine cells are filled and nobody has won.
  if board.flatten.compact.length == 9
    puts "It's a draw!"
    exit
  end

  # Hand the turn to the other player.
  current_player = players.next
end
```

While relatively short, you need to read through the whole script to really understand how any part of it operates. Of course, this script did not spring together fully formed; there was a thought process that drove it to this final implementation. For those curious, you can [follow my stream of consciousness notes](https://gist.github.com/24ef3c8209877c1946bb) about what I was building and why in a step by step fashion.

Seeing these notes will hopefully give you a bit of a sense of how this process might have gone if we were pair programming on this project, working in tiny iterations to push forward just a little bit farther each time. If so, you might already be catching a glimpse of what this exercise is all about. Otherwise, there is still more for us to do!

### What Happens Next?

I've placed my bad tictactoe example in a [repository on github](https://github.com/sandal/tictactoe/tree/7fd72a33aec33f75909d8c9d59a43423b0f66b24). If you'd like to participate, please fork this repository and make one change to the code at a time, leaving detailed reasoning in each commit message as to why you're making the change. Once you're happy with what you've got, post a link in the comments section on this post so others can check out what you have done.

In the next issue, I will post my own iterative set of improvements, as well as links to some reader submissions. I will also summarize the lessons that can be learned from using this technique, and provide a few suggestions for other problems to attempt in this fashion.

### Reflections

Please leave any questions, thoughts, or suggestions in the comments section below. These articles are much better when they're treated as discussions rather than monologues. 
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/035-issue-6-good-and-bad-code.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/007-good-and-bad-code.md
================================================
In Issue #6, you got to see my intentionally bad implementation of Tic Tac Toe. For today, I have promised to show you some better code and the steps I took to get there. But before we move forward, let's take a quick look back at where we started.

To start this exercise, I had challenged myself to implement this simple game without using any user defined classes or methods. Given that I wanted to make sure I produced *bad* code to start with, I got a little nervous when my back-of-the-napkin proof of concept didn't come out looking that bad. Here it is again below, for those who forgot about it.

```ruby
# 3x3 grid of cells; nil marks an empty cell.
board = [[nil,nil,nil],
         [nil,nil,nil],
         [nil,nil,nil]]

# Endless enumerator that alternates :X, :O, :X, ...
players = [:X, :O].cycle

loop do
  current_player = players.next
  # Draw the board, one row per line, with empty cells shown as spaces.
  puts board.map { |row| row.map { |e| e || " " }.join("|") }.join("\n")
  print "\n>> "
  # Read a move as two whitespace-separated integers: row, then column.
  row, col = gets.split.map { |e| e.to_i }
  puts
  board[row][col] = current_player
end
```

The above code is good demo-ware, as long as you type really carefully and conveniently forget to finish a game before hitting ctrl+c. But to make a real, playable implementation, some end game conditions and basic validations are necessary. To my great joy, adding those new features caused this tight little script to explode into a hot mess of intertwined logic and nasty little hacks. Check out [the source tree](https://github.com/sandal/tictactoe/tree/7fd72a33aec33f75909d8c9d59a43423b0f66b24) that we ended up with at the end of Issue #6 to see how things turned out.

While concise at less than 60 lines of code, it's pretty easy to see that this isn't the kind of software we should aspire to be writing. So the challenge was to start here and end up somewhere better.

Whenever I do this exercise with my students, there is a roadmap I follow that tends to lead to some decent insights. It roughly goes like this:

* Get some basic file structures and namespaces in place so that you get yourself out of the global namespace and open the doors for scripting some examples or running things in irb without firing off a procedure automatically.

* Break down the procedure into some separable chunks so that you can think about smaller parts of the problems, and more easily see the dependencies between the steps in the procedure.

* Re-think the design by identifying areas where objects can put an abstraction barrier between different layers of data and logic. Strive to have each bit of code do one thing and one thing well.

* Identify the leaky abstractions and dangly bits that didn't get ironed out by the last step. Aim for beautiful solutions, but be skeptical of over-engineering at this point. No problem can be modeled perfectly.

* Reflect on the exercise, and ask yourself whether you've gone far enough with your cleanup. If you feel that you have, then be sure to think about whether you've gone *too* far!

This is the approach I took as I worked on this problem myself, and you'll be able to see it step by step in the git logs. I tried to write good log messages, so I will link to them rather than repeat what was said, but I'll also share some more big-picture oriented thoughts as I walk you through my work.

### Basic organization first

Here is my [first commit](https://github.com/sandal/tictactoe/commit/5af96941d74f8014a3276b77fe67c17e0ed5e2df) of the evening. And this is the [source tree](https://github.com/sandal/tictactoe/tree/5af96941d74f8014a3276b77fe67c17e0ed5e2df).

Tiny changes really, but it's the first thing I do as soon as I've exited 'spike
mode' on any project, no matter how small. I've used a standard structure, and
it does two things for me:

1. Allows me to load my whole library with a single require. (See app.rb for example and note how it doesn't change throughout this walkthrough)

1. Places 100% of what I build under a single constant's namespace (i.e. `TicTacToe`)

These two points pretty much guarantee me that I won't have any naming clashes or unexpected collisions with other people's code unless I plan on loading a library that might clobber the name `TicTacToe` or the `require` path of <i>"tictactoe/*"</i>. It also makes it easy for me to start interacting with my code from scripts I write, from irb, and from unit tests. For so little work, we get a ton of benefit, and this is a great place to start when doing any sort of cleanup.

### Basic Slicing and Dicing

My next goal is to start breaking my monolithic procedure into some smaller chunks so I can get a sense of what parts go well together and how they need to interact with each other.

I start by realizing that using a singleton pattern for `Game`, while possible, isn't a great idea. A function bag approach in which we pass board and player information around like crazy also wouldn't be great, so I decide to make `Game` an ordinary class in this [commit](https://github.com/sandal/tictactoe/commit/2579626bd73fc7ad9e7d0a87419d5ecab2aacdda).

Read the message, and then if you'd like, have a look at the [updated source tree](https://github.com/sandal/tictactoe/tree/2579626bd73fc7ad9e7d0a87419d5ecab2aacdda).

I immediately make use of this refactoring by breaking down the original game procedure into several smaller, simpler methods. ([commit](https://github.com/sandal/tictactoe/commit/286724de5328fda779caa500ccc76a0ad5de2bd7), [source](https://github.com/sandal/tictactoe/tree/286724de5328fda779caa500ccc76a0ad5de2bd7))

At this point, it's not uncommon for folks to think they're done refactoring. By giving things nicer names and distributing the pain points so that they're not all crammed together in one place, the code feels cleaner. But upon further investigation of this code, while perhaps understandability and organization have improved, flexibility and abstraction have not. This is what I like to call 'procedural programming with objects', and we can do better than this.

The good news is, with the code cleaned up a bit, we see where some of the pain points are. When it seems like a large amount of your code is dedicated to handling a particular concept, that means you have an object begging to be born. Our handling of the game board logic in this code is a prime example.

### Sneaking in Domain Models

A key principle of object oriented design is to do one thing and do it well. But what does that mean? Hopefully, this refactored `Board` class gives you an idea!
([commit](https://github.com/sandal/tictactoe/commit/efcbf51bcc1f7d4d094c671b60761229aec3dded), [source](https://github.com/sandal/tictactoe/tree/efcbf51bcc1f7d4d094c671b60761229aec3dded))

If you look at the `Board` class, you'll see that it takes the concept of a Tic Tac Toe board and solidifies it so that when `Game` works with it, `Board` does the heavy lifting and `Game` mostly just calls the methods it needs to get its job done. This lets `Game` forget about some of the finer points like what the individual kinds of illegal moves are, or how to compute the intersecting lines that cross through a given point. This sort of black box effect gives us some real abstraction, which is exactly why object oriented programming is as good as they say it is.

With this complex board logic out of the way and some updates to the way flow is handled in game, it's obvious that `Game` is now something like a controller, and `Board` is a model. But there are still some loose ends in `Game`, things that actually look like logic rather than just flow control and dispatch. The majority of the code you see in this class has to do with implementing a user interface and basic event loop. So, methods like `check_move`, `check_win`, and `check_draw` feel a little bit out of place, since they implement actual logic about the rules of the game rather than just how players interact with it.

Sometimes, little leaks like this aren't a big deal. In fact, the code looks reasonable to me at this point and if I were doing this for my day job and wasn't trying to get in the record books for 'World's Best Tic Tac Toe Implementation', I'd probably stop here.

But we're already cruising now, so why don't we try to shoot for the stars?

### Grail Quests

I really wanted to find a way to rip that last bit of domain logic out of `Game`, and after wrestling a little bit, I came up with something.
([commit](https://github.com/sandal/tictactoe/commit/0fef18d320af2bd1a08f5115a2b94e552205f218), [source](https://github.com/sandal/tictactoe/tree/0fef18d320af2bd1a08f5115a2b94e552205f218))

The thing I kept wrestling with was how to manage the screen output stuff. I wrestled with a bunch of ideas, including defining a simple `display()` method on `Game` like this:

```ruby
def display(message)
  puts message
end
```

The reason why I wanted this is so my Rules mixin could rely on a method that `Game` provided for display rather than directly assuming console output. But I think that what I ended up with is better.

Imagine that my `check_draw` method in Rules was written like this:

```ruby
def check_draw
  if @board.covered?
     display "It's a draw"
     game_over
  end
end
```

It's almost a trivial difference *except* that now we have a leak on the Rules side. If `TicTacToe::Game` is now meant to exclusively be a UI event loop, having the messages that are displayed to the user caught up in some module seems a bit ugly.

But instead, I chose to let `Game` fill in the blanks with an implementation like this:

```ruby
def check_draw
  if @board.covered?
     yield
     game_over
  end
end
```

This allows the draw logic to live in `Rules`, with calling code in `Game`
that looks like this:

```ruby
check_draw { puts "It's a draw" }
```

A place for everything and everything in its place! Time to go hang some banners on aircraft carriers, because well, Mission Accomplished.

### Fear, Uncertainty, and Doubt

Is this final implementation an example of good Ruby code? Yeah, probably. Is it excellent? I really have no idea. At the very least, it's almost certainly not 'The Best Tic Tac Toe Implementation Ever'.

But really, the kind of perfection I was trying to seek in this exercise is not really what we should be looking for in our day to day work. Right now I have the amps cranked up to 11, when 7 or 8 would really do fine. But as I said before, this is one of my favorite exercises for learning and teaching. Here's why: It really gets me thinking.

I'm still trying to decide on whether extracting out the `Rules` module was really necessary, and there are also some aspects of this design that I still don't like. For example, I'm not sure whether `Board` should know more about the rules of the game, or even less. I don't like the hard coding I did of all the parameters of the game in there, but I can't put my finger on why. After all, it's very unlikely that Tic Tac Toe is suddenly going to become Chess and need to expand to an NxN board. Even if it did, wouldn't it need to change a whole lot to accommodate it?

Still, I don't like things like these constants:

```ruby
LEFT_DIAGONAL_POSITIONS  = [[0,0],[1,1],[2,2]]
RIGHT_DIAGONAL_POSITIONS = [[2,0],[1,1],[0,2]]
SPAN                     = (0..2)
CELL_COUNT               = 9
```

There is a natural connascence between all four of these values, but the code to generalize their creation would be longer and much uglier to read than the above. So maybe it's a good choice to do it this way, but it makes the mathematician in me uneasy.

Another thing I don't like about my design is `Board#to_s`, because putting presentation logic on domain logic is nasty. But to make a view object or otherwise promote one line of code to something more complex seems to be a cure that is worse than the disease.

But on the bright side of things, I really like the callback scheme for doing the bits of game logic like `check_win` and `check_draw` and passing in a block with the rendering code. This is actually a formal design pattern just hiding in a line of code, and things like that remind me of why Ruby is so beautiful.

Also, I've never used `throw` / `catch` before in real code. Never really saw why I'd need it. But at a glance, my use of it here actually seems pretty expressive and appropriate given the situation. But because I've never used it before, I'm still glancing at it sideways with considerable doubt. I even had to wrap it in a method called `game_over` to hide the throw keyword to get over my fear of its relative novelty. But now, my `game_over` method is like some sort of crazy goto call... and that makes me not so sure that this was a good idea after all.

Oh yeah, and I also didn't write any tests while working on this code. I thought about writing them, but I felt that it'd cause me to think about the tests themselves more than the coding practices I was experimenting with. But then again, maybe if I wrote tests, I wouldn't be pondering the relative merits of my fancy `game_over()` goto.

And this is how this exercise always ends. It doesn't come together in a beautiful blossom of Ruby awesomeness, it just kind of falls off a cliff. But really, that's okay! Not every question needs to be answered, and as I said before, if this were something I was working on just to get a job done, I would happily make concessions where needed to avoid letting perfect become the enemy of the good.

Still, this sort of practice gnaws on your subconscious, and I've seen it lead to great progress in my own studies and in my students as well. Hopefully you've enjoyed seeing this process in action, and will give it a try soon if you weren't able to try it out this week.

### Submissions from our readers

I haven't had a chance to review them in depth, but a few readers did share
their own explorations with us. Check out the [github network graph](https://github.com/sandal/tictactoe/network) to see what others have done.

Looking forward to hearing your thoughts on this exercise, and whether it seems like something you could make good use of. Until next time, happy hacking!

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/036-issue-7-good-and-bad-code.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/008-uses-for-modules.md
================================================
> Note: This article series on modules is also available as a [PDF download]. The
> PDF version has been revised and is more up-to-date than what you see here.

[PDF download]:https://github.com/elm-city-craftworks/pr-monthly/blob/gh-pages/b5e5a89847701c4aa7c170cf/sept-2012-modules.pdf?raw=true

Modules are part of what makes Ruby's design beautiful. However, since they do not have a direct analogy in any mainstream programming language, it is easy to get a bit confused about what they should be used for. While most folks quickly encounter at least some of their use cases, typically only very experienced Ruby developers know their true versatility.

In this four part article series, I aim to demystify Ruby modules by showing many practical use cases, explaining some tricky details along the way. We'll work through some of the fundamentals in the first two issues, and move into more advanced examples in the second two. Today we'll kick off this series by looking at the most simple, but perhaps most important ability modules offer us, the creation of namespaces.

### Modules for Namespacing

Imagine that you are writing an XML generation library, and in it, you have a class to generate your XML documents. Perhaps uncreatively, you choose the name `Document` for your class, creating something similar to what is shown below.

```ruby
class Document
  def generate
    # ...
  end
end
```

On its own, this seems to make a lot of sense; a user could do something simple like the following to make use of your library.

```ruby
require "your_xml_lib"
document = Document.new
# do something with document
puts document.generate
```

But imagine that you were using another library that generates PDF documents, which happens to use similar uncreative naming for its class that does the PDF document generation. Then, the following code would look equally valid.

```ruby
require "their_pdf_lib"
document = Document.new
# do something with document
puts document.generate
```

As long as the two libraries were never loaded at the same time, there would be no issue. But as soon as someone loaded both libraries, some quite confusing behavior would happen. One might think that defining two different classes with the same name would lead to some sort of error being raised by Ruby, but with open classes, that is not the case. Ruby would actually apply the definitions of `Document` one after the other, with whatever file was required last taking precedence. The end result would in all likelihood be a very broken `Document` class that could generate neither XML nor PDF.

But there is no reason for this to happen, as long as both libraries take care to namespace things. Shown below is an example of two `Document` classes that could co-exist peacefully.

```ruby
# somewhere in your_xml_lib

module XML
  class Document
    # ...
  end
end

# somewhere in their_pdf_lib

module PDF
  class Document
    # ...
  end
end
```

Using both classes in the same application is easy, as long as you explicitly include the namespace when referring to each library's `Document` class.

```ruby
require "your_xml_lib"
require "their_pdf_lib"

# this pair of calls refer to two completely different classes
pdf_document = PDF::Document.new
xml_document = XML::Document.new
```

The clash has been prevented because each library has nested its `Document` class within a module, allowing the class to be defined within that namespace rather than at the global level. While this is a relatively straightforward concept, it's important to note a few things about what is really going on here.

Firstly, namespacing actually applies to the way constants are looked up in Ruby in general, not classes in particular. This means that it applies to modules nested within modules as well as to ordinary constants.

```ruby
module A
  module B
  end
end

p A::B

module A
  C = 10
end

p A::C
```

Secondly, this same behavior of using modules as namespaces applies just as well to classes, as in the code below.

```ruby
class Blog
  class Comment
    #...
  end
end
```

Be sure to note that in this example, nesting a class within a class does not in any way make it a subclass or establish any relationship between `Blog` and `Blog::Comment` except that `Blog::Comment` is within the `Blog` namespace. In the example below, you can see that a class nested within another class looks the same as a class nested within a module.

```ruby
blog = Blog.new
comment = Blog::Comment.new
# ...
```

Of course, this technique is only really useful when you have a desired namespace for your library that also happens to match one of your class names. In all other situations, it makes sense to use a module for namespacing as it would prevent your users from creating instances of an empty and meaningless class.

Finally, it is important to understand that constants are looked up from the innermost nesting to the outermost, finally searching the global namespace. This can be a bit confusing at times, especially when you consider some corner cases.

For example, examine the following code:

```ruby
module FancyReporter
  class Document
    def initialize
       @output = String.new
    end

    attr_reader :output
  end
end
```

If you load this code into irb and play with it a bit on its own, you can inspect an instance of `Document` to see that its output attribute is a core Ruby `String` object, as shown below:

```ruby
>> FancyReporter::Document.new.output
=> ""
>> FancyReporter::Document.new.output.class
=> String
```

While this seems fairly obvious, it is easy for a bit of unrelated code written elsewhere to change everything. Consider the following code:

```ruby
module FancyReporter
  module String
    class Formatter
    end
  end
end
```

While the designer of `FancyReporter` was most likely trying to be well organized by offering `FancyReporter::String::Formatter`, this small change causes headaches because it changes the meaning of `String.new` in `Document`'s initialize method. In fact, you cannot even create an instance of `Document` before the following error is raised:

```ruby
?> FancyReporter::Document.new
NoMethodError: undefined method `new' for FancyReporter::String:Module
	from (irb):35:in `initialize'
	from (irb):53:in `new'
	from (irb):53
```

There are a number of ways this problem can be avoided. Often times, it's
possible to come up with alternative names that do not clash with core objects,
and when that's the case, it's preferable. In this particular case, `String.new`
can also be replaced with `""`, as nothing can change what objects are created
via Ruby's string literal syntax. But there is also an approach that works
independent of context, and that is to use explicit constant lookups from the
global namespace. You can see an example of explicit lookups in the following
code:

```ruby
module FancyReporter
  class Document
    def initialize
       @output = ::String.new
    end

    attr_reader :output
  end
end
```

Prepending any constant with `::` will force Ruby to skip the nested namespaces and bubble all the way up to the root. In this sense, the difference between `A::B` and `::A::B` is that the former is a sort of relative lookup whereas the latter is absolute from the root namespace.

In general, having to use absolute lookups may be a sign that there is an unnecessary name conflict within your application. But if upon investigation you find names that inherently collide with one another, you can use this tool to avoid any ambiguity in your code.

While we've mostly covered the mechanics of namespacing, all this talk about `::` compels me to share a cautionary tale of mass cargoculting before we wrap up for today. Please bear with me as I stroke my beard for a moment.

### Abusing the Constant Lookup Operator (`::`)

In some older documentation, and some relatively recent code written by folks who learned from old documentation, you may see class methods being called in the manner shown below.

```ruby
YAML::load(File::read("foo.yaml"))
```

While the above code runs fine, it's only a historical accident that it does. In fact, `::` was never meant for method invocation, class methods or otherwise. You can easily demonstrate that `::` can be used to execute instance methods as well, which eliminates any notion that `::` has some special 'class methods only' distinction to it.

```ruby  
"foo"::reverse #=> "oof"
```

As far as I can tell, this style of method invocation actually came about as a documentation convention. In both formal documentation and in mailing list discussions, it can sometimes be difficult to discern whether someone is talking about a class method or instance method, since both can be called just as well with the dot operator. So, a convention was invented so that for a class `Foo`, the instance method `bar` would be referred to as `Foo#bar`, and the class method `bar` would be referred to as `Foo::bar`. This did away with the dot entirely, leaving no room for ambiguity.

Unfortunately, this led to a confusing situation. Beginners would often type `Foo#bar` to try to call instance methods, but were at least promptly punished for doing so because such code will not run at all. However, typing `Foo::bar` does work! Thus, an entire generation of Ruby developers were born thinking that `::` is some sort of special operator for calling class methods, and to an extent, others followed suit as a new convention emerged.

The fact that `::` will happily call methods for you has to do with internal implementation details of MRI, and so it's actually an undefined behavior, subject to change. As far as I know, there is no guarantee it will actually work as expected, and so it shouldn't be relied upon.

In your code, you should feel free to replace any method calls that use this style with ordinary `Foo.bar` calls. This actually reflects more of the true nature of Ruby, in that it doesn't emphasize the difference between class level calls and instance level calls, since that distinction isn't especially important. In documentation, things are a little trickier, but it is now generally accepted that `Foo.bar` refers to a class method and `Foo#bar` refers to an instance method. In cases where that distinction alone might be confusing, you could always be explicit, as in the example below.

```ruby
obj.bar # obj is an instance of Foo
```

If this argument wasn't convincing enough, you should know that every time you replace a `Foo::bar` call with `Foo.bar`, a brand new baby unicorn is born beneath a magnificent double rainbow. That should be reason enough to reverse this outdated practice, right?

### Reflections 

This article probably gave you more details than you ever cared to know about namespacing. But future articles will be sure to blow your mind with what else modules can do. However, if you have any questions or thoughts about what we've discussed so far, feel free to leave them in the comments section below.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/037-issue-8-uses-for-modules.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/009-uses-for-modules.md
================================================
> Note: This article series on modules is also available as a [PDF download]. The
> PDF version has been revised and is more up-to-date than what you see here.

[PDF download]:https://github.com/elm-city-craftworks/pr-monthly/blob/gh-pages/b5e5a89847701c4aa7c170cf/sept-2012-modules.pdf?raw=true

### Using Mix-ins to Augment Class Definitions

Although knowing [how to use modules for namespacing](http://practicingruby.com/articles/36) is important, it's really only a small part of what you can do with modules. What modules do best is providing a convenient way to write code that can be mixed into other objects, augmenting their behaviors. Because modules facilitate code sharing in a way that is distinct from both the general OO concept of class inheritance and from things like Java's interfaces, they require you to think about your design in a way that's a bit different from most other object oriented programming languages.

While I imagine that most of our readers are comfortable with using mixins, I'll
refer to some core Ruby mixins to illustrate their power before moving on to more 
subtle points. For example, consider the following bit of code which implements lazily evaluated computations:

```ruby
class Computation

  def initialize(&block)
    @action = block
  end

  def result
    @result ||= @action.call
  end

  def <(other)
    result < other.result
  end

  def >(other)
    result > other.result
  end

  def >=(other)
    result >= other.result
  end

  def <=(other)
    result <= other.result
  end

  def ==(other)
    result == other.result
  end

end

a = Computation.new { 1 + 1 }
b = Computation.new { 4*5 }
c = Computation.new { -3 }

p a < b  #=> true
p a <= b #=> true
p b > c  #=> true
p b >= c #=> true
p a == b #=> false
```

While Ruby makes defining custom operators easy, there is a lot more code here than there needs to be. We can easily clean it up by mixing in Ruby's built in `Comparable` module.

```ruby
class Computation
  include Comparable

  def initialize(&block)
    @action = block
  end

  def result
    @result ||= @action.call
  end

  def <=>(other)
    return  0 if result == other.result
    return  1 if result > other.result
    return -1 if result < other.result
  end
end

a = Computation.new { 1 + 1 }
b = Computation.new { 4*5 }
c = Computation.new { -3 }

p a < b  #=> true
p a <= b #=> true
p b > c  #=> true
p b >= c #=> true
p a == b #=> false
```

We see that our individual operator definitions have disappeared, and in their place are two new bits of code. The first new thing is just an include statement that tells Ruby to mix the `Comparable` functionality into the `Computation` class definition. But in order to make use of the mixin, we need to tell `Comparable` how to evaluate the sort order of our `Computation` objects, and that's where `<=>` comes in.

The `<=>` method, sometimes called the spaceship operator, essentially fills in a template method that allows `Comparable` to work. It codifies the notion of comparison in an abstract manner by expecting the method to return `-1` when the current object is considered less than the object it is being compared to, `0` when the two are considered equal, and `1` when the current object is considered greater than the object it is being compared to.

If you're still scratching your head a bit, pretend that rather than being a core Ruby object, `Comparable` is something we've implemented ourselves by writing the following code.

```ruby
module Comparable
  def ==(other)
    (self <=> other) == 0
  end

  def <(other)
    (self <=> other) == -1
  end

  def <=(other)
    self < other || self == other
  end

  def >(other)
    (self <=> other) == 1
  end

  def >=(other)
    self > other || self == other
  end
end
```

Now, if you imagine these method definitions literally getting pasted into your `Computation` class when `Comparable` is included, you'll see that it would provide a behavior that is functionally equivalent to our initial example.

Of course, it wouldn't make sense for Ruby to implement such a feature for us
without using it in its own structures. Because Ruby's numeric classes
all implement `<=>`, we are able to simply delegate our `<=>` call to the 
result of the computations.

```ruby
class Computation
  include Comparable

  def initialize(&block)
    @action = block
  end

  def result
    @result ||= @action.call
  end

  def <=>(other)
    result <=> other.result
  end
end
```

The only requirement for this code to work as expected is that each `Computation`'s result must implement the `<=>` method. Since all objects that mix in `Comparable` have to implement `<=>`, any comparable object returned as a result should work fine here.

While not a technically complicated example, there is surprising power in having a primitive built into your programming language which trivializes the implementation of the Template Method design pattern. If you look at Ruby's `Enumerable` module and the powerful features it offers, you might think it would be a much more complicated example to study. But it too hinges on Template Method and requires only an `each()` method to give you all sorts of complex functionality including things like `select()`, `map()`, and `inject()`. If you haven't tried it before, you should certainly try to roll your own `Enumerable` module to get a sense of just how useful mixins can be.

We can also invert this relationship by having our class define a template, and then relying on the module that we mix in to provide the necessary details. If we look back at a previous example, `TicTacToe`, we can see a practical use of this technique by looking at the play method in our `TicTacToe::Game` class.

```ruby
module TicTacToe
  class Game
    def play
      catch(:finished) do
        loop do
          start_new_turn
          show_board

          check_move { |error_message| puts error_message }
          check_win { puts "#{current_player} wins" }
          check_draw { puts "It's a tie" }
        end
      end
    end

    # ...
  end
end
```

In this code, we wanted to keep our event loop abstract, and rely on a mixed in module to provide the logic for executing and validating a move as well as checking end game conditions. As a result, we ended up with the `TicTacToe::Rules` module shown below.

```ruby
module TicTacToe
  module Rules
    def check_move
      row, col = move_input
      board[row, col] = current_player
    rescue TicTacToe::Board::InvalidRequest => error
      yield error.message if block_given?
      retry
    end

    def check_win
      return false unless board.last_move

      win = board.intersecting_lines(*board.last_move).any? do |line|
        line.all? { |cell| cell == current_player }
      end

      if win
        yield
        game_over
      end
    end

    def check_draw
      if @board.covered?
        yield
        game_over
      end
    end
  end
end
```

When we look at this code, we see some basic business logic implementing the rules of Tic Tac Toe, with some placeholder hooks being provided by `yield()` that allows the calling code to inject some logic at certain key points in the process. This is how we manage to split the UI code from the game logic, without creating frivolous adapter classes.

While this is a more complicated example than our walkthrough of `Comparable`, the two share a common thread. In both cases, some coupling exists between the module and the object it is being mixed into. This is a common pattern when using mixins, in which the module and the code it is mixed into have to do a bit of a secret handshake to be able to talk to one another, but as long as they agree on that, neither needs to know about the other's inner workings. The end result is two components which must agree on an interface but do not need to necessarily understand each other's implementations. Code with this sort of coupling is easy to test and easy to refactor.

### Using Mix-ins to Augment Objects Directly

As you may already know, Ruby's mixin capability is not limited to simply including new behavior into a class definition. You can also extend the behavior of a class itself, through the use of the `extend()` method. We can look to the Ruby standard library <i>forwardable</i> for a nice example of how this is used. Consider the following trivial `Stack` implementation.

```ruby
require "forwardable"

class Stack
  extend Forwardable

  def_delegators :@data, :push, :pop, :size, :first, :empty?

  def initialize
    @data = []
  end
end
```

In this example, we can see that after we extend our `Stack` class with the `Forwardable` module, we are provided with a class level method called `def_delegators` which allows us to easily define methods which delegate to an object stored in the specified instance variable. Playing around with the `Stack` object a bit should illustrate what this code has done for us.

```ruby
>> stack = Stack.new
=> #<Stack:0x4f09c @data=[]>
>> stack.push 1
=> [1]
>> stack.push 2
=> [1, 2]
>> stack.push 3
=> [1, 2, 3]
>> stack.size
=> 3
>> until stack.empty?
>>   p stack.pop
>> end
3
2
1
```

As before, it may be helpful to think about how we might implement `Forwardable` ourselves. The following bit of code shows one way to approach the problem.

```ruby
module MyForwardable
  def def_delegators(ivar, *delegated_methods)
    delegated_methods.each do |m|
      define_method(m) do |*a, &b|
        obj = instance_variable_get(ivar)
        obj.send(m,*a, &b)
      end
    end
  end
end
```

While the metaprogramming aspects of this may be a bit noisy to read if you're not familiar with them, this is fairly vanilla dynamic Ruby code. If you've got Ruby 1.9.2 installed, you can actually try it out on your own and verify that it does indeed work as expected. But the practical use case of this code isn't what's important here.

The key thing to notice about this code is that while it essentially implements a class method, nothing in the module's syntax directly indicates this to be the case. The only hint we get that this is meant to be used at the class level is the use of `define_method()`, but we need to dig into the implementation code to notice that.

Before we wrap up, we should investigate why this is the case.

### A Brief Stroking of the Beard

The key thing to recognize is that `include()` mixes methods into the instances of the base object while `extend()` mixes methods into the base object itself. Notice that this is more general than a class method / instance method dichotomy.

Let's explore a few different possibilities using a somewhat contrived example so that we can focus on the mixin mechanics. First, we start with an ordinary module, which is somewhat useless on its own.

```ruby
module Greeter
  def hello
    "hi"
  end
end
```

By including `Greeter` into `SomeClass`, we make it so that we can now call `hello()` on instances of `SomeClass`.

```ruby
class SomeClass
  include Greeter
end

SomeClass.new.hello #=> "hi"
```

But as we saw in the `Forwardable` example, extending `AnotherClass` with `Greeter` would allow us to call the hello method directly at the class level, as in the example below.

```ruby
class AnotherClass
  extend Greeter
end

AnotherClass.hello #=> "hi"
```

Be sure to note at this point that `extend()` and `include()` are two totally
different operations. Because you did not extend `SomeClass` with `Greeter`, you
could not call `SomeClass.hello()`. Similarly, you cannot call
`AnotherClass.new.hello()` without explicitly including `Greeter`.

From the examples so far, it might seem as if `include()` is for defining instance methods, and `extend()` is for class methods. But that is not quite accurate, and the next bit of code illustrates just how much deeper the rabbit hole goes.

```ruby
obj = Object.new
obj.extend(Greeter)
obj.hello #=> "hi"
```

Before you let this example make you go cross-eyed, let's review the key point I made at the beginning of this section: <i>The key thing to recognize is that `include()` mixes methods into the instances of the base object while `extend()` mixes methods into the base object itself.</i>

Since not every base object can have instances, not every object can have modules included into them (in fact, only classes and other modules can). But *every* object can be extended by modules. This includes, among other things, classes and modules themselves.

Let's try to bring the two `extend()` examples closer together with the following little snippet:

```ruby
MyClass = Class.new
MyClass.extend(Greeter)
MyClass.hello #=> "hi"
```

If you feel like you understand the lines above, you're ready for the rest
of this mini-series. If not, please ponder the following questions and leave a
comment sharing your thoughts.

### Questions To Consider

  * Why do we have both `include()` and `extend()` available to us? Why not just have one way of doing mixins?

  * When you write `extend()` within a class definition, does it do any sort of special casing? Or is it the same as calling `extend()` on any other object?

  * Except for mixing in class methods, what is `extend()` useful for?

Please feel free to ask for hints on any of these if you're stumped, or share your answers if you'd like to help others and maybe get a bit of feedback to check your assumptions against.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/038-issue-9-uses-for-modules.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/010-uses-for-modules.md
================================================
> Note: This article series on modules is also available as a [PDF download]. The
> PDF version has been revised and is more up-to-date than what you see here.

[PDF download]:https://github.com/elm-city-craftworks/pr-monthly/blob/gh-pages/b5e5a89847701c4aa7c170cf/sept-2012-modules.pdf?raw=true

In the last two issues, we covered mixins and namespacing, two of the most common uses for modules. In the second half of this series, we'll look at some other ways to use modules that are not quite so obvious.

We can now focus on the question that caused me to write this series in the
first place. Many readers were confused by my use of `extend self` within
earlier Practicing Ruby articles, and this led to a number of interesting
questions on the mailing list at the time these articles were originally
published. While I tried my best to answer them directly, I think we're in better
shape to study this topic now that the last two articles have laid a 
foundation for us.

### Review of how `extend()` works

To understand this trick of mixing modules into themselves, one first must understand how `extend()` works. We covered this concept at the end of the last article, but we can touch on it again for good measure. Start by considering the trivial module shown below.

```ruby
module Greeter
  def hello
    "hi"
  end
end 
```

We had shown that unlike `include()`, which is especially designed for augmenting class definitions so that a mixin can add instance methods to some target class, `extend()` has a much simpler behavior and works with any object.

```ruby
obj = Object.new
obj.extend(Greeter)
obj.hello #=> "hi"
```

From this, we can see that mixing in a module by using extend simply mixes the methods defined by the module directly at that object's level. In this way, the methods defined by the module are mixed into the receiver, no matter what that object is.

In Ruby, classes and modules are ordinary objects. We can confirm this by doing a tiny bit of introspection on `Greeter`.

```ruby
>> Greeter.object_id
=> 212500
>> Greeter.class
=> Module
>> Greeter.respond_to?(:extend)
=> true
```

While this may be a mental leap for some, you might be able to find peace with it by considering the ordinary module definition syntax to be a bit of sugar that is functionally equivalent to the following bit of code.

```ruby  
Greeter = Module.new do
  def hello
    "hi"
  end
end
```

When written in this way, it becomes far more obvious that `Greeter` is actually just an instance of the class Module, making it an ordinary Ruby object at its core. Once you feel that you understand this point, consider what happens when the following line of code is run.

```ruby
Greeter.extend(Greeter)
```

If we compare this to previous examples of `extend()`, it should be clear now that despite the seemingly circular reference, this line does exactly what it would if called on any other object: It mixes the methods defined by `Greeter` directly into the `Greeter` object itself. A simple test confirms this to be true.

```ruby
Greeter.hello #=> "hi"
```

If we unravel things a bit, we find that we could have written our `extend()` call slightly differently, by doing it from within the module definition itself:

```ruby
module Greeter
  extend Greeter

  def hello
    "hi"
  end
end
```

The reason `extend()` works here is because `self == Greeter` in this context.
Noticing this detail allows us to use a slightly more dynamic approach, resulting
in the following code.

```ruby
module Greeter
  extend self

  def hello
    "hi"
  end
end
```

You'll find this new code to be functionally identical to the previous example, but slightly more flexible. Now, if we change the name of our module, we won't need to update our `extend()` call. This is why folks tend to write `extend self` rather than `extend TheCurrentModule`.

Hopefully by now, it is clear that this trick does not involve any sort of special casing for modules, and is an ordinary application of the `extend()` method provided by every Ruby object. The only thing that might be confusing is the seemingly recursive nature of the technique, but this issue disappears when you recognize that modules are not mixed into anything by default, and that modules themselves are not directly related to the methods they define. If you understand the difference between class and instance methods in Ruby, this isn't a far stretch from that concept.

While the inner workings of modules are an interesting academic topic, my emphasis is always firmly set on practical applications of programming techniques rather than detached conceptual theory. So now that we've answered 'how does this work?', let's focus on the much more interesting 'how can I use it?' topic.

### Self-Mixins as Function Bags

A fascinating thing about Ruby is the wide range of different software design paradigms it supports. While object-oriented design is heavily favored, Ruby can do a surprisingly good job of emulating everything from procedural programming to prototype-based programming. But the one area that Ruby overlaps most with is functional programming.

Now, before you retire your parentheses for good and herald Ruby as a replacement for LISP, be warned: There is a lot about Ruby's design that makes it a horrible language for functional programming. But when used sparingly, techniques from the functional world fit surprisingly well in Ruby programs. The technique I find most useful is the ability to organize related functions together under a single namespace.

When we create class definitions, we tend to think of the objects we're building as little structures which manage state and provide behaviors which manipulate that state. But sometimes, a more stateless model makes sense. The closer you get to pure mathematics, the more a pure functional model makes sense. We need to look no farther than Ruby's own `Math` module for an example:

```ruby
>> Math.sin(Math::PI/2.0)
=> 1.0
>> Math.log(Math::E)
=> 1.0
```

It seems unlikely that we'd want to create an instance of a `Math` object, since
it doesn't really deal with any state that persists beyond a single function
call. But it might be desirable to mix this functionality into another object so
that you can call math functions without repeating the `Math` constant
excessively. For this reason, Ruby implements `Math` as a module.

```ruby
>> Math.class
=> Module
```

For another great example of modular code design in Ruby itself, be sure to check out the `FileUtils` standard library, which allows you to execute basic *nix file operations as if they were just ordinary function calls.

After seeing how Ruby is using this technique, I didn't find it hard to stumble upon scenarios in my own code that could benefit from a similar design. For example, when I was working on building out the backend for a trivia website, I was given some logic for normalizing user input so that it could be compared against a predetermined pattern.

While I could have stuck this logic in a number of different places, I decided I wanted to put it within a module of its own, because its logic did not rely on any persistent state and could be defined independently of the way our questions and quizzes were modeled. The following code is what I came up with:

```ruby
module MinimalAnswer
  extend self

  def match?(pattern, input)
    pattern.split(/,/).any? do |e| 
      normalize(input) =~ /\b#{normalize(e)}/i 
    end
  end

  private

  def normalize(input)
    input.downcase.strip.gsub(/\s+/," ").gsub(/[?.!\-,:'"]/, '')
  end
end
```

The nice thing about the code above is that using a modular design doesn't force you to give up things like private methods. This allows you to keep your user facing API narrow while still being able to break things out into helper methods.

Here is a simple example of how my `MinimalAnswer` module is used within the application:

```ruby
>> MinimalAnswer.match?("Cop,Police Officer", "COP")
=> true
>> MinimalAnswer.match?("Cop,Police Officer", "police officer")
=> true
>> MinimalAnswer.match?("Cop,Police Officer", "police office")
=> false
>> MinimalAnswer.match?("Cop,Police Officer", "police officer.")
=> true
```

Now as I said before, this is a minor bit of functionality and could probably be shelved onto something like a `Question` object or somewhere else within the system. But the downside of that approach would be that as this `MinimalAnswer` logic began to get more complex, it would begin to stretch the scope of whatever object you attached this logic to. By breaking it out into a module right away, we give this code its own namespace to grow in, and also make it possible to test the logic in isolation, rather than trying to bootstrap a potentially much more complex object in order to test it.

So whenever you have a bit of logic that seems to not have many state dependencies between its functions, you might consider this approach. But since stateless code is rare in Ruby, you may wonder if learning about self-mixins really bought us that much.

As it turns out, the technique can also be used in more stateful scenarios when you recognize that Ruby modules are objects themselves, and like any object, can contain instance data.

### Self-Mixins for Implementing Singleton Pattern

Ruby overloads the term 'singleton object', so we need to be careful about terminology here. What I'm about to show you is how to use these self-mixed modules to implement something similar to the [Singleton design pattern](http://en.wikipedia.org/wiki/Singleton_pattern).

I've found in object design that objects typically need zero, one, or many instances. When an object doesn't really need to be instantiated at all because it has no data in common between its behaviors, the modular approach we just reviewed often works best. The vast majority of the remaining cases fall into ordinary class definitions which facilitate many instances. Virtually everything we model fits into this category, so it's not worth discussing in detail. However, there are some cases in which a single object is really all we need. In particular, configuration systems come to mind.

The following example shows a simple DSL I wrote for the trivia application I had mentioned earlier. It may look familiar, and that is because it appeared in our discussion on writing configuration systems some weeks ago. This time around, our focus will be on how this system actually works rather than what purpose it serves.

```ruby
AccessControl.configure do
  role "basic",
    :permissions => [:read_answers, :answer_questions]

  role "premium",
    :parent      => "basic",
    :permissions => [:hide_advertisements]

  role "manager",
    :parent      => "premium",
    :permissions => [:create_quizzes, :edit_quizzes]

  role "owner",
    :parent      => "manager",
    :permissions => [:edit_users, :deactivate_users]
end 
```

To implement code that allows the definitions above to be modeled internally, we need to consider how this system will be used. While it is easy to imagine roles shifting over time, getting added and removed as needed, it's hard to imagine what the utility of having more than one `AccessControl` object would be.

For this reason, it's safe to say that `AccessControl` configuration data is global information, and so does not need the data segregation that creating instances of a class provides.

By modeling `AccessControl` as a module rather than class, we end up with an object that we can store data on that can't be instantiated.

```ruby
module AccessControl
  extend self

  def configure(&block)
    instance_eval(&block)
  end

  def definitions
    @definitions ||= {}
  end

  # Role definition omitted, replace with a stub if you want to test
  # or refer to Practicing Ruby Issue #4
  def role(level, options={})
    definitions[level] = Role.new(level, options)
  end

  def roles_with_permission(permission)
    definitions.select { |k,v| v.allows?(permission) }.map { |k,_| k }
  end

  def [](level)
    definitions[level]
  end 
end
```

There are two minor points of potential confusion in this code worth discussing: the first is the use of `instance_eval` in `configure()`, and the second is that the `definitions()` method refers to instance variables. This is where we need to remind ourselves that the scope of methods defined by a module cannot be determined until it is mixed into something.

Once we recognize these key points, a bit of introspection shows us what is really going on.

```ruby
>> AccessControl.configure { "I am #{self.inspect}" }
=> "I am AccessControl"
>> AccessControl.instance_eval { "I am #{self.inspect}" }
=> "I am AccessControl"
>> AccessControl.instance_variables
=> ["@definitions"]
```

Since `AccessControl` is an ordinary Ruby object, it has ordinary instance variables and can make use of `instance_eval` just like any other object. The key difference here is that `AccessControl` is a module, not a class, and so cannot be used as a factory for creating more instances. In fact, calling `AccessControl.new` raises a `NoMethodError`.

In a traditional implementation of Singleton Pattern, you have a class which disables instantiation through the ordinary means, and creates a single instance that is accessible through the class method `instance()`. However, this seems a bit superfluous in a language in which classes are full blown objects, and so isn't necessary in Ruby.

For cases like the configuration system we've shown here, choosing to use this approach is reasonable. That having been said, the reason why I don't have another example that I can easily show you is that with the exception of this narrow application for configuration objects, I find it relatively rare to have a legitimate need for the Singleton Pattern. I'm sure if I thought long and hard on it, I could dig some other examples up, but upon looking at recent projects I find that variants of the above are all I use this technique for.

However, if you work with other people's code, it is likely that you'll run into someone implementing Singleton Pattern this way. Now, rather than scratching your head, you will have a solid understanding of how this technique works, and why someone might want to use it.

### Reflections

In Issue 11, we'll wrap up with some even more specialized uses for modules, showing how they can be used to build plugin systems as well as how they can be used as a replacement for monkey patching. But before we close the books on today's lesson, I'd like to share some thoughts that were rattling around in the back of my mind while I was preparing this article.

The techniques I've shown today can be useful in certain edge case scenarios
where an ordinary class definition might not be the best tool to use. In my own
code, I tend to use the first technique of creating function bags often but sparingly, 
and the second technique of building singleton objects rarely and typically only 
for configuration systems.

Upon reflection, I wonder to myself whether the upsides of these techniques outweigh the cost of explaining them. I don't really have a definitive answer to that question, but it's really something I think about often.

On the one hand, I feel that users of Ruby should have an ingrained understanding of its object system. After all, these are actually fairly straightforward techniques once you understand how things work under the hood. It's also true that you can't really claim to understand Ruby's object system without fully understanding these examples. Having a weak understanding of how Ruby's objects work is sure to rob you of the joy of working in Ruby, so for this reason, I feel like 'dumbing down' our code would be a bad thing.

On the other hand, I think that for the small gains yielded by using these techniques, we require those who are reading our code to understand a whole score of details that are unique to Ruby. When you consider that by changing a couple lines of code, you can have a design which is not much worse but is understandable by pretty much anyone who has programmed in an OO language before, it's certainly tempting to cater to the lowest common denominator.

But this sort of split-mindedness is inevitable in Ruby, and comes up in many scenarios. The truth of the matter is that it's going to take many more years before Ruby is truly understood by the programming community at large. But as more people dive deeper into Ruby, Ruby is starting to come into its own, and the mindset that things should be done as they are in other languages is not nearly as common as it was several years ago. For this reason, it's important to stop thinking of Ruby in terms of whatever language you've come from, and start thinking of it as its own thing. As soon as you do that, a whole range of possibilities opens up.

At least, that's what I think. What about you?

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/040-issue-10-uses-for-modules.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/010a-uses-for-modules.md
================================================
> Note: This article series on modules is also available as a [PDF download]. The
> PDF version has been revised and is more up-to-date than what you see here.

[PDF download]:https://github.com/elm-city-craftworks/pr-monthly/blob/gh-pages/b5e5a89847701c4aa7c170cf/sept-2012-modules.pdf?raw=true

In the [last issue](http://practicingruby.com/articles/38), we discussed the use of `extend self` in great detail, but neglected to cover a pair of alternatives that seem on the surface to be functionally equivalent. While I don't want to spend too much time rehashing an old topic, I want to at least provide an example of each approach and comment on their quirks.

### Defining methods at the module level

Occasionally folks ask whether mixing a module into itself via `extend()` is equivalent to the code shown below.

```ruby
module Greeter
  def self.hello
    "hi"
  end
end
```

The short answer to that question is "no", but it is easy to see where the confusion comes from, because calling `Greeter.hello` does indeed work as expected. But the important distinction is that methods defined in this way are simply directly defined on the module itself and so cannot be mixed into anything at all. There is really very little difference between the above code and the example below.

```ruby  
obj = Object.new

def obj.hello
  "hi"
end
```

Consider our earlier example of Ruby's `Math` or `FileUtils` modules. With both of these modules, you can envision scenarios in which you would call the functions on the modules themselves. But there are also cases where using these modules as mixins would make a lot of sense. For example, Ruby itself ships with a math mode (-m) for irb which mixes in the `Math` module at the top level so you can call its functions directly.

```ruby
$ irb -m
>> sin(Math::PI/2)
=> 1.0
```

In the above example, if `sin()` were implemented by defining the method
directly on the `Math` module, there would be no way to mix it into anything.
While sometimes it might make sense to force a module to never be used as a
mixin, that use case is rare, and so little is gained by defining methods on
modules rather than using the `extend self` technique.

### Using `module_function`

Before people got in the habit of mixing modules into themselves, they often relied on a more specialized feature called `module_function` to accomplish the same goals.

```ruby
module Greeter
  module_function

  def hello
    "hi"
  end
end
```

This code allows the direct calling of `Greeter.hello`, and does not prevent
`Greeter` from being mixed into other objects. The `module_function` approach
also allows you to choose certain methods to be module functions while 
leaving others accessible via mixin only:

```ruby
module Greeter
  def hello
    "hi"
  end

  def goodbye
    "bye"
  end

  module_function :hello
end
```

With this modified definition, it is still possible to call `Greeter.hello`, but attempting to call `Greeter.goodbye` would raise a `NoMethodError`. This sort of sounds like it offers the benefits of extending a module with itself, but with some added granularity. Unfortunately, there is something about `module_function` that makes it quite weird to work with.

As it turns out, `module_function` works very differently under the hood than self-mixins do. This is because `module_function` actually doesn't manipulate the method lookup path, but instead, it makes a direct copy of the specified methods and attaches them to the module itself. If that sounds too weird to be true, check out the code below.

```ruby 
module Greeter
  def hello
    "hi"
  end

  module_function :hello

  def hello
    "howdy"
  end
end

Greeter.hello #=> "hi"

class Foo
  include Greeter
end

Foo.new.hello #=> "howdy"
```

Pretty weird behavior, right? You may find it interesting to know that I was not actually aware that `module_function` made copies of methods until I wrote Issue #10 and was tipped off about this by one of our readers. However, I did know about one of the consequences of `module_function` being implemented in this way: private methods cannot be used in conjunction with `module_function`. That means that the following example cannot be literally translated to use `module_function`.

```ruby
module MinimalAnswer
  extend self

  def match?(pattern, input)
    pattern.split(/,/).any? do |e|
      normalize(input) =~ /\b#{normalize(e)}/i
    end
  end

  private

  def normalize(input)
    input.downcase.strip.gsub(/\s+/," ").gsub(/[?.!\-,:'"]/, '')
  end
end 
```

From these examples, we see that `module_function` is more flexible than defining methods directly on your modules, but not nearly as versatile as extending a module with itself. While the ability to selectively define which methods can be called directly on the module is nice in theory, I've yet to see a use case for it where it would lead to a much better design.

### Reflections

With the alternatives to `extend self` having unpleasant quirks, it's no surprise that they're quickly falling out of fashion in the Ruby world. But since no technical decision should be made based on dogma or a blind-faith acceptance of community conventions, these notes hopefully provide the necessary evidence to help you make good design decisions on your own.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/041-issue-10.5-uses-for-modules.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/011-uses-for-modules.md
================================================
> Note: This article series on modules is also available as a [PDF download]. The
> PDF version has been revised and is more up-to-date than what you see here.

[PDF download]:https://github.com/elm-city-craftworks/pr-monthly/blob/gh-pages/b5e5a89847701c4aa7c170cf/sept-2012-modules.pdf?raw=true

Today we're going to wrap up this series on modules by looking at how mixins can be useful for implementing custom behavior on individual objects. In particular, we'll be looking at how modules can be used both as a replacement for monkey patching and for constructing systems that can be extended without the need for monkey patching. While neither of these techniques is going to be something you'll use every day, they really come in handy when you run into a situation that calls for them.

### Modules instead of Monkey Patches

Back in the bad old days before Prawn, I was working on a reporting framework called Ruby Reports (Ruport), which generated PDF reports via `PDF::Writer`. At the time, `PDF::Writer` was quite buggy, and essentially abandoned, but was the only game in town when it came to PDF generation.

One of the bugs was something fairly critical: Memory consumption for outputting simple PDF tables would balloon like crazy, causing a document with more than a few pages to take anywhere from several minutes to several *hours* to run.

The original author of the library had a patch lying around that inserted a hook which did some caching that greatly reduced the memory consumption, but he had not tested it extensively and did not want to cut a release. I had talked to him about possibly monkey patching `PDF::Document` in Ruport's code to add this patch, but together, we came up with a better solution: wrap the patch in a module.

```ruby
module PDFWriterMemoryPatch
  unless self.class.instance_methods.include?("_post_transaction_rewind")
    def _post_transaction_rewind
      @objects.each { |e| e.instance_variable_set(:@parent,self) }
    end
  end
end
```

In Ruport's PDF formatter code, we did something like the following to apply our patch:

```ruby
@document = PDF::Document.new
@document.extend(Ruport::PDFWriterMemoryPatch)
```

Throughout our application, whenever someone interacted with a `PDF::Document` instance we created, they had a patched instance that fixed the memory leak. This meant from the Ruport user's perspective, the bug was fixed. So what makes this different from monkey patching?

Because we were only manipulating the individual objects that we created in our library, we were not making a global change that might surprise people. For example if someone was building an application that only implicitly loaded Ruport as a dependency, and they created a `PDF::Document` instance, our patch would not be loaded. This prevented us from causing unexpected behavior in any code that lived outside of Ruport itself.

While this approach didn't shield us from the risks that a future change to `PDF::Writer` could potentially break our patch in Ruport, it did prevent any risk of global consequences. Anyone who's ever spent a day scratching their head because of some sloppy monkey patch in a third party dependency will immediately be able to see the value of this sort of isolation.

The neat thing is that a similar approach can be used for core extensions as
well. Rather than re-opening Ruby core classes, you can imbue individual
instances with custom behavior, getting many of the benefits of monkey patching
without the disadvantages. For example, suppose you want to add the `sum()` and
`average()` methods to Array. If we were monkey patching, we'd write something
like the following code:

```ruby
class Array
  def sum
    inject(0) { |s,e| s + e }
  end

  def average
    sum.to_f / length
  end
end

obj = [1,3,5,7]
obj.sum     #=> 16
obj.average #=> 4.0
```

The danger here of course is that you'd be globally stomping anyone else's definition of `sum()` and `average()`, which can lead to ugly conflicts. All these problems can be avoided with a minor modification.

```ruby
module ArrayMathHelpers
  def sum
    inject(0) { |s,e| s + e }
  end

  def average
    sum.to_f / length
  end
end

obj = [1,3,5,7]
obj.extend(ArrayMathHelpers)
obj.sum     #=> 16
obj.average #=> 4.0
```

By explicitly mixing in the `ArrayMathHelpers` module, we isolate our changes just to the objects we've created ourselves. With slight modification, this technique can also be used with objects passed into functions, typically by making a copy of the object before working on it.

Because modules mixed into an instance of an object are looked up before 
the methods defined by its class, 
you can actually use this technique for modifying existing behavior of an object as well. 
The example below demonstrates modifying `<<` on strings so that it allows appending 
arbitrary objects to a string through coercion.

```ruby
module LooseStringAppend
  def <<(value)
    super
  rescue TypeError
    super(value.to_s)
  end
end

a = "foo"
a.extend(LooseStringAppend)
a << :bar << :baz #=> "foobarbaz"
```

Of course, this (like most core modifications) is a horrible idea. But speaking as a pure technique, this is far better than the alternative global monkey patch shown below:

```ruby
class String
  alias_method :old_append, :<<
  
  def <<(value)
    old_append(value)
  rescue TypeError
    old_append(value.to_s)
  end
end
```

When using per-object mixins as an alternative to monkey patching, what you gain is essentially two things: A first class seat in the lookup path allowing you to make use of `super()`, and isolation on a per-object basis so that consumers of your code don't curse you for patching things in unexpected ways. While this approach isn't always available, it is definitely preferable whenever you can choose it over monkey patching.

In Ruby 2.0, we may end up with an even better option for this sort of thing called refinements, which are also module based. But for now, if you must hack other people's objects, this approach is a civil way to do it.

We'll now take a look at how to produce libraries and applications that actively encourage extensions to be done this way.

### Modules as Extension Points

This last section is not so much about practical advice as it is about taking what we've learned so far and really stretching it as far as possible into new territories. In essence, what follows are my own experiments with ideas that I'm not fully sure are good, but find interesting enough to share with you.

In previous Practicing Ruby issues, I've shown some code from a command line client we've used for time tracking in my consulting work. The tool itself never quite matured far enough to be release ready, but I used it as a testing ground for new design ideas, so it is a good conversation starter at least.

Today, I want to show how we implemented commands for it. Essentially, I want to walk through what happens when someone types the following command into their console:

```ruby
$ turbine start
Timer started at Wed Dec 15 17:55:37 -0500 2010
```

Because we knew this tool would evolve over time, we wanted to make it as hackable as possible. To do this, we set up a system in which commands get installed into a hidden folder in each project, making it trivial to modify existing commands or add new ones. Here's a quick directory listing to show what that structure looks like:

```ruby
$ ls .turbine/commands/standard/
add.rb		project.rb	rewind.rb	status.rb commit.rb push.rb		
staged.rb	stop.rb drop.rb	reset.rb start.rb
```

As you might expect, start.rb defines the start command. Here's what its source
looks like:

```ruby
Turbine::Application.extension(:start_command) do
  def start
    timer = Turbine::Timer.new
    if timer.running?
      prompt.say "Timer already started, please stop or rewind first"
    else
      timer.write_timestamp
      prompt.say "Timer started at #{Time.now}"
    end
  end
end
```

You'll notice that all our commands are direct mappings to method
calls, which are responsible for doing all the work. While I've simplified the
following definition to remove some domain specific callbacks and options 
parsing, the following example shows the basic harness which registers 
Turbine's commands:

```ruby
module Turbine
  # Command runner whose commands come from named extension modules that are
  # mixed into each instance rather than into the class itself.
  class Application
    class << self
      # Registry of extension modules, keyed by extension name.
      def extensions
        @extensions ||= {}
      end

      # Wraps the block in an anonymous module and stores it under +key+,
      # replacing whatever was previously registered with that key.
      def extension(key, &block)
        extensions[key] = Module.new(&block)
      end
    end

    # Extends this particular instance with every registered extension.
    def initialize
      self.class.extensions.each_value { |mod| extend(mod) }
    end

    # Runs a command by calling the method of the same name.
    def run(command)
      send(command)
    end
  end
end
```

From this, we see that `Turbine::Application` stores a Hash of anonymous modules
which are created on the fly whenever `extension()` is called. The
interesting thing about this design is that the commands aren't applied globally
to `Turbine::Application`, but instead, are mixed in at the instance level. This
approach allows us to selectively disable features, or completely replace them 
with alternative implementations.

For example, consider a custom command that gets loaded after the standard commands, which is implemented like this:

```ruby
# Registered under the same :start_command key as the standard start command,
# so this module takes that module's place in the extensions hash.
Turbine::Application.extension(:start_command) do
  def go
    puts "Let's go!"
  end
end
```

Because the module defining the `go()` method would replace the original module in the extensions hash, the original module ends up getting completely wiped out. In retrospect, for my particular use case, this approach seems to be like using a thermonuclear weapon where a slingshot would do, but you can't argue that this fails to take extensibility to whole new limits.

Eventually, when someone falls off the deep end in their study of modules, they ask 'is it possible to uninclude them?', and the short answer to that question is "No", promptly followed up with "Why would you want to do that?". But what we've shown here is a good approximation for unincluding a module, even if we haven't quite figured out the answer to the 'why' part yet.

But sometimes, we have to explore just for the fun of it, right? :)

### Reflections

I have had a blast writing to you all about modules and answering your questions as they come up. Unfortunately, the topic is even bigger than I thought, and there are at least two full articles I could write on the topic, which might actually be more practical and immediately relevant than the materials I've shared today. In particular, we didn't cover things like the `included()` and `extended()` hooks, which can be quite useful and are worth investigating on your own.

Moving forward, my goals for Practicing Ruby are to be able to hit a wide range of topics, so we'll probably move away from the fundamentals of Ruby's object system and go back to some more problem-solving oriented topics in the coming weeks. But if you like this kind of format, please let me know.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/043-issue-11-uses-for-modules.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/012-rapid-prototyping.md
================================================
Ruby makes it easy to quickly put together a proof-of-concept for almost any kind of project, as long as you have some experience in rapid application development. In this article, I will go over how I build prototypes, sharing the tricks that have worked well for me.

Today we'll be walking through a bit of code that implements a small chunk of a falling blocks game that is similar to Tetris. If you're not familiar with Tetris, head over to [freetetris.org](http://freetetris.org) and play it a bit before reading this article.

Assuming you're now familiar with the general idea behind the game, I'll walk you through the thought process that I went through from the initial idea of working on a falling blocks game to the small bit of code I have written for this issue.

### The Planning Phase

After running through a few ideas, I settled on a falling blocks game as a good example of a problem that's too big to be tackled in a single sitting, but easy enough to make some quick progress on.

The next step for me was to come up with a target set of requirements for my
prototype. To prevent the possibilities from seeming endless, I had to set a
time limit up front to make this decision making process easier. Because 
very small  chunks of focused effort can get you far in Ruby, I settled on
coming up with something I felt I could build within an hour or two.

I knew right away this meant that I wasn't going to make an interactive demo. Synchronizing user input and screen output is something that may be easy for folks who do it regularly, but my concurrency knowledge is very limited, and I'd risk spending several hours on that side of things and coming up empty if I went down that path. Fortunately, even without an event loop, there are still a lot of options for building a convincing demo.

In my initial optimism, I thought what I'd like to be able to do is place a piece on the screen, and then let gravity take over, eliminating any completed lines as it fell into place. But this would require me to implement collision detection, something I didn't want to tackle right away.

Eventually, I came up with the idea of just implementing the action that happens when a piece collides with the junk on the grid. This process involved turning the active piece into inactive junk, and then removing any completed rows from the grid. This is something that I felt fit within the range of what I could do within an hour or two, so I decided to sleep on it and see if any unknowns bubbled up to the surface.

I could have just started hacking right away, but ironically that's a practice I typically avoid when putting together rapid prototypes. If this were a commercial project and I quoted the customer 2-4 hours, I'd want to use their money in the best possible way, and picking the wrong scope for my project would be a surefire way to either blow the budget or fail to produce something interesting. I find a few hours of passive noodling helps me see unexpected issues before they bite me.

Fortunately, this idea managed to pass the test of time, and I set out to begin coding by turning the idea into a set of requirements.

### The Requirements Phase

A good prototype does not come from a top-down or bottom-up design, but instead comes from starting in the middle and building outwards. By taking a small vertical slice of the problem at hand, you are forced to think about many aspects of the system, but not in a way that requires you to consider the whole problem all at once. This allows most of your knowledge and often a good chunk of your code to be re-used when you approach the full project.

The key is to start with a behavior the user can actually observe. This means that you should be thinking in terms of features rather than functions and objects. Some folks use story frameworks such as Cucumber to help them formalize this sort of inside-out thinking, but personally, I prefer just to come up with a good, clear example and not worry about shoehorning it into a formal setting.

To do this, I created a simple text file filled with ascii art that codified two cases: one in which a line was cleared, and one in which no lines were cleared. Both cases are shown below.


### CASE 1: REMOVING COMPLETED LINES

```
==========
           
           
   #       
   #|    | 
  |#||  ||
|||#||||||
==========
```

BECOMES:

```
==========
           
           
   |       
   ||    | 
  ||||  ||
==========
```

### CASE 2: COLLISION WITHOUT ANY COMPLETED LINES

```
==========
           
           
  #       
  ##|    |
  |#||  ||
||| ||||||
==========
```

BECOMES:

```
==========
           
           
  |       
  |||    | 
  ||||  ||
||| ||||||
==========
```

---------------------------------------------------------------------

With the goals for the prototype clearly outlined, I set out to write a simple program that would perform the necessary transformations.

### The Coding Phase

One thing I'll openly admit is that when prototyping something that will take me less than a half day from end to end, I tend to relax my standards on both testing and writing clean code. The reason for this is that when I'm trying to take a nose-dive into a new problem domain, I find my best practices actually get in the way until I have at least a basic understanding of the project.

What I'll typically do instead is write a single file that implements both the objects I need and an example that gets me closer to my goal. For this project, I started with a canvas object for rendering output similar to what I outlined in my requirements.

Imagining this canvas object already existed, I wrote some code for generating the very first bit of output we see in the requirements.

```ruby
canvas = FallingBlocks::Canvas.new

# Junk to the left of the falling piece
(0..2).each { |x| canvas.paint([x,0], "|") }
canvas.paint([2,1], "|")

# The vertical line piece in column 3
(0..3).each { |y| canvas.paint([3,y], "#") }

# Junk to the right of the falling piece
(4..9).each { |x| canvas.paint([x,0], "|") }
[4,5,8,9].each { |x| canvas.paint([x,1], "|") }
canvas.paint([4,2], "|")
canvas.paint([9,2], "|")

puts canvas
```

While I use a few loops for convenience, it's easy to see that this code does little more than put symbols on a text grid at the specified (x,y) coordinates. Once `FallingBlocks::Canvas` is implemented, we'd expect the following output from this example:

```
==========
           
           
   #       
   #|    | 
  |#||  ||
|||#||||||
==========
```

What we have done is narrowed the problem down to a much simpler task, making it easier to get started. The following implementation is sufficient to get the example working, and is simple enough that we probably don't need to discuss it further.

```ruby
module FallingBlocks
  # A fixed-size square text grid that renders as rows of characters framed
  # by a separator line above and below.
  class Canvas
    SIZE = 10

    def initialize
      @data = Array.new(SIZE) { Array.new(SIZE) }
    end

    # Places +marker+ at the given [x, y] point. Rows are stored top-down, so
    # y is flipped to make y = 0 the bottom row of the rendered output.
    def paint(point, marker)
      column, row = point
      @data[SIZE - row - 1][column] = marker
    end

    def to_s
      "#{separator}\n#{body}\n#{separator}"
    end

    def separator
      "=" * SIZE
    end

    # Cells that were never painted render as a single space.
    def body
      lines = @data.map { |cells| cells.map { |cell| cell || " " }.join }
      lines.join("\n")
    end
  end
end

However, things get a little more hairy once we've plucked this low hanging fruit. So far, we've built a tool for painting the picture of what's going on, but that doesn't tell us anything about the underlying structure. This is a good time to start thinking about what Tetris pieces are.

While a full implementation of the game would require implementing rotations and movement, our prototype looks at pieces frozen in time. This means that a piece is really just represented by a collection of points. If we define each piece based on an origin of [0,0], we end up with something like this for a vertical line:

```ruby
line = FallingBlocks::Piece.new([[0,0],[0,1],[0,2],[0,3]])
```

Similarly, a bent S-shaped piece would be defined like this:

```ruby
bent = FallingBlocks::Piece.new([[0,1],[0,2],[1,0],[1,1]])
```

In order to position these pieces on a grid, what we'd need is an anchor point that could be used to translate the positions occupied by the pieces into another coordinate space.

We could use the origin at [0,0], but for aesthetic reasons, I didn't like the mental model of grasping a piece by a position that could potentially be unoccupied. Instead, I decided to define the anchor as the top-left position occupied by the piece, which could later be translated to a different position on the canvas. This gives us an anchor of [0,3] for the line, and an anchor of [0,2] for the bent shape. I wrote the following example to outline how the API should work.

```ruby 
line = FallingBlocks::Piece.new([[0,0],[0,1],[0,2],[0,3]])
p line.anchor #=> [0,3]

bent = FallingBlocks::Piece.new([[0,1],[0,2],[1,0],[1,1]])
p bent.anchor #=> [0,2]
```

Once again, a simple example gives me enough constraints to make it easy to write an object that implements the desired behavior.

```ruby
# A piece is a collection of [x, y] points together with a designated
# anchor point.
class Piece
  attr_reader :points, :anchor

  def initialize(points)
    @points = points
    establish_anchor
  end

  # Picks the top-left most point: the greatest y wins, and among points
  # sharing that y the smallest x wins.
  def establish_anchor
    @anchor = @points.max_by { |(x, y)| [y, -x] }
  end
end
```

As I was writing this code, I stopped for a moment and considered that this logic, as well as the logic written earlier that manipulates (x,y) coordinates to fit inside a row-major data structure are the sort of things I really like to write unit tests for. There is nothing particularly tricky about this code, but the lack of tests makes it harder to see what's going on at a glance. Still, this sort of tension is normal when prototyping, and at this point I wasn't even 30 minutes into working on the problem, so I let the feeling pass.

The next step was to paint these pieces onto the canvas, and I decided to start
with their absolute coordinates to verify my shape definitions. The following example 
outlines the behavior I had expected.

```ruby
canvas = FallingBlocks::Canvas.new

bent_shape = FallingBlocks::Piece.new([[0,1],[0,2],[1,0],[1,1]])
bent_shape.paint(canvas)

puts canvas
```

OUTPUTS:

```
==========
          
          
#         
##        
 #        
==========
```

Getting this far was easy; the following definition of `Piece` does the trick:

```ruby
# A piece is a collection of [x, y] points that knows how to paint itself
# onto a canvas.
class Piece
  # Marker used when painting a piece onto a canvas
  SYMBOL = "#"

  def initialize(points)
    @points = points
    establish_anchor
  end

  attr_reader :points, :anchor

  # Gets the top-left most point: the highest y, and among those the lowest x.
  # This has to be max_by; min_by would select the bottom-right most point
  # instead (e.g. [1,0] rather than [0,2] for the bent piece).
  def establish_anchor
    @anchor = @points.max_by { |x,y| [y,-x] }
  end

  # Paints every point of the piece onto the canvas at its own coordinates.
  def paint(canvas)
    points.each do |point|
      canvas.paint(point, SYMBOL)
    end
  end
end

This demonstrates to me that the concept of considering pieces as a collection of points can work, and that my basic coordinates for a bent piece are right. But since I need a way to translate these coordinates to arbitrary positions of the grid for this code to be useful, this iteration was only a stepping stone. A new example pushes us forward.

```ruby
canvas = FallingBlocks::Canvas.new

bent_shape = FallingBlocks::Piece.new([[0,1],[0,2],[1,0],[1,1]])

canvas.paint_shape(bent_shape, [2,3])

puts canvas
```

OUTPUTS

```
==========
          
          
  #       
  ##      
   #      
          
==========
```

As you can see in the code above, I decided that my `Piece#paint` method was probably better off as `Canvas#paint_shape`, just to collect the presentation logic in one place. Here's what the updated code ended up looking like.

```ruby
class Canvas
  # ...

  # Paints the shape with its anchor moved to +position+.
  def paint_shape(shape, position)
    translated = shape.translated_points(position)
    translated.each { |point| paint(point, Piece::SYMBOL) }
  end
end
```

This new code does not rely directly on the `Piece#points` method anymore, but instead, passes a position to the newly created `Piece#translated_points` to get a set of coordinates anchored by the specified position.

```ruby
class Piece
  #...

  # Returns the piece's points shifted by the offset that moves its anchor
  # onto +new_anchor+.
  def translated_points(new_anchor)
    target_x, target_y   = new_anchor
    current_x, current_y = anchor

    shift_x = target_x - current_x
    shift_y = target_y - current_y

    points.map do |x, y|
      [x + shift_x, y + shift_y]
    end
  end
end
```

While this mapping isn't very complex, it's yet another point where I was
thinking 'gee, I should be writing tests', and a couple subtle bugs that
cropped up while implementing it confirmed my gut feeling. But with the light
visible at the end of the tunnel, I wrote an example to unify piece objects 
with the junk left on the grid from previous moves.

```ruby
game = FallingBlocks::Game.new
bent_shape = FallingBlocks::Piece.new([[0,1],[0,2],[1,0],[1,1]])
game.piece = bent_shape
game.piece_position = [2,3]
game.junk += [[0,0], [1,0], [2,0], [2,1], [4,0],
              [4,1], [4,2], [5,0], [5,1], [6,0],
              [7,0], [8,0], [8,1], [9,0], [9,1],
              [9,2]]

puts game
```

OUTPUTS:

```
==========


  #
  ##|    |
  |#||  ||
||| ||||||
==========
```

The key component that tied this all together is the `Game` object, which essentially is just a container that knows how to use a `Canvas` object to render itself.

```ruby
# Container for the junk on the grid and the active piece; renders itself
# by drawing both onto a fresh Canvas.
class Game
  attr_accessor :junk, :piece, :piece_position

  def initialize
    @junk           = []
    @piece          = nil
    @piece_position = []
  end

  # The junk is painted first, then the active piece.
  def to_s
    Canvas.new.tap do |canvas|
      junk.each { |pos| canvas.paint(pos, "|") }
      canvas.paint_shape(piece, piece_position, "#")
    end.to_s
  end
end
```

I made a small change to `Canvas#paint_shape` so that the symbol used to display pieces on the grid was parameterized rather than stored in `Piece::SYMBOL`. This isn't a major change and was just another attempt at moving display code away from the data models.

After all this work, we've made it back to the output we were getting out of our first example, but without the smoke and mirrors. Still, the model is not as solid as I'd hoped for, and some last minute changes were needed to bridge the gap before this code was ready to implement the two use cases I was targeting.

Since the last iteration would be a bit cumbersome to describe in newsletter form, please just [check out my final commit](http://is.gd/jbvdB) for this project on github. With this new code, it's possible to get output identical to our target story through the following two examples.

### CASE 1: line_shape_demo.rb

```ruby
require_relative "falling_blocks"

game = FallingBlocks::Game.new
line_shape = FallingBlocks::Piece.new([[0,0],[0,1],[0,2],[0,3]])
game.piece = line_shape
game.piece_position = [3,3]
game.add_junk([[0,0], [1,0], [2,0], [2,1], [4,0],
              [4,1], [4,2], [5,0], [5,1], [6,0],
              [7,0], [8,0], [8,1], [9,0], [9,1],
              [9,2]])

puts game

puts "\nBECOMES:\n\n"

game.update_junk
puts game
```

### CASE 2: bended_shape_demo.rb

```ruby
require_relative "falling_blocks"

game = FallingBlocks::Game.new
bent_shape = FallingBlocks::Piece.new([[0,1],[0,2],[1,0],[1,1]])
game.piece = bent_shape
game.piece_position = [2,3]
game.add_junk([[0,0], [1,0], [2,0], [2,1], [4,0],
              [4,1], [4,2], [5,0], [5,1], [6,0],
              [7,0], [8,0], [8,1], [9,0], [9,1],
              [9,2]])

puts game

puts "\nBECOMES:\n\n"

game.update_junk
puts game
```

### Reflections

Once I outlined the story by drawing some ascii art, it took me just over 1.5 hours to produce working code that performs the transformations described. Overall, I'd call that a success.

That having been said, working on this problem was not without hurdles. While it turns out that removing completed lines and turning pieces into junk upon collision is surprisingly simple, I am still uneasy about my final design. It seems that there is considerable duplication between the grid maintained by `Game` and the `Canvas` object. But a refactoring here would be non-trivial, and I wouldn't want to attempt it without laying down some tests to minimize the amount of time hunting down subtle bugs.

For me, this is about as far as I can write code organically in a single sitting without either writing tests, or doing some proper design in front of a whiteboard, or a combination of the two. I think it's important to recognize this limit, and also note that it varies from person to person and project to project. The key to writing a good prototype is getting as close to that line as you can without flying off the edge of a cliff.

In the end though, what I like about this prototype is that it isn't just an illusion. With a little work, it'd be easy enough to scale up to my initial ambition of demonstrating a free falling piece. By adding some tests and doing some refactoring, it'd be possible to evolve this code into something that could be used in production rather than just treating it as throwaway demo-ware.

Hopefully, seeing how I decomposed the problem, and having a bit of insight into what my thought process was like as I worked on this project has helped you understand what goes into making proof-of-concept code in Ruby. I've not actually taught extensively about this process before, so describing it is a bit of an experiment for me. Let me know what you think!

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/044-issue-12-rapid-prototyping.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/013-obfuscations.md
================================================
Rather than always focusing on SERIOUS BUSINESS, I'd like to share something a little more light hearted today. Whether you celebrate Christmas or not, I think you'll find this little holiday themed hack a great deal of fun to play with.

### Christian Neukirchen's Christmas Hack

When I first started programming in Ruby, the ruby-talk mailing list was the best place to interact with the community and keep up with other active Ruby hackers. But because there were a lot more hobbyists in 2004 than there were people doing Ruby as a full time job, the posts focused on sharing fun hacks just as often as they did on discussing practical issues.

One of my favorites was [Christian Neukirchen](http://twitter.com/#!/chneukirchen)'s obfuscated Christmas message to the Ruby community in 2004. I've copied the source code below, and I encourage you to run it and see that it is indeed a valid Ruby program!

```ruby
s="IyBUaGFua3MgZm9yIGxvb2tpbmcgYXQgbXkgY29kZ
S4KIwojIENvcHlyaWdodCAoQykgMjAwMiAgQ2hyaXN0a
WFuI      E       5       l       d     Wtpc                                
mNoZ  W       4      gP       G       N obmV
1a2l      y       Y 2hlb  k       B     nbWF
pbC5  j       b    20+CiM     K       I yBUa
GlzI      H       Byb2dyYW        0     gaXM
gZnJ  l       Z  SBzb2Z0d2F   y       Z Tsge
W91I      G     NhbiByZWRpc3      R     yaWJ
1dGU  g        aXQgYW5kL29yCi M       g bW9k
aWZ5      I   Gl0IHVuZGVyIHRoZ    S     B0ZX
Jtcy  B      vZiB0aGUgR05VIEdlb       m VyYW
wgUH      V      ibGljIExpY       2     Vuc2
UuCg  p       T VERPVVQuc3lu  Y       y A9IH
RydW      U    KZDEsIGQyID0gM     C     4xNS
wgMC  4       wNgpzID0gIk1lcnJ        5 IGNo
cmlz      d  G1hcywgLi4uIGFuZCB   h     IGhh
cHB5  I     G5ldyB5ZWFyIgptID0gJ      X d7LC
AuID       ogISArICogMCBPIEB9CnUg P     SAiI
CIgK  i   BzLnNpemUKCnByaW50ICJcci    A gI3t
1fVx      y   IjsKCigwLi4ocy5z    a     XplL
TEpK  S      50b19hLnNvcnRfYnkg       e yByY
W5kI      H 0uZWFjaCB7IHxyfAogIH  N     sZWV
wIGQ  x    CiAgbmV4dCBpZiBzW3JdID     0 9ICI
gIls      wXQogIG0uZWFjaCB7IHxrfAo      gICA
gdVt  y  XSA9IGsKICAgIHByaW50ICIgIC   N 7dX1
cciI    KICAgIHNsZWVwIGQyCiAgfQogIHV    bcl0
gPSB   zW3JdCiAgcHJpbnQgIiAgI3t1fVxyI g p9Cg
pzbG  VlcCBkMgpwcmludCAiICAje3V9IVxyI   jsKc
2xlZ  X       A    gMwpwc     m       l udCA
iICA      j        e3V9IS A       g     LS1j
aHJp  c       z    JcbiI7     C       g ojIG
ZpbG      x        lciBzc G       F     jZSA
jIyM  j       I    yMjIyM     j       I yMjI
yMjI      y       M       j       I     yMjI
yMK";eval s.delete!(" \n").unpack("m*")[0]##
### Copyright (C) 2004  Christian Neukirchen
```

When run, this code prints out <i>"Merry christmas, ... and a happy new year! --chris2"</i> by randomly filling in each character in a little animation. After some folks commented on how cool this hack was, someone inevitably asked how it was done, which led another Ruby hacker Michael Neumann to post his guess to the list. Here is what he said:

>Pretty easy (except drawing the tree :). Write the source-code first, then `base64` encode it, and insert newlines/whitespace to make the picture.

At the time, I was too much of a beginner with Ruby to fully appreciate the solution discussion, and mostly just chalked it up to magic. But now, the above statement is immediately obvious to me, and since it wasn't further explained in the mailing list thread, I can give an example for those who are in the same shoes now that I was in a few years ago.

What I didn't know at the time is that `Base64` is an encoding that allows you to translate any binary data into purely printable characters by converting the contents into a string of characters that uses basic alphanumeric values. I would have known that if I read the documentation for Ruby's `Base64` standard library, but again, I was a newbie at the time. :)

It turns out that the idea for `Base64` encoding was extracted from how MIME attachments in email are implemented. This is all stuff you can find on wikipedia, so rather than digging into the gory details, let's see how it relates to the problem at hand.

The following small snippet should clear things up a bit.

```ruby
>> source = "puts 'hello world'"
=> "puts 'hello world'"
>> encoded_source = Base64.encode64(source)
=> "cHV0cyAnaGVsbG8gd29ybGQn\n"
>> Base64.decode64(encoded_source)
=> "puts 'hello world'"
>> eval Base64.decode64(encoded_source)
hello world
=> nil
```

Another way of decoding `Base64` encoded strings is via the `String#unpack` method, using the template `"m*"`. You can see this in Christian's code, which is what tipped Michael off in the first place. With that in mind, we can build a tiny obfuscated "Hello World" program.

```ruby
s = 
"c  H   V0cyA 
 n  a     G
 VsbG     8
 g  d     2
 9  y    bGQn"

eval s.delete(" \n").unpack("m*")[0]
```

In the end, Michael was right when he said this was pretty easy to do. As long
as you understand some basic string manipulation and how to decode a `base64` 
encoded string, you could use this technique to render your code as pretty much any arbitrary ASCII art.

Of course, one would expect that the guy who eventually would go on to create something as clever and useful as the [Rack web server interface](https://github.com/rack/rack) would have an extra trick or two up his sleeve. Not to disappoint, Christian confirmed Michael's explanation was valid, but in the process revealed that he felt it'd be too fragile and tedious to manually format the code himself into the desired ascii art.

For those curious about how he got around this problem, you can check out his [full solution](http://groups.google.com/group/comp.lang.ruby/msg/aa5b4f8eaa85e6b8?dmode=source)
 which implements a code generator that fills in a template with the `base64` encoded source.

While the code should be pretty easy to follow with a little effort, feel free to post questions here if you need help figuring things out. It's a really neat bit of code and is worth exploring, so I don't mind giving some hints where needed.

### Reflections

Writing this article reminded me of two lessons that I sometimes forget, even to this day.

The first lesson is that you can't judge the complexity of something by simply scratching its surface. When I saw this code posted to ruby-talk back in 2004, even though I was a newbie at the time, I could have figured it out if I only took a bit of time to study the topics that were being discussed. But since I saw a bunch of obscure binary data in the shape of a Christmas tree being passed to `eval()`, I judged the snippet as being too complicated for me, appreciated it for its magic, and moved on. That sort of lack of self-confidence can really prevent you from stumbling upon interesting new ideas, tools, and techniques.

The second lesson is that hacking doesn't always have to be SERIOUS BUSINESS.
Because I'm working on things I feel are super important most of the time, it's
easy for me to forget to be playful and generally curious. Sometimes I feel like
I'm too busy to do something just for the joy of the hack, and that worries me a bit. 
Writing this article reminded me that I should resist this temptation, and make more 
time and space in my life for playful discovery, because it is a great way to learn 
and have fun at the same time.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/045-issue-14-obfuscations.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/014-duck-typing.md
================================================
Type systems are a fundamental part of every programming language. In fact, the way a language designer approaches typing goes a long way towards outlining the way that thoughts are expressed in that language.

Statically typed languages like C++ and Java make us tend to think of objects as abstract data structures that fit within a neatly defined hierarchy. In these languages, there isn't a major distinction between an object's class and its type, as the two concepts are tied together directly at the implementation level. But the marriage of class and type found in these languages is not a universal law shared by all object oriented programming languages.

By contrast, Ruby's dynamic nature facilitates a style of type system known as duck typing. In particular, duck typing breaks the strong association between an object's class and its type by defining types based on what an object can do rather than what class it was born from. This subtle shift in semantics changes virtually everything about how you need to think about designing object oriented systems, making it a great topic for Practicing Ruby to cover.

While duck typing is possible in many other languages, Ruby is designed from the ground up to support this style of object oriented design. In this issue, we will cover some of the options that are available to us for doing Ruby-style type checking. 

### Type Checking Techniques

There are three common ways to do type checking in Ruby, two of which involve
duck typing, and one that does not. Here's an example of the approach 
that does *not* involve duck typing.

```ruby
# Reads data from a filename or an open IO object, choosing the strategy by
# the class of +source+. Any other kind of object yields nil.
def read_data(source)
  case source
  when String then File.read(source)
  when IO     then source.read
  end
end
```

If you've been working with Ruby for a while, you've probably written code that
did type checking in this fashion. Ruby's case statement is powerful, and
makes this sort of logic easy to write. Our `read_data()` function works as
expected in the following common scenarios:

```ruby
filename = "foo.txt"
read_data(filename) #=> reads the contents of foo.txt by calling 
                    #   File.read()


input = File.open("foo.txt")
read_data(input) #=> reads the contents of foo.txt via 
                 #   the passed in file handle
```
  
But things begin to fall apart a bit when we decide we'd like `read_data()` to
work with a `Tempfile`, or with a `StringIO` object, or perhaps with a mock
object we've defined in our tests. We have baked into our logic the assumption that the input is always either a descendent of `String` or a descendent of `IO`. The purpose of duck typing is to remove these restrictions by focusing only on the messages that are being passed back and forth between objects rather than what class they belong to. The code below demonstrates one way you can do that.

```ruby
# Reads data from anything that responds to read(), or from the file named
# by anything that responds to to_str(). Raises ArgumentError otherwise.
def read_data(source)
  if source.respond_to?(:read)
    source.read
  elsif source.respond_to?(:to_str)
    File.read(source.to_str)
  else
    raise ArgumentError
  end
end
```

With this modification, our method expects far less of its input. The passed in
object simply needs to implement either a meaningful `read()` or `to_str()`
method. In addition to being backwards compatible with our non-duck-typed code,
this new approach gives us access to many useful stand-in objects, including: `StringIO`, `Tempfile`, mock objects for testing, and any user defined objects that are either IO-like or String-like but not a descendent of either.

However, the following contrived example illustrates a final corner case that calls for a bit of extreme duck typing to resolve. Try to spot the problem before reading about how to solve it.

```ruby
# Wraps a tempfile and relays messages to it.
class FileProxy
  def initialize(tempfile)
    @tempfile = tempfile
  end

  # Any message FileProxy does not define itself is sent on to the
  # wrapped tempfile, together with its arguments and block.
  def method_missing(id, *args, &block)
    @tempfile.send(id, *args, &block)
  end
end
```

This code implements a proxy which forwards all of its messages to the wrapped `tempfile` object. However, like many hastily coded proxy objects in Ruby, it does not properly forward `respond_to?()` calls to the object it wraps. The irb session below illustrates the resulting false negative in our test.

```ruby
# Populate our tempfile through the proxy

>> proxy = FileProxy.new(Tempfile.new("foo.txt"))
=> #<FileProxy:0x39461c @tempfile=#<File:/var/f..foo.txt.7910.3>>
>> proxy << "foo bar baz"
=> #<File:/var/folders/sJ/sJo0IkPYFWCY3t5uH+gi0++++TQ/-Tmp-/foo.txt.7910.3>
>> proxy.rewind
=> 0

# Unsuccessfully test for presence of read() method

>> proxy.respond_to?(:read)
=> false

# But read() works as expected!

>> proxy.read
=> "foo bar baz"
```

This issue will cause `read_data()` to raise an `ArgumentError` when passed a `FileProxy`. In this case, the best solution is to fix `respond_to?()` so that it works as expected, but since you may often encounter libraries with bad behaviors like this, it's worth knowing what the duck typing fundamentalist would do in this situation.

```ruby
# Tries source.read first; if that raises NoMethodError, falls back to
# treating source as a String-like file path via to_str.
# Raises ArgumentError when the fallback raises NoMethodError as well.
#
# NOTE: a NoMethodError raised from somewhere inside read() itself is
# rescued too, and will silently trigger the to_str fallback.
def read_data(source)
  begin 
    return source.read 
  rescue NoMethodError
    # do nothing, just catch the specific error you'd expect if
    # read() was not present.
  end

  begin
    File.read(source.to_str)
  rescue NoMethodError
    raise ArgumentError # now we've run out of valid cases, so let's
                        # raise a meaningful error
   end
end
```

With this final version, we preserve all the benefits of the previous duck
typing example, but we can work with objects that have dishonest `respond_to?()`
methods. Unfortunately, the cost for such flexibility includes code that is less
pleasant to read and is almost certainly going to run slower than either of our
previous implementations. Using the exception system for control flow isn't cheap, 
even if this is the most 'pure' form of type checking we can do.

While we've talked about the benefits and drawbacks of each of these approaches, I haven't given any direct advice on whether one way of doing type checking is better than the others, simply because there is no simple answer to that question.

I will paint a clearer picture in the next article by showing several
realistic examples of why duck typing can come in handy. Until then, I will
leave you with a few things to think about.

### Questions / Study Topics

* Is explicit class checking ever absolutely necessary? Are there situations in which even if other options are available, checking the class of an object is still the best thing to do?

* Name something weird that can happen when you write your contracts on the messages your objects respond to rather than what class of object they are.

* Try to identify some feature of Ruby that relies on duck typing either for its basic functionality or as an extension point meant to be customized by application programmers.

* Share a bit of code which does explicit class comparison that you think would be very difficult to convert to a duck-typing style.

* Share a bit of code (either your own or from an OSS project you like) that you feel uses duck typing effectively.

Feel free to leave a comment below if any of the above topics interest you.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/046-issue-14-duck-typing.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/015-duck-typing.md
================================================
Today, I've got a handful of neat examples to share, each of which demonstrates an interesting use of duck typing. We'll start by looking at a feature built into Ruby's core, and then look at a few examples from other open source Ruby projects.

### Type Coercion, Ruby-style

Many dynamically typed languages that offer both integer and floating point
arithmetic are smart about doing the right thing based on whether or not any
floats are used in a given expression. While I assume that you are already 
familiar with Ruby's behavior, the following example demonstrates what 
I've just described.

```ruby
>> 3/2
=> 1
>> 3/2.0
=> 1.5
```

This is an obvious candidate for implementation level special casing, but since all the primitive numeric types in Ruby are actually objects, Ruby prefers something a bit more flexible and consistent. What actually happens when an arithmetic operation is performed on a Ruby number is that a method called `coerce()` is called to do any necessary type modifications so that the computations work as expected. The irb session shown below demonstrates calling `coerce()` directly on both a `Fixnum` and a `Float`.

```ruby
>> 3.coerce(2)
=> [2, 3]
>> 3.coerce(2.0)
=> [2.0, 3.0]
>> 3.0.coerce(3)
=> [3.0, 3.0]
>> 3.0.coerce(2.0)
=> [2.0, 3.0]
```

Note that `Fixnum#coerce` only returns an array of Float values when its argument is a Float, but that `Float#coerce` always does this conversion. While what is shown above only demonstrates how floating point coercion works, we can actually create our own objects that duck type to Ruby numbers by simply defining a `coerce()` method on them.

To demonstrate this, I have created a partial implementation of a `BinaryInteger` object. A `BinaryInteger` is meant to act similar to Ruby's `Fixnum` objects but display itself to the user in binary notation. Here's an example of how such an object might be used:

```ruby
>> int = BinaryInteger.new(40)
=> 0b101000
>> 2 + int
=> 0b101010
>> 2.5 + int
TypeError: BinaryInteger can't be coerced into Float
	from ./binary_integer.rb:49:in `+'
	from (irb):4
	from :0
```

The following class definition does not quite produce a complete `Numeric` work-alike but it is sufficient for making the previous example work as shown. It also serves to demonstrate that `coerce()` is indeed the magic that ties all of Ruby's arithmetic operations together.

```ruby
# A partial Fixnum work-alike that displays itself in binary notation.
# Only addition is implemented; coerce() is what allows a built-in number
# to appear on the left-hand side (e.g. 2 + BinaryInteger.new(40)).
class BinaryInteger
  def initialize(value)
    @value = value
  end

  attr_accessor :value

  # Lets integer-only checks (including our own coerce) accept us.
  def integer?
    true
  end

  # Returns a new BinaryInteger holding the sum of self and other.
  # Raises TypeError if other is not an integer.
  def +(other)
    a,b = coerce(other) # use our own coerce here
    self.class.new(a.value + b.value)
  end

  # Returns [other, self] with both elements as BinaryInteger objects,
  # matching the element order used by Ruby's built-in coerce methods
  # (3.coerce(2) #=> [2, 3]). Keeping that order matters as soon as a
  # non-commutative operator is added.
  # Raises TypeError if other is not an integer.
  def coerce(other)
    raise TypeError unless other.integer? 

    if other.respond_to?(:value)
      [other, self] # no coercion needed
    else
      [self.class.new(other), self]
    end
  end

  def inspect
    "0b#{@value.to_s(2)}"
  end
end
```

While it can be tricky to puzzle through how `coerce()` should work, since you can't know in advance what the calling object will be, it is a lot more dynamic than enforcing class based typing. Getting in the practice of thinking in terms of the interactions between the objects in your project rather than their static definitions can lead to some very good design insights.

In addition to the `coerce()` method for arithmetic, Ruby uses a whole score of other coercion hooks, including `to_int`, `to_str`, and `to_ary`. These methods are called on the arguments passed to a number of `Fixnum`, `String`, and `Array` methods. The neat thing is that there is no strict requirement that these methods actually return `Fixnum`, `String`, or `Array` objects, as long as they act close enough to the real thing where it counts (i.e. for whatever messages get sent to them).

We could probably spend all day going through other examples of where Ruby uses duck typing for coercion, for extension points, and tons of other uses. This is especially true when you consider that almost every mixin relies on a form of duck typing. For example, all functionality in `Enumerable` can work with anything that implements a sensible `each()` method. Similarly a suitable `<=>` operator unlocks all that `Comparable` has to offer. In both the core and standard library, you will find plenty of examples of this sort of design.

The key point to take away from these observations is that duck-typed APIs aren't some obscure edge case for the extensibility-obsessed, but instead, something baked into Ruby's philosophy from the ground up. This means that you can and should imitate this style in your own libraries when it makes sense to do so.

We'll now take a look at a pair of examples from the wild, one from my own project (Prawn), and another from Aaron Patterson's Rails 3.1 performance tuning adventures. Both involve the use of duck typing not for the purpose of infinite flexibility, but for addressing practical problems that come up in most moderately complex projects.

### Duck typing to avoid scope creep

The first example of duck typing in actual Ruby projects that I want to share is actually quite similar to the contrived `read_data()` example I shared on Tuesday. Today, rather than showing you the usage code first, I want you to take a look at the implementation and try to spot the usage of duck typing and guess at what it gains us before reading on.

```ruby
def image(file, options={})
  Prawn.verify_options [:at, :position, :vposition, :height,
                        :width, :scale, :fit], options

  if file.respond_to?(:read)
    image_content = file.read
  else
    raise ArgumentError, "#{file} not found" unless File.file?(file)
    image_content = File.binread(file)
  end

  # additional implementation details omitted.
end

# FULL IMPLEMENTATION OF image() at:
# https://github.com/sandal/prawn/blob/master/lib/prawn/images.rb#L65
```

If you guessed this code is used to make it so that the `image()` method can be called with either a file name or a file handle, you had the right idea. It does all of the things we discussed yesterday, allowing the use of this code with `StringIO`, `Tempfile`, any mock object that implements a `read()` method, etc. But the really interesting use case is the one that we actually wrote this feature for, shown below.

```ruby
require "open-uri"

Prawn::Document.generate("remote_images.pdf") do
  image open("http://prawn.majesticseacreature.com/images/prawn.png")
end
```

Through the use of `open-uri`, our duck-typed image method provides a nice way
of rendering remote content! While this might not have been an easy feature to
guess without knowing a bit about Prawn, it represents the elegant compromise that such an implementation affords us. Adding support for remote images was something that our users often asked for, but we wanted to avoid giving people the impression that Prawn was web-aware, and didn't want to support a special case for this sort of logic, as it'd require either an API change or an ugly hack to determine whether the provided string was either a URI or a file name.

The approach of accepting anything with a `read()` method combined with Ruby's standard library `open-uri` made for something that is easy to document and easy for our users to remember. While a simple hack, I was very satisfied with how this design turned out because it seemed to mostly eliminate the problem for our users while simultaneously avoiding some overly complex implementation code that might be brittle and hard to test.

These sorts of tough design decisions are certainly not unique to Prawn, so we can now turn our eyes to Aaron Patterson's performance optimization work on Rails 3.1.

### Duck typing for performance tuning

One area Aaron Patterson found to be a hotspot for many Rails apps is `ActiveRecord` scopes, which allow users to create custom filters. For example, consider the following example which filters by email address.

```ruby
# Model with a custom scope; the lambda receives the arguments passed
# to with_email() and returns the filtered relation.
class User < ActiveRecord::Base
  scope :with_email, lambda { |email|
    where(:email => email)
  }
end

# Above code provides functionality shown below
User.with_email("gregory.t.brown@gmail.com").count #=> 1
```

The block syntax is nice and clean for simple things, but can get a bit unwieldy for complex logic. For example, if we wanted to throw in validations for the entered email addresses, our block would end up getting a bit ugly unless we implemented some private class methods to help out. If you're thinking that private class methods sound weird and might be a bit of a code smell, they are, and that's one indication that this API needs to be more flexible than what it is.

That said, Aaron was on a performance tuning mission, not an API overhaul. The
problem he found with the API was initially not an aesthetic one but an
implementation detail: Executing code stored in a `Proc` object is considerably
more computationally expensive than an ordinary method call. While this isn't
likely to be a bottleneck in ordinary situations, it is common for high traffic
Rails applications to really hammer on their scopes, since they're used for
filtering the data that is presented to users. The key insight Aaron had was
that making some other object quack like a `Proc` is as easy as implementing 
a `call()` method.

Shown below is the one line patch that changes the behavior of `scope()` to
allow the use of any object that implements a meaningful `call()` method:

```ruby
# BEFORE
options = filter.is_a?(Proc) ? filter.call(*args) : filter

# AFTER
options = filter.respond_to?(:call) ? filter.call(*args) : filter
```

With this nearly microscopic change, we can write a faster `with_email()` scope that also leaves room for complex logic such as validations in its own neatly defined namespace. The following definition is functionally equivalent to our original code that passes a `Proc` to `scope()`, but has a lot more potential for future growth.

```ruby
# Callable filter object: because it implements call(), it can be handed
# to scope() in place of a Proc.
class EmailFilter 
  # model_class is the class on which where() will be invoked.
  def initialize(model_class)
    @model_class = model_class
  end

  # Validates the address, then returns the result of filtering
  # model_class by that email.
  def call(email)
    validate_address(email)
    @model_class.where(:email => email)
  end

  private

  # Placeholder for validation logic; currently does nothing.
  def validate_address(email)
    # do some validation magic here
  end
end

class User < ActiveRecord::Base
  scope :with_email, EmailFilter.new(self)
end
```

The nice thing about this patch is that nothing is lost by doing things this way. Often times, when moving from explicit class checking to behavior based checks, the only overhead is that debugging can be a bit more complicated since there is no easy way to verify that an object implementing `call()` actually does so in a sensible way. However, with adequate unit tests and decent documentation, this kind of fuzziness is rarely a big enough problem in practical applications to outweigh the benefits that come along with utilizing this technique.

Aside from the superficial improvements that come from converting `Proc` calls
to method calls, the general approach of writing duck typed interfaces tends to increase the potential for further performance improvements. When code is written to explicitly avoid assuming too much about how objects are implemented, it is easy to swap out objects that are more performant in edge cases, or implement aggressive caching where appropriate. While it may seem counterintuitive, the same dynamic nature that makes Ruby slow at the implementation level makes a wide range of algorithmic improvements possible. We unfortunately won't be exploring this topic today, but it would be a good topic for a future issue.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/047-issue-15-duck-typing-2.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/016-interesting-ruby-hackers.md
================================================
In this article, I've listed five people worth knowing about if you're involved in Ruby. If you're reading this in July 2011, please note that I wrote this article over 7 months ago, and so the descriptions you see below are slightly outdated. That having been said, I still think these five people are on the top of my list when it comes to interesting folks in our community.

### Wayne Seguin ([@wayneeseguin](http://twitter.com/wayneeseguin))

Wayne gave us [RVM](http://rvm.beginrescueend.com), the Ruby enVironment Manager. This tool quickly evolved from a bunch of crude shell script hacks to something that makes working with multiple Ruby versions and implementations a breeze. A tool which simply allowed manually switching between versions and implementations of Ruby would be useful on its own, but what makes RVM special is all the shiny extras that come with it.

In addition to basic version switching, RVM provides gemsets which are sandboxes for your gem installation environment. This makes it possible for each of your projects to have its own gemset, eliminating concerns about different projects having dependencies that clash with one another. While this is a problem that can often be solved by version locking, having an extra layer of protection and organization is great.

Another neat feature of RVM is the ability to include a `.rvmrc` in any of your project roots, which causes `rvm` to automatically switch to the desired ruby version, implementation, and gemset that you specify in that file. This reduces the amount of manual switching needed, and makes commands like `ruby`, `irb`, `rake`, and `gem` 'just work' without having to think about what context you are in.

Another thing that is amazing about RVM is the amount of support Wayne offers for it. He is nearly infamous for his availability on IRC, and he seems to genuinely want to help anyone who is trying to use RVM. I've seen him cornered at least a few times at Ruby conferences by folks asking questions about how to do this or that with RVM, and he always seems to handle those situations gracefully. This is exactly the kind of spirit that makes me appreciate someone's work and makes me want to keep watching them to see what great things they'll come up with.

<i>UPDATE 2011.07.19: You should also check out Wayne's [BDSM framework](http://bdsm.beginrescueend.com).</i>

### Eleanor McHugh ([@feyeleanor](http://twitter.com/feyeleanor))

Eleanor McHugh is an incredibly clever and entertaining hacker who has a deep interest in concurrency and low level UNIX plumbing. She spent a lot of time in 2010 working on [GoLightly](http://github.com/feyeleanor/GoLightly), a lightweight virtual machine running on top of the Go programming language. Her original goal was to re-build miniruby on top of Go, but building the VM became a priority in and of itself rather than just a stepping stone once she had a chance to dig into the problem.

What interests me about Eleanor is that she is the kind of person that decides to work on a project first and then figure out how to make it all come together later. I know she has been making some significant personal sacrifices so that she can work on GoLightly, and that sort of attitude is something I really like to see.

Eleanor was one of the guest speakers at Mendicant University in 2010, doing a Q&A session with me and the students. We touched on how pretty much every modern language handles concurrency, and then somehow deviated to discussing Eleanor's background in avionics, in which we collectively decided that TDD in that field worked something like "Whoops, the plane crashed, guess that's red." This of course led us to a more serious discussion about testing and testability, but was a pretty hilarious diversion along the way.

Where one can really learn a ton from Eleanor is in a small group or one on one conversation. She is the ideal person to catch up with on the hallway track of a conference, or to grab a drink with after an event. Each time I've met up with her I've been consistently entertained and inspired by her stories, and find myself fortunate to be able to call her a friend.

<i>UPDATE 2011.07.19: Eleanor, like me, spends most of her time hacking on community projects. [She can use some help with her travel expenses](http://pledgie.com/campaigns/15689), so if you like what she's doing, please do contribute what you can.</i>

### Brian Ford ([@brixen](http://twitter.com/brixen))

Brian is one of the key [Rubinius](http://rubini.us) team members and also was instrumental in the creation and adoption of the [RubySpec](http://github.com/rubyspec/rubyspec), an executable specification of the Ruby language written in RSpec-like syntax.

While I do not closely follow Rubinius, I studied it a bit when researching for a talk on Ruby versions and implementations. In the process, I came to learn about RubySpec and the specialized testing framework they've built for it called [mspec](https://github.com/rubyspec/mspec). This stuff is seriously cool.

As you can imagine, building a testing framework to test Ruby itself is a harder problem than simply testing code you write using Ruby. To account for this, mspec does all sorts of neat things, allowing tests to be restricted to particular versions, implementations, and even specific patch levels of different Ruby packages. Another interesting aspect of mspec's implementation is that because it's designed to help Ruby implementers test their work, the code for implementing the testing framework intentionally uses a minimal subset of Ruby functionality. As someone interested in tricky design problems, I found myself consistently impressed by how mspec is implemented. While I'm not sure exactly how much of this is Brian's handiwork, he is one of the key folks who set the project in motion.

RubySpec itself is really impressive. If you haven't looked through it before, I strongly encourage that you do so. It provides comprehensive unit tests for a huge amount of Ruby's behavior, covering each feature in minute detail. I guarantee you that if you spend a little time reading through the specs, you'll find an edge case about some Ruby feature that you didn't know about, no matter how solid your understanding of Ruby is.

<strike>While we haven't officially announced the details, Brian and I will be working together to run Ruby Mendicant University's first Free Software Clinic. This will be a chance for some of our students to work with me as we contribute something interesting that should make RubySpec even more useful than it already is. More information will come about this topic soon.</strike>

In addition to his work on Rubinius and RubySpec, Brian happens to be an incredible teacher. While most of my interactions with him have been over IRC, he is capable of explaining complex and deep computer science topics in a way that makes them feel natural and manageable. I finally had a chance to see him give a talk in person at RubyConf 2010, and by watching [this video](http://confreaks.net/videos/454-rubyconf2010-poisoning-rubinius-the-_why-and-how), I think you'll get a sense of what I mean.

<i>UPDATE 2011.07.19: Brian and I haven't had a chance to work on open source projects together with the Mendicant University students yet, but I hope we'll have a chance to do so some time in the not-too-distant future. I struck the mention of our plans out in the description above to make it clear this original plan didn't pan out.</i>

### Tony Arcieri ([@bascule](http://twitter.com/bascule))

Tony is another Ruby hacker interested in concurrency, particularly the Actor model of concurrency. He has built a number of concurrency tools in Ruby, including [revactor](http://github.com/tarcieri/revactor), but eventually decided that what he really wanted was the syntax of Ruby with the baked in concurrency model of Erlang. This led him to begin work on his own language, [Reia](http://github.com/tarcieri/reia).

For those who haven't seen it before, Reia is a fascinating language, even in its infancy. The syntax does look and feel like Ruby, but everything is Erlang under the hood. The functionality is mapped more towards Erlang than it is towards Ruby, which means that Reia is not aiming to be a feature complete Ruby implementation. Working in Reia is an interesting exercise in wondering what a smaller, more basic subset of Ruby's functionality might look like.

The neat thing about Reia is that a lot of its code is self hosting, similar to Rubinius. This, combined with the fact that you can easily reach down to the Erlang runtime and call functions provided in Erlang's core modules, makes it very easy to contribute to Reia's high level feature set. During RubyConf 2010 I decided to dip my toe in and help wrap a number of the methods in Erlang's List API to make them look and feel like the features provided by Ruby's Enumerable module, and I found contributing to the project very easy.

Tony is another hacker who is gifted at being a bit irreverent towards what are typically considered 'hard problems', and like Brian Ford, he is good at helping you understand that building a programming language isn't quite as hard as you might think. You can check this out for yourself by watching his [RubyConf 2010 talk](http://confreaks.net/videos/457-rubyconf2010-rev-revactor-reia).

<i>UPDATE 2011.07.19: Tony's projects move fast. I wouldn't be surprised if everything above is now out of date, but hunt down whatever he's working on now and you won't be disappointed.</i>

### Eric Hodel ([@drbrain](http://twitter.com/drbrain))

Eric has been in the Ruby community for as long as I can remember, and as a member of the Seattle Ruby Group, he automatically can be recognized as an insanely capable hacker.

What I feel Eric lacks is enough appreciation from the community for the very thankless work he was doing. Anyone who was around in Ruby before Rails knows that RubyGems greatly outgrew its initial design a long time ago. The code, originally hacked together at a conference, was never really meant to live in a world in which gem downloads are measured in the millions rather than the hundreds.

Similar arguments could be made about projects such as RDoc. Being able to autogenerate documentation is an important part of any language's infrastructure, but when Dave Thomas first put together RDoc, I doubt he could have anticipated how big Ruby would be and how long that code would still remain in active use.

Most people didn't want to touch RubyGems or RDoc, both because of how outdated the code was, and because any small change to either of them could easily piss off the entire Ruby world. But the more that Ruby's ecosystem evolved, the more it became clear that fighting against old, janky architecture was a huge waste of time.

Little by little, Eric worked towards fixing up both of these projects. Now, both RDoc and RubyGems are much, much better than what they were before. Each has an extension system that makes it so that the core code can continue to get smaller and simpler over time, rather than the other way around. In the case of RubyGems, that extension system brought us Gemcutter (now rubygems.org), which is now the official means of distributing gems to the Ruby community. While we have Nick Quaranto to thank for this innovation, we have Eric to thank for making RubyGems better so that Gemcutter could actually come into existence in the first place.

If there is one person in the Ruby community that deserves thanks for taking our old and busted tooling and making it serviceable again, it's Eric.

<i>UPDATE 2011.07.19: Even despite the RubyGems turbulence over the last several months, I stand by this opinion of Eric's contributions 100%</i>

### Who's interesting to you?

These are the folks who caught my interest over the last year or so. Who is someone you think is worth knowing about?

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/048-issue-16-interesting-ruby-hackers.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/017-interesting-ruby-writers.md
================================================
In the [last issue](http://practicingruby.com/articles/45) we covered five folks who are working on really interesting projects. In this issue, I'd like to pay some attention to five other Ruby hackers who have caught my attention through their excellent technical writing. As a technical author myself, I am particularly moved by folks who educate and inform our community through more than just code, but through written words as well.

Similar to Issue #16's list, this lineup is in no particular order, is far too short to do the community justice, and is purely subjective in nature. None of these folks knew I was going to write about them and my recommendations are completely unsolicited.

### Magnus Holm ([@judofyr](http://twitter.com/judofyr))

If you've been on Twitter or Hacker News lately, you probably have run into an article or two from [Timeless](http://timelessrepo.com), a blog that Magnus is running with help from Steve Klabnik. The topics here range from the philosophical (see: [There is No Talent](http://timelessrepo.com/there-is-no-talent)), to the obscure low-level Ruby hack (see: [Tailin' Ruby](http://timelessrepo.com/tailin-ruby)), but the quality is consistently well above that of your run of the mill blog.

As it turns out, the reason why Timeless feels different than your average blog is because it is designed to be that way. Magnus describes the project as <i>"an attempt at creating a new kind of blog with a focus on frequently updated, quality content which last longer than the beta of your favorite framework"</i>. With so much web content being incredibly ephemeral in nature, the notion of timeless content is attractive, even if it means a considerable challenge for the author.

As a small historical note, Magnus is also one of the five folks who have written articles for the Ruby Best Practices blog. While he only wrote a total of [two articles](http://blog.rubybestpractices.com/posts/judofyr/index.html) for us, both were very well received by our readers. So even if I can't take credit for his recent work, I can at least claim that I knew his potential years ago :)

### Jeff Kreeftmeijer ([@jkreeftmeijer](http://twitter.com/jkreeftmeijer))

Jeff is another blogger who has rapidly produced [a ton of great material](http://jeffkreeftmeijer.com/archive/) over the last year or so. The topics on his blog bounce around fairly frequently, though he seems to take a particular interest in developer practices, particularly surrounding git, gem packaging, and testing tools.

What interests me about Jeff's writing is that while he's talking about fairly common topics, he typically focuses on just a single discussion point, which makes each of his posts a conversation waiting to happen. The process of watching Jeff make a point about some opinion he has formed about a given tool or technique and then seeing the community respond to that point has been a really enjoyable experience so far. For a nice example of what I've just described, check out Jeff's [Be awesome, write your gemspec yourself](http://jeffkreeftmeijer.com/2010/be-awesome-write-your-gemspec-yourself/)

Jeff's conversational approach to writing really leads to some productive conversations across a broad cross-section of the Ruby community. It's easy to see this phenomenon by either looking at the comments on the article above, or any other article of Jeff's that has gained some widespread attention (read: most of them).

Similar to Magnus, I admire Jeff for breaking out of the standard 'blogging template' and developing his own writing style that seems to work quite well.

### MenTaLguY ([@MenTaLguY](http://twitter.com/#!/MenTaLguY))

I may be showing my long, flowing Ruby beard by recommending MenTaLguY on a list of bloggers who impressed me in 2010, since the bulk of the materials of his that interested me were published in 2005-2006. But two articles posted in 2010 really caught my eye, and are worth recognition on their own: [Atomic Operations in Ruby](http://moonbase.rydia.net/mental/blog/programming/atomic-operations-in-ruby.html) and [The Biggest Mistake Everyone Makes With Closures](http://moonbase.rydia.net/mental/blog/programming/the-biggest-mistake-everyone-makes-with-closures.html).

These two articles are guaranteed to expose edge cases that will surprise all but the most diligent Rubyists, and are representative of the two things MenTaLguY has historically been known for: concurrency and functional programming.

His blog is written in the old fashioned 'everything in one pot' style, and due to a mixture of non-technical and technical content, can be very challenging to dig through. For this reason, I've gone way back to his 2006 and 2007 writing and pulled a few articles worth checking out, which may entice you to dig even deeper.

* [Simple Lazy Streams](http://moonbase.rydia.net/mental/blog/programming/simple-lazy-streams.html) 
* [Currying in Ruby](http://moonbase.rydia.net/mental/blog/programming/currying-in-ruby.html) 
* [Monads in Ruby](http://moonbase.rydia.net/mental/writings/programming/monads-in-ruby/)
* [Concise Memoization](http://moonbase.rydia.net/mental/blog/programming/concise-memoization.html)
* [Eavesdropping on Expressions](http://moonbase.rydia.net/mental/blog/programming/eavesdropping-on-expressions.html) 
* [Ruby Symbols Explained](http://moonbase.rydia.net/mental/blog/programming/ruby-symbols-explained.html)
* [Concurrency Five Ways](http://moonbase.rydia.net/mental/blog/programming/concurrency-five-ways.html)

The reason I've included MenTaLguY in this lineup is because I feel like he is a representation of an unapologetically deeply technical person that we are seeing less and less of in a more commercialized ecosystem that tends to value shiny tools and productivity tips over deep knowledge and theory. While the new world isn't a bad one because we all can find jobs in it, I remember a different community that leaned more towards MenTaLguY's direction as recently as five years ago.

So this is my hat-tip to old school Rubyists, and to MenTaLguY for frequently being ahead of his time by focusing on concurrency and functional programming before it was cool.

### Aaron Patterson ([@tenderlove](http://twitter.com/tenderlove))

Aaron is another Ruby hacker who has been in the Ruby community for a long time and has worked on a number of cool projects, including Nokogiri and Mechanize. I've been a friend of Aaron's for a while, and have always admired his abilities as both a hacker and story teller. But the thing that really impressed me about Aaron's work in 2010 is his writing for the AT&T Interactive Engineering blog.

Recently, Aaron has been performance tuning Rails 3 by rewriting the low level ARel relational algebra library. Doing major refactorings of Rails internals is no easy task, but you might think otherwise if you read Aaron's great posts about his work on ARel, due to their incredible clarity. It may just be the giant fonts or the pretty graphs, but the articles Aaron has written about his work on ARel have been the most easy to understand resources on performance tuning in Ruby that I have ever seen.

Aaron brushes up against a couple other Rails and Ruby 1.9.2 topics on the AT&T blog as well, approaching them in a similar clear and light-hearted style. But those looking for a lot more can dig through the archives of tenderlovemaking.com, which similarly has no shortage of great Ruby content.

<i>UPDATE 2011.07.20: Looks like the AT&T interactive blog is down right now, so I couldn't provide any meaningful links above. But you can still find Aaron writing on his personal blog, [Tender Lovemaking](http://tenderlovemaking.com).</i>

### Peter Cooper ([@peterc](http://twitter.com/peterc))

It may be a bit "meta" to acknowledge Peter, since he's typically writing about people who are writing code, or writing about people who are writing about writing code, but nonetheless, he is someone that deserves both attention and appreciation for the valuable service he's been offering our community for years.

While I find a lot of new Ruby resources over Hacker News or Twitter, or just by working with my friends and colleagues, [Ruby Inside](http://rubyinside.com) remains the place I go when I want to put my finger on the pulse of the Ruby community. While small things may go unnoticed by Peter, most major new releases of important projects are covered by Ruby Inside. RI also is the place to go to see announcements about big upcoming events, or just to get a general feel for what Ruby hackers are up to.

A number of my own projects, including Practicing Ruby itself, have gained a lot more attention by being featured on Ruby Inside than they would have on their own. Over the years, I've seen this site evolve from a time in which the existence of a Ruby news site seemed a bit ridiculous since everyone in the community already pretty much knew each other to the present day in which we absolutely need to have someone sifting through the endless stream of new content so that we know what things we just can't afford to miss.

For better or for worse, Peter has done a ton to help Ruby gain exposure in the broader technical community, and has helped those within our community find their way to some really great resources. The high degree of professionalism and consistency that Ruby Inside showcases does a lot to create a good first impression for our community as a whole.

<i>UPDATE 2011.07.20: Ruby Inside is great, but less frequently updated these days. Be sure to check out other things Peter stewards, such as [Ruby Weekly](http://rubyweekly.com/) and [Ruby Flow](http://rubyflow.com)</i>

### Reflections

I must admit, I actually don't spend a lot of time reading technical books or blogs. I find that I'm so busy writing and actively practicing that hooking myself up to the firehose of new information seems like it would cause me to burst. In general, I think we have a community that is too obsessed with the consumption of information, and for that reason, it makes it hard for me to come up with a list of folks who I'd say can't be missed.

That having been said, a little bit of well placed information goes a long way, and the five people I have acknowledged in this post have really given me some major new insights at one point or another in my career. There are others I could say the same about, but I'll leave it to you to find them on your own.

Who is a great hacker-writer that you think is worth knowing about?

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/049-issues-17-interesting-ruby-writers.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/018-testing-dogma.md
================================================
Today I have a small rant about the test obsessed culture of Ruby. This area in particular is one in which the incredible enthusiasm surrounding the methodology seems to have outpaced the individual practitioner's ability to evaluate its utility. I'm not just talking about beginners here, but seasoned professionals as well. The sheer volume of rapidly changing testing tools and techniques is an indicator that we're nowhere near convergence, but that doesn't stop many from describing automated testing, particularly in the _Behavior Driven Development_ style, as if it were some sort of magic bullet. It isn't.

Speaking only from personal experience, I can tell you that most projects that I've spent more than a few hours hacking on end up better with some automated testing than they would with no testing at all. But I can also tell you that using all the latest and greatest tools and techniques, and attempting to "test all the fucking time" on large scale, long running projects, has actually hurt rather than helped me at times.

Some tests are a waste of time, and other tests can be actively harmful. If you have ever experienced the pain of refactoring a test suite in which someone overzealously mocked everything except for the object under test, simply to 'decouple the tests from the implementation code', you probably have an idea of what I mean. If you happen to be one of those folks who are still writing tests that way, you should study a bit more about what mocks are actually meant to be used for.

Personally, I've found testing to get in the way when I'm first exploring a new project. I almost always spend a couple hours writing crappy code without any tests at all. Test-first development does help drive interfaces and forces you to think about API design continuously, but it can only really be used to attain a local maximum. When you're trying to get a feel for a whole new concept, what you really need is a lantern, not a laser. Not even story frameworks like Cucumber or Steak give me the level of flexibility I need, so I go without testing frameworks entirely in my initial spikes. The closest thing I get to 'automated testing' in the first few hours of a project is a few lines of example code combined with some printlining to let me see what my code is doing. Pretty much everything else gets done through poking and prodding in irb.

Typically, the tradeoff of velocity for code quality that I make in a spike fairly quickly catches up with me, and that causes me to start to think about adding tests and basically just starting from scratch, only using code if I feel it's good enough to refactor into something more permanent. With enough ideas generated, and a decent high level sense of what my goals are, the laser-like quality of unit testing becomes more useful. But there are still a lot of things I don't test, even once a project is under way.

I don't test complex interactions with users within a system, unless I begin to frequently write code that has system-wide effects. I've definitely been in situations in which integration tests have been vital, but they've been few and far between. Part of this is because the projects I work on tend to be deeper than they are wide, but it's also because I just trust my design capabilities enough to not introduce too many changes that could break more than one part of my application at a time. I feel like the majority of integration testing goes into way too much detail about the expected paths through a system, and as a result, forces a bunch of false-negatives as minor changes that shouldn't affect users end up breaking tests.

Similarly, I don't place too much emphasis on testing things that I will invariably need to manually inspect. So for example, if I'm generating a PDF report, I don't typically bother testing my output in an automated fashion. What I will do instead is make it dead easy for me to generate that PDF so that I can look at it whenever I need to, and I'll keep a copy of the expected output around so that I can track down issues when they come up by doing some manual comparisons. Things would change somewhat if generating PDF reports was the core purpose of my application, but as a single feature, I feel automated testing would be mostly a waste of time.

There are other areas about testing that concern me, but I'll leave them for another day. For now, I'll try to end things on a positive note by sharing some of the areas where I do think testing is really, really helpful.

  * Dealing with regressions: In most scenarios, once you've created a minimal example to reproduce a bug, it's a small step to convert it into a unit test. As long as you introduce the test as far down the stack as possible, this minor investment is well worth the effort, as it will catch the bug and draw your attention to it if it ever gets reintroduced into your project.

  * Documenting project requirements: When written properly, tests say a lot more about intentions than implementation code does. Some folks feel that something like Cucumber and/or RSpec does a better job at expressing requirements than more low level testing frameworks, but this is primarily an aesthetic argument. No matter what framework you use, the purpose of a test is to describe how some code should be expected to work, which makes test suites a great way to learn about a project.

  * Safeguarding against harmful changes: For long running projects or projects with many developers, automated testing helps detect changes that have undesirable or unexpected side effects on the overall system. This is something that can also be dealt with by being well organized and fairly disciplined, but tests sure don't hurt. Of course, this effect is only accomplished if there is sufficient test coverage to catch those unexpected changes, an investment that may or may not be worth the effort.

Note that in the above, I didn't imply that testing results in writing better code. I also specifically avoided claiming that tests will help you avoid defects in the first place. While I think that occasionally testing contributes to accomplishing these two things, it really depends on the project as well as the individual developer's skill level and coding style. I also didn't claim that writing tests saves time or money. I don't think it actually does, and I wouldn't trust that claim until I saw some concrete evidence.

Even with these caveats, the gains listed in the bullet points above make the
juice worth the squeeze, in most cases. I can also say that automated testing
does make you think about software development in a very different way, and that
change in perspective might make you a better programmer. But testing is just
one of many things that can improve your craft.

So my humble advice to all Rubyists, newbies and seasoned professionals alike, is to cool your jets when it comes to testing. If we remember our main goal is to produce useful software, we can find room to make use of helpful techniques without letting them take center stage. This I think would be a huge step in the right direction.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/050-issues-18-testing-dogma.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/019-thoughts-on-mocking.md
================================================
When I originally published my [small rant on testing](http://practicingruby.com/articles/47), it generated a spirited discussion about a number of different topics. It even led [Bryan Liles](http://twitter.com/bryanl) to post a great set of [testing guidelines](https://gist.github.com/785610) to balance out my unfocused rant. But the topic that overshadowed almost everything else was that of best practices regarding mock objects. In this two part article, we'll try to shine some light on that topic, because it is clearly still a point of confusion and occasionally even controversy within our community.

In Issue #20, I will go over some examples of when I use mock objects and when I don't, and try to come up with some guidelines for building test suites that do their job without becoming too brittle. But before we can really discuss practices, we need to establish a baseline level of theory and background knowledge, which is what this post is all about.

Rather than doing the heavy lifting myself, I will point you to *the* article to read to better understand mock objects. It was written in 2004 (which is about the time that I first read it), and then revised in 2007. It is of course, Martin Fowler's essay [Mocks Aren't Stubs](http://martinfowler.com/articles/mocksArentStubs.html).

The article is long, somewhat dry, and includes large amounts of Java code. Don't let that discourage you from reading the whole thing from end to end, and if necessary, reading it again. Despite the title, Fowler goes into much deeper topics than mocks vs. stubs, and hits on many of the key ideas that separate 'mockists' from 'classicists'. Personally, I feel this is a false dichotomy, but you'll still be hard pressed to find a better article that gives the historical background of the design ideas that motivated the creation of testing and mocking frameworks in the first place.

I find Fowler's assessment to be reasonably fair, incredibly comprehensive, and a very useful place to start from if you are to form any argument about one approach vs. another when it comes to mocking. That having been said, I am critical of certain aspects of this essay, partly because I am looking at it with a 2011 perspective, and partly because I didn't come to Ruby from Java. For this reason, I've included my commentary on Fowler's article below. I encourage you to read his article in full before reading my comments, as they'll make much more sense that way.

### Commentary on Fowler's "Mocks Aren't Stubs"

Fowler explores two different concepts in this article: behavior vs. state based verification, and classical vs. mockist TDD. While he doesn't directly draw the lines between them, he sort of implies that mockists are always focusing on behavior verification and that classical TDD leans heavily towards state based verification. There are some issues with this line of thinking.

Claiming that mockists inherently focus on behavior is valid. The idea of mocking everything except the object under test means that purists would not be able to work with 'real objects' to perform state verification on. But this sort of practice does not actually require mocking everything except the object under test, what it requires is more carefully written tests.

Fowler claims that classicists tend towards writing single tests that explicitly test large clusters of code simultaneously, which requires them to produce a large amount of fixture data just to get their tests to run. But in a post-BDD world, most people know how to isolate their test cases so that they focus on one behavior at a time, whether or not they're utilizing mock objects. We also know to write comprehensive tests at both the higher and lower levels of our project, and so it isn't necessary to worry about exercising all the possible paths through our low level objects when calling them through a high level interface.

Personally, when I'm testing a feature that is towards the top layer of my stack, I try to make it so it requires as little configuration as possible to initialize. It shouldn't be necessary to load up fixture data for low level features I won't use, so really, I only need to trace a single path of execution and provide the right data to make it a valid path. I weigh the cost of this against using a mock object, and whenever the two are comparable, I prefer the former. Clearly this doesn't make me a mockist, but does it fit with Fowler's definition of a classicist? I don't know.

I was never deeply involved in Java programming, but from my limited experience with it, I feel that a lot of the arguments Fowler formed in this essay were and probably still are more relevant in the Java world. In Java, because you don't have things like mixins, indirection is much more common than in Ruby. You might need to create 6 objects just to do one small simple thing. In such an environment, mock objects must seem like a godsend, as when you multiply that phenomenon across your entire project, the cost of maintaining mocks would be far less than the cost of building complex setups for all those objects. But if you're experiencing the same sorts of problems in Ruby, you have a horrible design for your project.

In Ruby, it is possible and often recommendable to build systems that don't have very deep object nesting. For this reason, the ability to focus only on mocking direct neighbors of an object under test isn't as much of a selling point. If we take away the complex object systems component, we are mostly left with the idea that mockists prefer to write mocks so that they can focus on driving the object under test, and then go back to use their mocks as a contract for the next object they need to create. Again, something that makes a lot of sense in languages that punish you for creating new objects. Ruby is not like that.

In almost every scenario I can imagine, it's better to just go ahead and create a skeleton version of an object you need than it is to form a mock that is sort of floating in space. It will likely take less time, and working with the real object will give better insight into its design than trying to dream it up through a cumbersome mock interface. Fowler does touch on this approach being a valid one but claims that the mockist approach provides more design guidance. I don't see any evidence to support this claim, as the two are essentially functionally equivalent with respect to the object under test.

Fowler does an excellent job of covering the arguments about test isolation, and I don't have too much to add there except to say that I am firmly in favor of watching my whole test suite go up in smoke when I make a far reaching change. The false-positives that mocks give are downright dangerous in these scenarios, and arguments about it being difficult to find what caused the breakage are most likely an indication of some deeper problem: I've never had that issue even on my most complex projects.

Fowler's entire discussion about Design Style for classicists vs. mockists misses the mark. It probably had a lot of truth to it at the time he wrote the article, and may still have some truth outside of Ruby. But really, what he is describing here is the distinction between old fashioned regression-suite style TDD and what we now call Behavior Driven Development. In my opinion, BDD is just a new style of TDD that is more principled and focused on design as a first class component of writing testable code. So when Fowler says that mockists favor role based systems, I think this actually applies more generally to anyone practicing modern TDD.

### Reflections

As I said at the very beginning of this article, I think the distinction between mockists and classicists is a false dichotomy. I do agree that there is a wide chasm to cross between the original purpose of test frameworks and the new way of looking at things. But really, once you've decided that tests are more than just a safety net for dealing with regressions, you have already fallen outside of Fowler's classicist point of view. In my opinion, there is room for people who focus on behavior rather than state, but don't necessarily feel like mock objects are a good tool to be using by default. These folks are just as concerned about design and driving code through tests, but do not subscribe to absolutist viewpoints that require a single technique to be used at all times.

Since I consider myself to be in the third category that I've wedged between Fowler's two groups, I will need to share some examples of what that means in practical terms. The next article should help with that, because it provides an outline of how I decide when to mock and when to use real objects instead. Until then, I'd be happy to hear your thoughts on this topic, especially what you think of Fowler's article.

  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/051-issue-19-thoughts-on-mocking.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/020-thoughts-on-mocking.md
================================================
In the [last issue](http://practicingruby.com/articles/48), I encouraged everyone to read Martin Fowler's classic article [Mocks Aren't Stubs](http://martinfowler.com/articles/mocksArentStubs.html). Since this article is a bit dated and leans heavily towards Java style development practices, I also offered my own commentary to hopefully bridge the gap between Fowler's insights and the modern Ruby world. Now that we have the theories behind us, today we can focus on putting these ideas into practice.

There is a style of behavior driven development that encourages mocking everything except the object under test. Fowler calls folks who follow this methodology _mockists_, and more-or-less presents this approach as a completely valid alternative to classic TDD, in which test doubles of any variety are only used when absolutely necessary. While I think that such an assessment is valid in the context Fowler originally wrote his article (2004/Java), I personally feel that the _mockist_ style that Fowler describes has no place in modern Ruby development.

That having been said, when used in moderation, mocking frameworks can make testing a whole lot easier. Today, I'll be sharing my thoughts on when to use mocks and when not to. While these are not meant to be taken as strict rules to follow, they may shed some light on a middle ground between Fowler's classicist and mockist categories.

> **NOTE:** I'm using [citrusbyte/contest](http://github.com/citrusbyte/contest) and [mocha](https://github.com/floehopper/mocha) in the tests shown in this article, but the ideas should apply to any testing framework + mocking system.

### Good uses for mocks

When I think back on my testing habits, I find that virtually all of my use of mock objects falls into one or more of the following three categories:

 * Testing code which depends on an external resource of some sort (a web service, the filesystem, mail server, etc.)
 * Testing code which would involve a large amount of non-reusable setup and fixture data if you didn't mock at a high level.
 * Testing code which relies on features which are particularly computationally expensive.

Each of these scenarios has its caveats, but odds are, most moderate to large
size projects I work on hit at least one of them, and it isn't rare
to deal with all three of these issues simultaneously. That alone tells me that
having a good understanding of how to use mocks is a key part of TDD. I'll now
share some examples that hopefully help drive that point home.

<b>Isolation from external resources</b>

It would be great if our projects were completely self-contained, not having to deal with any shared resources, but this isn't realistic. Most projects need to deal with at least some external resources and may even have to tackle some systems integration problems. This often makes automating testing considerably more challenging than we would like it to be.

Thankfully, mock objects provide some shortcuts for us. While they won't help us with testing the code that needs to interact with the outside world, they can easily be used as stand-ins for our integration points when we are testing code that depends on outside resources. This makes it possible to test our high level logic without having to access whatever external resources our code needs to integrate with.

To demonstrate how useful this can be, we'll look at some simple tests from a tool I built which uses _win32ole_ to integrate with some Windows based truck routing software. Below, you can see a bit of test code that ensures a particular error gets raised when an invalid stop is added to the trip object.

```ruby
test "trip must be able to detect an invalid stop" do 
  trip = MilesDavis::Trip.create
  expect_an_invalid_stop

  error = assert_raises(MilesDavis::InvalidStopError) do 
    trip.stops << "Fakeville, FK" 
  end

  assert_equal "Cannot Find: Fakeville, FK", error.message
end 
```

If you guessed that the `expect_an_invalid_stop` method introduces a mock into
these tests, you were right! While it might look a bit like magic on a first
glance, I usually try to separate all but the most trivial mock logic into its
own helper methods to make the tests easier to maintain. Here's what
`expect_an_invalid_stop` actually does:

```ruby
def expect_an_invalid_stop
  server = mock()
  server.expects(:CheckPlaceName).returns(0)
  MilesDavis.expects(:server).returns(server)
end
```

We can now take a look at the implementation code that these tests run against. It is a simple module that gets mixed into the stops array when a new `Trip` is created.

```ruby
module StopValidation
  def <<(place)
    unless MilesDavis.server.CheckPlaceName(place) > 0
      raise InvalidStopError, "Cannot Find: #{place}"
    end

    super
  end
end
```

If you go back and re-read the test and mock code, it should be pretty clear what is going on here. When this system is actually running in production, `MilesDavis.server` refers to a _win32ole_ object, which explains the crappy camel case method names. But when running this particular test, we swap out the server call to return a mock object of our own creation.

By crafting our tests to mock out any interaction with the server, our test suite still works fine outside of the production environment. Even though the core purpose of this library is to integrate with a proprietary bit of Windows code running on a particular machine, we were able to develop all but the lowest layer entirely within our Linux and Mac-based development environments without needing any direct access to the software we were integrating with.

It's worth mentioning that although this use case was extracted directly from a real world project, it was hand picked to demonstrate the value of mocks in this sort of context. Other interactions with external resources are not so black and white. For example, if you're doing something like manipulating files on a system, it might make more sense to use temporary files than it would to introduce mock objects. There are many other scenarios like this, so it's usually best to weigh out the costs and benefits before going full steam ahead with mocks.

That having been said, mocking external resources is almost always a valid use case, if not the most optimal one in certain situations.

<b>Avoiding complex setup + fixtures</b>

The main reason why integration with external resources is a pain is because it often requires lots of configuration and setup just to get things running. A similar phenomenon occurs internally when projects get large enough to have some complex object relations and/or advanced data structures.

What follows is a bit of test code for a decorator that we built to wrap some low level geospatial data that we were storing via PostGIS.

```ruby
test "retreive a valid US postal area" do
  expect_postal_area_search("06511")
  geom = GeoRegion.by_postal("06511")
  assert_equal :postal, geom.interpreted_type
end
```

The mocking actually happens in `expect_postal_area_search`, which is shown
below:

```ruby
def expect_postal_area_search(zip)
  PostalArea.expects(:find_by_zcta).with(zip).returns(record_stub)
end
```

This mock emulates a simple `ActiveRecord` search, returning a stubbed out record which implements the bare minimum functionality required by our `GeoRegion` class. While somewhat uninteresting, below is the definition of `record_stub()`, for those curious.

```ruby
def record_stub
  stub(:the_geom => Object.new)
end
```

The guts of `GeoRegion` are actually a little bit complex, but our test was only meant to show that `GeoRegion.by_postal` returns an object that responds to `interpreted_type()` and returns the value `:postal`. This means we can focus on just that part of things without losing anything important.

The part of the code that does the geometry lookup is a simple delegator to `PostalArea.find_by_zcta`, which is what `expect_postal_area_search` mocks out for us. The stubbed out record it returns ends up being used in a helper method that defines the `interpreted_type` on the record via a mixin and then sets its value.

```ruby
def geom_for(record, type = nil)
  geom = (record && record.the_geom) or 
    raise UnknownFormatError, "Not a valid #{type}"

  geom.extend(Meta)
  geom.interpreted_type = type 
  geom.record = record 
  
  return geom 
end         
```

I won't bother tracing the longish execution path that lies on either side of this helper method, but the key takeaway here is that we're able to skip two layers of complexity by mocking out our call to `PostalArea` and stubbing out the actual geometry object that is associated with that `PostalArea`.

We could have loaded fixture data into our testing environment which had the relevant geospatial data to perform the sort of search we needed for this feature, but doing so would certainly be more complicated than the two simple lines we used to create our mock and stub.

Part of the reason mocks work out well here is that they allow you to focus on the behavior of `GeoRegion` rather than its implementation details. Even though under the hood a bunch of complex object manipulation is going on, we only really care about a very narrow set of functionality that `GeoRegion` adds as metadata to the geometry objects looked up through its search methods. If we had to actually populate the database with geometry data and concern ourselves with the messy relationships between these objects, our tests would be far less clear.

Of course, this technique only really makes sense when understanding and maintaining the mock object's interface is easier than creating the necessary setup code and fixtures to run the tests with real objects. Often times, the scales are tipped in the other direction, which I'll talk about a little later in this article. But before we get into the bad ideas, we have one more good one to cover.

<b>Mocking for performance reasons</b>

The first two techniques both had something in common: They made life easier by preventing certain code from actually being run. If we take that idea and apply it to performance, we find that running less code is usually faster than running more code.

Let's consider the following simple code that sends an email message to a group each time a new member is added.

```ruby
class Group

  def initialize(name, admin)
    @name    = name
    @admin   = admin
    @members = []
  end

  attr_reader :members, :name, :admin

  def <<(new_user)
    raise if members.include?(new_user)

    members << new_user
    broadcast("New user added", "#{new_user} joined the #{name} "+
              "group on #{Date.today}.")
  end

  def broadcast(title, content)
    mail = Mail.new

    mail.from(admin)
    mail.to(members)
    mail.subject(title)
    mail.body(content)

    mail.deliver
  end

end
```

Because `Group#broadcast` is almost entirely calls to the external Mail library, it arguably doesn't need unit tests, and instead could be covered by integration tests that set up a test mail server or something like that. However, `Group#<<` is a different story.

If we focus on the behavior of appending a user to the group, we don't actually need to focus on how `broadcast()` is implemented, we only need to verify that it is called. The following test demonstrates how to apply that line of thinking.

```ruby
test "adding users" do
  group = Group.new("Practicing Ruby", "greg@practicingruby.com")

  expect_broadcast(group, 2)

  group << "joe@example.com"
  group << "matz@example.com"

  assert_equal ["joe@example.com", "matz@example.com"], group.members
end
```

The most simple mock that reasonably covers the necessary functionality for `expect_broadcast()` is shown below.

```ruby
def expect_broadcast(group, count)
  group.expects(:broadcast).times(count)
end
```

We could actually go much farther here and verify the particular subject and content being passed to `broadcast()`, but as I said in [issue #18's mini-rant on testing](http://practicingruby.com/articles/47), I don't particularly like testing presentation logic that needs to be hand verified due to frequent superficial change. But personal preferences aside, even with a more complex set of expectations, using a mock object here is sure to be faster than actually sending an email.

This is a bit of a contrived example, but imagine a group object with many more methods that send broadcast emails. Add to that all the email enabled features across an application, and you'll quickly see the clock ticking longer and longer even if you do have a mail server that pipes everything to _/dev/null_.

This sort of scenario will come up in a number of different domains, and whenever it does, mock objects might be the right way to go. The main downside of using this sort of approach is that it eliminates the possibility of using your tests as a performance benchmark for your project. It is also worth noting that without proper integration tests, your mocks will happily go green in places that your real code may never be able to run. But since these issues tend to get spotted very quickly in manual testing and ordinary application use, it's usually okay to wait until this becomes a problem before worrying about it.

The three types of scenarios I've covered so far pretty much completely describe the valid use cases for mocks that have come up in my work. It isn't likely to be an exhaustive list, but I've worked on a fairly large number of projects across diverse domains and have yet to see another need for mocks that I didn't cover here. I did run up against a couple anti-patterns though, so let's take a look at those now before we wrap up.

### Bad uses for mocks

Two very popular use cases for mocks should actually be considered harmful:

* Using mocks for complete isolation of internal dependencies
* Using mocks as contracts for unwritten objects

To be sure, there are fairly strong arguments for each of these ideas; Fowler alone goes to great lengths making the case for them, and he is a moderate on these issues. But I'd argue the line of thinking is really geared towards languages that punish users for creating lots of objects with simple APIs connecting them together, such as Java. Let's take a look at some Ruby examples so that we can consider that point.

<b>Using mocks for complete isolation of internal dependencies</b>

Consider this simple variation on the theme of a user group, in which `Group#<<` constructs Person objects for each new member of a group.

```ruby
class Group
  def initialize
    @members = []
  end

  attr_reader :members

  def <<(person_name)
    members << Person.new(person_name)
  end

  def member_names
    members.map { |e| e.name }
  end
end
```

A mockist would not think about whether `Person` has external dependencies, complex setup requirements, or performance issues. They would just have started with a mock right away, perhaps something like this.

```ruby
class GroupTest < Test::Unit::TestCase
  test "adding members to a group" do
    group = Group.new

    expect_new_member("Gregory Brown")
    group << "Gregory Brown"

    expect_new_member("Jia Wu")
    group << "Jia Wu"

    assert_equal ["Gregory Brown", "Jia Wu"], group.member_names
  end

  def expect_new_member(member_name)
    Person.expects(:new).returns(stub(:name => member_name))
  end
end
```

The neat thing about the code above is that it really does create some major isolation, in that it will still allow you to test `Group#<<` and `Group#member_names` with nothing more than a bare class definition for `Person`. If we wanted to be hardcore, we could even create a `Group#new_person` method and mock that instead, and then we wouldn't even need a defined `Person` constant!

But before we get too excited, let's assume `Person` is just a trivial container class, such as the one shown below.

```ruby
class Person
  def initialize(name)
    @name = name
  end

  attr_reader :name
end
```

This code doesn't require any complex setup, it isn't using any external resources, and it doesn't have any performance intensive characteristics to it. That means that in order to test it directly, all we need to do is remove a bunch of lines from our previous test case.

```ruby
test "adding members to a group" do
  group = Group.new

  group << "Gregory Brown"
  group << "Jia Wu"

  assert_equal ["Gregory Brown", "Jia Wu"], group.member_names
end
```

By comparison, the above code is much more simple. But some smart folks still write it the other way. This is not without reason, and in fact has something to do with what happens when a change is made that causes tests to fail. To illustrate this, suppose that Person has a simple test that looks something like this.

```ruby
test "a user has a name attribute" do
  user = User.new("Gregory Brown")
  assert_equal "Gregory Brown", user.name
end
```

With the code we've seen so far, this test easily passes. But consider what happens when the implementation of `Person` is changed to something like the code below.

```ruby
class Person
  def initialize(name)
    @name = name.upcase
  end

  attr_reader :name
end
```

The version of our test suite which uses mock objects will have one failure in the test case that is specifically checking what `Person#name` returns. It will not cause our `Group` tests to fail, because a stubbed person object is used there instead. The version that tests against real `Person` objects, on the other hand, fails in both places. I've included the output of a test run using that direct approach so you can see what that looks like.

```
  1) Failure:
test_adding_members_to_a_group(GroupTest)
<["Gregory Brown", "Jia Wu"]> expected but was
<["GREGORY BROWN", "JIA WU"]>.

  2) Failure:
test_a_user_has_a_name_attribute(PersonTest)
<"Gregory Brown"> expected but was
<"GREGORY BROWN">.
```

This is exactly what mockists don't like to see. The argument is that as your programs get more complex, the dependencies between objects get larger and larger and you end up with tens or hundreds of failing tests all because of a change in one place. This phenomenon can and does occur, and it happens in smaller projects than you might think.

But still, doesn't something smell fishy?  The mock objects that are now being constructed in the tests for `Group#member_names` are now completely out of synchronization with the real specifications of the application. It isn't possible to get the output they test against in real uses of the application, and so while they adequately test the behavior of `Group#member_names`, the isolation has caused the mocks to diverge from reality, making them untrustworthy as 'living documentation' for the real system.

Personally, when I make a change that has potential system-wide effects, I prefer my tests to be verbose. Testing objects directly prevents this sort of out of sync representation of object behavior from being even possible, and so increases the reliability of the tests as both an integration testing safety net and as a documentation source.

As for sifting through the sea of information that gets spit out when you *don't* use mocks, there are ways of effectively sifting through it so as to not have problems even in very complex applications. But that is a topic more related to general debugging and may be better off described in another article.

We still have one more point to cover before we wrap up here, and this is now edging on being a massive article, so let's get to it.

<b>Using mocks as contracts for unwritten objects</b>

When writing code test first, it is possible to use mock objects as stand ins for objects that have not been defined yet. As I had mentioned before, with minor alterations we wouldn't even need to have a `Person` class defined in order to effectively test `Group#<<` and `Group#member_names`.

This is sort of neat, because it forces a radical form of behavior driven development. Since you're not working with the real collaborator objects at all in your tests, you are absolutely forced to work with their expected behaviors and not their implementations.

We've already hinted at some of the downsides of this approach though, in particular, that it is possible for our mocks to get out of sync with reality. We've seen an example of tests that don't fail, even though they describe invalid output from `Person#name`. Now let's see an example of a change that does cause our original mock-based tests to fail, even though there is nothing wrong with the code itself.

```ruby
# replace the Person object with this definition, which simply renames
# Person#name to Person#full_name
#
class Person
  def initialize(full_name)
    @full_name = full_name
  end

  attr_reader :full_name
end

class Group
  # collects names through Person#full_name, the renamed Person#name
  def member_names
    members.map(&:full_name)
  end
end
```

When we run the non-mocked version of our tests, nothing fails, because it never explicitly mentions the name attribute on `Person`. But the same cannot be said for our mocked code, which explicitly creates stubs with a name attribute, as shown below.

```ruby
  # Expects a call to Person.new and makes it return a stub that only
  # responds to #name (with member_name), so no real Person is built.
  def expect_new_member(member_name)
    Person.expects(:new).returns(stub(:name => member_name))
  end
```

You can see the test output below as evidence that our mock is now indeed broken.

```
  1) Failure:
test_adding_members_to_a_group(GroupTest)
    [/home/sandal/devel/practicing-ruby/group.rb:14:in `member_names'
     /home/sandal/devel/practicing-ruby/group.rb:14:in `map'
    ...
unexpected invocation: #<Mock:0x7ff71166e6c0>.full_name()
satisfied expectations:
- expected exactly once, already invoked once: Person.new(any_parameters)
- expected exactly once, already invoked once: Person.new(any_parameters)
- allowed any number of times, not yet invoked:
  #<Mock:0x7ff71166e6c0>.name(any_parameters)
- allowed any number of times, not yet invoked:
  #<Mock:0x7ff71166aac0>.name(any_parameters)
```

So here we see the knife cuts both ways. While it's true that our mocked code doesn't need to worry about the implementations of anything except the object under test, it does tightly bind to the interface, even when changes to those interfaces don't affect the object under test.

This allows us to make the same argument that mockists make about cascading errors, from the other side of the fence. As projects grow bigger, the number of red tests due to brittle mock objects grows larger and larger, making it harder to see what is actually broken and what needs to be changed. But unlike the problem of noisy directly tested objects, these sorts of failures only indicate a problem with the tests, not the code.

In languages where creating new objects is hard and time consuming, such a trade is probably worth considering. If we had to hand tune a Makefile, set up headers, declare variables, and consider memory management just to add a Person object like we might in C++, there might be a strong argument for how using mocks for driving tests helps you be more agile.

But in Ruby, in which our first tests can be made to pass with just a single line like the one below, you have to wonder whether the juice is worth the squeeze.

```ruby
  Person = Struct.new(:name) # Struct supplies Person.new(name) and Person#name
```

One important thing to note is that despite my criticisms, there are folks out there who use very elegant design techniques and testing practices that can minimize the problems I have pointed out. But personally, I feel like these folks succeed in spite of the path they've chosen rather than because of it. Using mocks to force yourself to think about design may work well as a gateway drug, but once you've learned how to think about object design on its own, you can chuck out the training wheels and just focus on writing good code.

The examples I've shown here might be a bit biased towards demonstrating my arguments, but at least should give a starting point for considering these issues on your own.

### Reflections

We've simultaneously shown in this article that mock objects are both really damn useful and ridiculously annoying at the same time. Personally, I tend to shy away from tooling that requires you to swallow a large amount of dogma and a boatload of theory before you can even make use of it, and that is the main reason why I'm concerned about the whole mockist approach to things. From what I've seen, while a stereotypical _classicist_ is hard to come by, these _mockist_ folks that Fowler describes do exist and in my opinion, do more harm than good in getting folks to write clear, easy to understand Ruby code.

Mocking frameworks are big guns, and should be treated as such. They can be life
savers when used in moderation, but can make you pull your hair out if you use them inappropriately.

In summary, it's a bad idea to swallow bad tasting medicine with the abstract promise that it will be better for you in the end. If you can see clear benefits from the use of mocks and have weighed them out on a case by case basis against your other options, you should be fine. But if you are mostly using them because the RSpec team tells you to, you're basically screwed :)

My final disclaimer about what I've said here is that it is entirely based on my own experiences. You've worked on different problems in different environments than I have, and I'd love to know how those experiences have influenced your own thoughts on mocking.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/052-issue-20-thoughts-on-mocking.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/021-how-to-practice.md
================================================
When I was 17 years old, I was asked by an Air Force military training
instructor how to get to Carnegie Hall. Before I could even think of why he'd
ask me this question, he shouted "Practice! Practice! Practice!" This was
followed by an hour long monologue about the finer points of properly making a
bed and stowing underwear in a locker, which might explain why my aspirations
for a military career started and ended in JROTC. But that cliché of a first
line stuck with me, even to this day.

You are reading a newsletter called Practicing Ruby. Therefore, you must not be
averse to practice or are at least not a stranger to it. However, skillful
practice is an art form, and not all types of practice should be considered
equal. I assume that our readers have goals that are closer to taking the stage
at Carnegie Hall than they are to perfectly folding a pair of underwear. With
that in mind, I'll be sharing my secrets about how I practice, in the hopes that
the techniques I've developed over the years will work for you, too.

Please decide at this point whether you have about 2-3 hours to spare within the
next few days to try out some exercises that go along with this article. If you
don't feel like you can do that, stop reading now and come back to this
two-part article when you have time for it. The ideas I am covering will only
really sink in if you put them into action.

If you've decided to be brave and read on, great! Now is the time to open up
your text editor or grab a sheet of paper and a pen.

### The $64,000 Question

The question you need to answer before it ever makes sense to practice, work
hard, or even think about anything seriously, is simply this: *What interesting 
problems do you need to solve?*

When properly considered, this question can serve as a compass that
points you in the right direction. The key is to pay attention to every last
detail that it demands, so that you can pick the right kind of goal. Here are
some guidelines for picking the right area to focus in:

* First, it must be interesting; something that stands out from
the ordinary. As long as YOU find the topic engaging, it needn't
interest the whole world. But at the very least, it must be something that
won't get lost in the background noise of our day to day lives.

* Second, it must be a problem. Without some form of conflict, without a
struggle between what is and what can be, it is difficult for creativity
or productivity to occur naturally. Problems tend to capture the
imagination in ways that 'exercises' or 'routines' never can.

* Third, it must be a need of yours. Notice I use the word need and not want.
While at the physiological level, needs are very basic (food, water, clothing,
shelter, sex, etc), psychologically our needs are much more complex. A
need is the kind of thing that eats at you until you find a way to satisfy
it. A need is something that when ignored, makes things worse than what they
should be.

* Fourth, it must be related to you. There are many, many problems out there
that are interesting and need to be solved. Which challenges are you uniquely
qualified to solve? Which challenges are you uniquely pained by if they're
left unsolved?

* Finally, it must be solvable, or at least show promise of being solvable.
There are problems out there that would be great if we could solve
them, but nothing seems to change about them. Fortunately, many problems 
can be solved with sufficient motivation. Don't waste your time on the 
impossible; feel free to settle for something challenging but 
surmountable.

Now that I've explained it in detail, I'll repeat the question, and then I
want you to take some time to think about a serious answer to it...

*What interesting problems do you need to solve?*

### Making a commitment

Derek Sivers claimed you should keep your intentions to yourself when planning
your goals. But Freakonomics thinks he's wrong, and so do I.

Every single project that I've been successful on, I've described in public
before I even broke ground. The [original Ruby Mendicant
project](http://www.oreillynet.com/ruby/blog/2008/03/id_love_to_quit_my_job_sort_of.html)
that brought us [Prawn](http://github.com/sandal/prawn), and my current work at
[Mendicant University](http://university.rubymendicant.com) are just two
examples that I can offer some anecdotal evidence for.

Now have I succeeded at every project I have discussed publicly? Hell no! But to
be honest, I've been kept busy enough by my successes that I don't need to worry
too much about my failures in life. I've also found that by sharing my ideas as
early as possible, I can get a sense if people are as excited about it as I am.
While a lukewarm response isn't necessarily an indicator of a bad project,
things that create buzz often indicate that you've struck a real nerve. When you
stumble across problems like that, you really ought to invest in solving them.

So now that you've written down your goal, the next step is to share it with
someone. I'd recommend posting it in the comments section at the end of this
article, but I'd pretty much accept any action you can do to share your idea
with someone else in the world. Tell your kid, tell your wife or husband. It
doesn't really matter who you tell, as long as you put it out there.

Now is the time where people most often second guess themselves, thinking their
idea is not ready yet, or that it won't be appreciated by others. If that's
really how you feel, fine, go back to step one and generate a problem that you
*can* share that still meets all those guidelines.

The next step is to shift from having an idea to having something you can
actually act on.

### Making progress

At the beginning of the article, I asked you to set aside two to three hours for
working through these exercises. Maybe you've spent a little bit of time
brainstorming already, but you probably have plenty of time left on the clock,
right? If so, now is the time to roll up your sleeves and get your hands dirty.

You've got a goal, and you've told someone about it. Now your next task is to
answer this question: What is a concrete, measurable action you can take that
will take you an hour or less, but still manage to get you closer to your goal?

Write down your answer to that question and then pass it along to whoever you
shared your goal with. Then, sit down and try to do what you said you
would do. Keep working on it until either you've solved the small subproblem
you've just described, or until the full hour runs out.

Once you've put the work in, make sure to let your selected observers know.
That'll make you want to build something that you actually believe meets your
goals, rather than lowering your standards to match what you've
actually produced. If you fell short of your objectives, don't be disappointed,
just explain what obstacles got in your way. Folks are more
understanding than we give them credit for, and your observer is not likely to
be an exception.

### Reflecting on your progress

You now have made a few steps in the direction of solving an important problem
that you find personally interesting. Great work!

Now, there is only one question left to ask: What scares you about this project?

If the answer to that is 'nothing', pause a moment and double check whether
you're really being honest with yourself. I personally find fear or extreme
uncertainty to be a common phenomenon when working on hard problems, and if I
feel 100% confident with no real doubts or worries, I begin to think that maybe
the problem I'm working on isn't worth my time. That said, a lack of fear also
sometimes comes from being in the state of flow, which is a really pleasant
experience. Try to distinguish between the two, and only settle for 'nothing' if
you really believe it to be true.

Assuming you do dig up some fears or doubts, write them down, in as much detail
as you'd like. Once you've done that, try to separate the things that you can
figure out answers to from the things that you can't. Take a quick glance at the
list of fears that you won't be able to reason your way out of, and decide if
they're worth giving up your project over. If they are worth surrendering over,
quit and start all the way at the beginning, forming a new goal for solving a
new problem that you really care about.

But if you have the courage to press on, throw out the list of
unresolvable fears and focus on the ones that you can do something about. Study
the topics they cover, talk to friends for ideas on how to get through them, and
then once you feel better, throw that list out too.

### Rinse and Repeat

Today I asked you for a couple hours of your time, to get you to practice your
craft by actually working towards solving something that is important to you.
Odds are, a couple hours wasn't enough time to fully solve one of your big
problems. But it was a start.

The good news is that the process recurses from here. You don't need to work in
one hour intervals, but ask yourself what you can accomplish in an afternoon, in
a work day, or in a week. Then, come up with a plan, tell people about it,
execute, reflect, and repeat.

This is what I do, and if it works for me, it may well work for you. Please try
it out and share your thoughts once you've worked through the suggested 
exercises in this article.

### Reflections

I started out by saying that practice is important, and you may have then
expected to hear me talk about how important it is to read books and blogs, or
to work on code katas, or something else that involves a disconnected form of
learning that is separate from real world applications. But frankly, I find that
sort of approach to be inefficient, when real goals do so much of a better job
of guiding you towards what is really important to focus on.

Hopefully by working through the exercises I've proposed here, you'll gain a
better understanding of why this approach to learning can be so powerful. In the
next issue, I will share some examples from my own projects in which I've used
this process to make good progress in a short period of time. Through that,
you'll hopefully be able to see how almost all of my time spent studying is done
through goal based learning rather than some sort of separate, isolated practice
sessions.

More than any other article I've published so far, I hope you will participate
in this one. Even if it seems a bit cheesy, I think that actually working
through these exercises will prove to be a worthwhile experience for you.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog.
There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/053-issue-21-how-to-practice.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/022-how-to-practice.md
================================================
> **NOTE:** I generally remove temporal references in Practicing Ruby 
articles to keep them disconnected from a particular point in time, but
in this case I intentionally left them in because the post is written
in a sort of "diary" style. It was originally posted in January 2011.

In the [previous issue](http://practicingruby.com/articles/50), I provided a
series of questions and instructions that outlined the way I practice. While
some may have been expecting code katas or other indirect exercises, my style is
more geared towards learning on the job. You start by figuring out what's
important to you, figure out a baby-step that you can make, and then execute.
Once you've brought yourself one step closer to your goal, you reflect a bit on
how things are going, in particular, what parts of the project still scare you.

I use this technique often, in particular, when I want to get started on a new
project or explore a new area that I'm not that familiar with. As luck would
have it, I actually have a new project I need to start, and so today I'll give
you the chance to metaphorically look over my shoulder as I work through this
exercise myself. If you haven't read
[Issue #21](http://practicingruby.com/articles/50), now is a good time to do that, so
that the rest of this article makes sense.

### Step 1: Find out what's important 

What interesting problems do you need to solve?

Lately, this question has been one that has caused me great anxiety. The success
of Mendicant University and even Practicing Ruby to a certain extent has caused
an explosion of ideas that all feel worthwhile and important to me. But they're
fairly easy to separate into wants and needs, and thankfully, much of what I've
come up with falls in the former category.

When I think about it, there is something that I feel I need to do sooner rather
than later. While our courses are booked up until May, we will need to start
admissions for our second trimester soon. Towards the end of last year, we
decided we wanted to do something more fun and lighthearted than an
entrance exam, but we didn't take much action since then. With the clock
ticking down, making headway on this project would surely help give me some
peace of mind.

What I'd like to build is a programming quiz site that is inspired by the
[Internet Problem Solving Contest](http://ipsc.ksp.sk) and [Project
Euler](http://projecteuler.net), but with an MU-themed twist. I'll have
Mendicant's co-founder [Jordan Byron](http://twitter.com/Jordan_Byron) to help
me with the frontend, but since he's busy with 100 other tasks for
MU, I'm the one who needs to build out the backend for this new app. I'll use my
need to write this article as motivation to help me break ground on this new
project today.

### Step 2: Make a commitment

I made the broad commitment to our students that we'd have a nice replacement
for MU's currently dull admissions process before the next trimester began. But
broad commitments don't particularly inspire action, so I needed to make a
specific commitment as well.

With that in mind, I told Jordan I'd have something for him to look
at today, even if it was just a small start. Since he'll be arriving at my
home within an hour of me finishing this article, I am already feeling the
pressure of having something to show for myself, which is a good thing.

### Step 3: Identify a baby step

The next step in this process is to come up with a small step to get you just a
little bit closer to your goal. I knew by the time I finished Issue #21
that I'd be working on this project, so I've been subconsciously chewing on
my baby-step for a couple days now. This to me is totally fine, it gives my
brain a chance to think things through and makes actually sitting down and
coding something easier. Of course, the key thing is that my delivery time was
still boxed in. If you leave things open ended, you may end up
talking yourself out of building anything at all.

When coming up with a tiny step, I try to focus on something that is core to the
underlying project, to maximize the amount I learn from the mini spike. In the
case of our quiz application (which we're calling PuzzleNode), validating user
submissions is one of the most important pieces of functionality.

What I'll do today is do a rough proof of concept of the submission validation
system, which compares the expected output to the actual file
uploaded by a user. I like to subdivide my tasks even when working
only for an hour, so I'm going to attack this in three phases:

1. A simple function that compares two files using a SHA1 hash and returns true
or false depending on whether they match.

1. A tiny Sinatra application that does the same, but introduces file uploads
into the picture.

1. A minimal Rails app that actually records whether a submission was valid or
invalid, and properly links puzzles with their expected output.

I'm setting my time limit for an hour, so I'm not sure how far I'll actually
get. No matter what happens, I'll try to jot down some notes to give you a feel
for my thought process as I work through this exercise.

### Step 4: Get one step closer

[06:40] I've got my clock set now, and I'm ready to get started. Please excuse
me while I go heads down for a bit. I'll pop up with some brief notes here each
time I reach a transition point, and then go into more detail in the reflections
phase.

[06:45] Basic [github project](https://github.com/sandal/pr-issue-22) set up for
this experiment.

[06:48] Add three text files, a reference which is meant to act as the expected
solution, a good file which is just a copy of the reference, and a bad file with
some modifications to make it not match the reference.

[06:51] Phase 1 complete!

```ruby
$ ruby check_solution.rb samples/reference.txt samples/good.txt 
GOOD

$ ruby check_solution.rb samples/reference.txt samples/bad.txt
BAD
```

Source code is dead simple, just a few lines.

```ruby
require "digest/sha1"

# Fingerprint the reference file (first argument) and the submitted file
# (second argument), in that order, then report whether the digests agree.
expected, actual = [ARGV[0], ARGV[1]].map do |path|
  Digest::SHA1.hexdigest(File.read(path))
end

if expected == actual
  puts "GOOD"
else
  puts "BAD"
end
```

[06:54] Next step is to remind myself how file uploads work in Sinatra, an
indicator of how rusty my frontend webdev knowledge is...

[06:57] Google for "File uploads sinatra" and find Peter Cooper talking about
[this blog
post](http://technotales.wordpress.com/2008/03/05/sinatra-the-simplest-thing-that-could-possibly-work/)
via Ruby Inside.

Outdated, but worth a shot since it's just a one liner.

[07:00] File uploads working via curl. Time to integrate the phase 1 code
into my Sinatra app.

[07:06] Have something I think should work but found some unexpected
bugs. Drat!

[07:08] Oh, apparently I just don't know how to use curl, working now! (albeit
with a little echo hack to add a newline)

```ruby
$ curl -F "data=@samples/bad.txt" 127.0.0.1:4567/reference.txt; echo
BAD
$ curl -F "data=@samples/good.txt" 127.0.0.1:4567/reference.txt; echo
GOOD
```

Source is still quite simple, so I can inline it here.

```ruby
require "rubygems"
require "sinatra"
require "digest/sha1"

# Only reference files named here may be checked against; the name comes
# straight from the URL and is interpolated into a path under samples/.
ACCEPTED_FILES = ["reference.txt"]

# Compares the SHA1 fingerprint of the uploaded "data" file against the
# fingerprint of the named reference file and answers GOOD or BAD.
post "/:expected" do
  reference_name = params[:expected]
  raise unless ACCEPTED_FILES.include?(reference_name)

  expected = Digest::SHA1.hexdigest(File.read("samples/#{reference_name}"))
  actual   = Digest::SHA1.hexdigest(params[:data][:tempfile].read)

  if expected == actual
    "GOOD"
  else
    "BAD"
  end
end
```

Off we go to phase 3!

[07:11] I need to think up a few AR models. Off to the whiteboard, back in a
moment.

[07:15] I've decided to cheat a bit. I realized that for a very basic demo, I
don't actually need to store the uploaded files anywhere, but instead, I just
need each puzzle to store its SHA1 fingerprint. Then, when a new submission is
made, you just hash the file uploaded by the user and compare it to the
associated puzzle.

This data model omits a lot, and would need a lot of love to actually be used in
our application, but it is sufficient for demonstrating just the validation
step.

```
Puzzle(name: text, fingerprint: text) 
Submission(puzzle_id: integer, correct: boolean)
```

Time to go spit out a Rails skeleton, I suppose. The key thing this has saved me
is a trip through paperclip's documentation, and a host of questions about
whether that's still the right tool for the job and whether it works with Rails
3 smoothly. I roughly assume that the answer to each of those questions is yes,
but better to not have to answer them right now.

[07:22] Only 18 minutes to go and rails is still installing, sloooooow.

[07:24] Still installing! Should have used --no-rdoc --no-ri!

[07:25] Finally finished installing, while waiting I stumbled upon [this post on
disabling documentation by
default](http://stackoverflow.com/questions/1381725/how-to-make-no-ri-no-rdoc-default-for-gem-install).
Will need to try that out later.

[07:26] Doh, never going to undo my stupid muscle memory

```
$ rails puzzlenode
Usage:
  rails new APP_PATH [options]
```

[07:29] Hmm, rails comes with a .gitignore file now? That's handy. Though I'm
pretty sure I just accidentally checked in my config/database.yml. Not
a big deal, this is just a spike, right?

[07:30] Wow, now is not the time to be punished by the fact that I aliased mvim
to sl on my Gentoo box in an effort to stop typing mvim where it doesn't work.
That seemed like a good idea at the time, of course.

[07:32] Toot toot! Time to switch consoles, this is taking forever. Dear reader,
you *have* googled sl by now, right? :)

[07:39] Ran out of time, so just messed with the data models a bit in the
console to imagine their interactions. Will need to save a proper implementation
for later.

```ruby
>> Puzzle.create(:name => "Reference", :fingerprint =>
>> Digest::SHA1.hexdigest(File.read("#{RAILS_ROOT}/samples/reference.txt")))

=> #<Puzzle id: 1, name: "Reference", fingerprint:
"a59eb2c51e07e2b7369baef8a0c3cb3b5d7ed3d9", created_at: "2011-01-28
12:37:41", updated_at: "2011-01-28 12:37:41">

>> Submission.create(:puzzle_id => 1, :correct => false)

=> #<Submission id: 1, puzzle_id: 1, correct: false, 
     created_at: "2011-01-28 12:38:41", updated_at: "2011-01-28 12:38:41">
>> Submission.create(:puzzle_id => 1, :correct => false)

=> #<Submission id: 2, puzzle_id: 1, correct: false, 
   created_at: "2011-01-28 12:38:42", updated_at: "2011-01-28 12:38:42">

>> Submission.create(:puzzle_id => 1, :correct => true)

=> #<Submission id: 3, puzzle_id: 1, correct: true, 
   created_at: "2011-01-28 12:38:45", updated_at: "2011-01-28 12:38:45">

>> Puzzle.find(1).submissions.where(:correct => true).count
=> 1
>> Puzzle.find(1).submissions.where(:correct => false).count
=> 2
```

Hah, at least my associations seem to be working correctly. I can has rails!

### Step 5: Reflect on your progress

This exercise went more or less as I expected it to, with a couple surprises
here and there. One thing that didn't dawn on me until I reached stage 3 is that
I don't necessarily need to worry about file attachments in this application.
While certain features such as having the ability to review user submissions or
display the reference output would require it, a simple alpha product could be
shipped without those features and still be quite usable.

The exercise hopefully also reflects a bit of realism, as I didn't rehearse it
ahead of time and ran into some stupid things that slowed me down, which is what
might happen to anyone. That's really okay, because in the process, I learned
some things worth looking into later on.

Now that I'm an hour into this project, my instructions call for me to reflect
on what scares me about it. I actually have a lot of general fears, but in order
to explain them I'd need to give a lot of context about the project and those
ideas are still fuzzy even in my own mind. That having been said, there is a 
concern that I can share which this small spike keeps reminding me of.

### What about this project scares me?

I'm not sure that I like fingerprinting as a method for
determining the validity of a solution. It scares me to think that if a problem
called for you to generate some XML, alterations to whitespace could
result in an otherwise perfectly valid submission getting rejected.

The way that IPSC and Project Euler solve this problem is by restricting
the submission format. In the case of IPSC this consists of bits of numbers or
text separated by newlines, and for Project Euler the solutions are always to
compute a simple number. I could adopt this strategy, but it makes me
worried that it'll limit the kinds of problems I can run at PuzzleNode.

I want to avoid making the problems at PuzzleNode too academic in
nature, with a focus more on practical problem solving and creative thinking.
Both Project Euler and IPSC do a good job of this within a subset of their
problems, but most of them are algorithmic. I wonder if that's due to the 
input constraints, and if it is, that would be bad for MU.

One possibility is that rather than doing a bitwise matching via a fingerprint,
I could force users to provide JSON data which I could then process and compare
based on the object structure. This would allow for much greater flexibility in
the way I validate submissions, and eliminate the failure-by-formatting issue I
pointed out before, but it'd both increase the overhead of submitting a solution
and make the backend functionality a good deal more complex.

I think that what I need to do is draft up a few puzzles and see how much the
current fingerprinting validation restrictions get in the way. I may be worrying
about nothing, but the only way to tell is to produce some content and see where
that brings me.

### Step 6: Rinse and Repeat

My next step is to actually flesh out the Rails backend, since I didn't get that
far with it. I'm glad to have found that I can defer file uploads until a bit
later, this is something I don't think I would have realized if I started
directly by jumping into the Rails boilerplate.

Once I have a minimal system functioning, my next step will be to come up with
a few more problems to test it against. I already have one idea in mind;
generating more should be easy.

With my next step planned, I feel confident that this project will keep 
moving forward.

### Closing Thoughts

This is a real outline of how I practice. At first, when I wrote up the set of
instructions, I thought formalizing it would make it feel artificial to me. But
honestly, once I got rolling on the spike, things happened pretty much the way
they always would, and the comments I left were just the thoughts that came up
in my mind as I went along. In that sense, it didn't feel like practice.

You'll notice that I start with what I know and work outwards from there. I
rarely try to think too hard about what I need to know ahead of time, because I
find it causes me to study the wrong things at the wrong time. A more formal
approach might have led me to study paperclip up front, because this process
involves file uploads. But through experimentation, I found that the 20-30
minutes that would have cost me is something that I can put off for several
weeks without it affecting my progress.

I tend not to plug into the firehose of information coming from books, blogs,
and reddit/HN for the same reason. Soaking up that material is begging to find a
solution in search of a problem, rather than the other way around. It's always
easy to ask for a recommendation at the time you actually need something, and
Google is pretty good at digging up well read blog posts or articles about
whatever tool you might need, and so I put off studying until it is necessary.

I don't do a whole lot of code katas, or little practice exercises that I can't
actually use for something. I will certainly do those things for entertainment,
but I don't schedule 'practice time' in my day to day life and honestly, I never
have. There is no shortage of necessary learning that takes place when chasing
practical goals, and the reward is much greater than just having an abstract
feeling of learning a bit more, you end up with something you can use.

The more I can make my life my practice, the less I need to be disciplined about
making time for formal academic exercises. I admit that there are
a lot of things about my lifestyle and circumstances that make me especially
blessed, but I wasn't always in a fortunate position and would give 
similar advice even when I was struggling to make ends meet.

So in closing, it may be true that the way to Carnegie Hall is via the
"Practice! Practice! Practice!" path, but in my mind, that means less time
playing with yourself in the comfort of your own home, and more time on small
stages until they lead you to a slightly bigger stage which you can then occupy
until it too, becomes too small.

This is how I practice. I hope hearing about it has been useful to you.

<b>UPDATE 2011.09.09</b>: <i> The [PuzzleNode website](http://puzzlenode.com)
was successfully launched on time, and has been used to conduct three entrance
exams for Mendicant University already. The puzzles there are language agnostic,
and may be fun to try out even if you aren't planning to apply to MU. But I'd be
just as happy to hear that you're too busy working on real projects that you
care a lot about instead.</i>
 
> **NOTE:** This article has also been published on the Ruby Best Practices blog. 
There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/054-issue-22-how-to-practice.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/023-solid-design.md
================================================
SOLID is a collection of five object oriented design principles that go nicely
together. Here's a super brief summary pulled from [the wikipedia
page](http://en.wikipedia.org/wiki/SOLID) on the topic:

* Single responsibility principle: an object should have only a single
responsibility.

* Open/closed principle: an object should be open for extension, but closed for
modification.

* Liskov substitution principle: objects in a program should be replaceable with
instances of their subtypes without altering the correctness of that program.

* Interface segregation principle: many client specific interfaces are better
than one general purpose interface.

* Dependency inversion principle: depend upon abstractions, do not depend upon
concretions

The term SOLID was coined by Uncle Bob Martin to group together these important
concepts. I had heard of each of the design principles SOLID covers over the
years, but didn't really think much of them until I attended a great talk by
Sandi Metz at GoRuCo 2009. Fortunately, Confreaks recorded [Sandi's
talk](http://confreaks.net/videos/240-goruco2009-solid-object-oriented-design)
so I won't need to try to summarize it here.

I'd strongly recommend watching that video before moving on, because it will go
through SOLID in a lot more detail than what I plan to do in this article. You
might also watch [another video on the same topic by Jim
Weirich](http://confreaks.net/videos/185-rubyconf2009-solid-ruby), which like
pretty much any other talk Jim has done, is likely to blow your mind.

Rather than giving a tutorial on these principles, I'm going to trust you to
either read up on them or watch the videos I've linked to. This way, we can
focus on what I think is a much more interesting problem: How to apply these
ideas to real code.

### Single responsibility principle

The idea that an object should have only a single responsibility shouldn't come
as a surprise. This concept is one of the selling points for object oriented
languages that sets them apart from the more procedural systems that preceded
them. The hard part about putting this idea into practice is figuring out just
how wide to cast the 'single responsibility' net.

In my experience, most objects are born with just one goal in mind, and so
adhere to this principle at least superficially in the early stages of
greenfield projects. It's later when systems get more complex that our objects
lose their identity. To demonstrate this phenomenon, we can look at the life
cycle of the Document object from my PDF generation library Prawn.

Back in early 2008, when the project was just beginning, my idea was that the
job of the Document class would be to wrap the low level concept of a Document
at the PDF layer, with a few extra convenience functions at the high level. For
a sketch of what that looked like at the time, we can take a look at the
object's public methods.

```
Directly implemented on Prawn::Document
  start_new_page, page_count, page_size, page_layout, render, render_file

Mixed in via Prawn::Document::Graphics
  line_width=, line, line_to, curve_to, curve, circle_at, ellipse_at,
  polygon, rectangle, stroke, fill, fill_color, stroke_color

Mixed in via Prawn::Document::PageGeometry
  page_dimensions
```

This is so early in Prawn's history that it didn't even have text support yet.
While the API wasn't perfectly well factored at this point in time, the fact
that almost all the above methods directly produced PDF instructions or
manipulated low level structures made me feel that it was a reasonably cohesive
set of features.

Fast forward by a year, and we end up with feature explosion on `Document`.
Here's what shipped in Prawn 0.4.1:

```
Directly implemented on Prawn::Document
  start_new_page, page_count, cursor, render, render_file, bounds,
  bounds=, move_up, move_down, pad_top, pad_bottom, pad, mask,
  compression_enabled?, y, margin_box, margins, page_size,
  page_layout, font_size

Included via Prawn::Document::Text
  text, text_options, height_of (via Prawn::Document::Text::Wrapping),
  naive_wrap (via Prawn::Document::Text::Wrapping)

Included via Prawn::Document::PageGeometry
  page_dimensions

Included via Prawn::Document::Internals
  ref, add_content, proc_set, page_resources, page_fonts, 
  page_xobjects, names

Included via Prawn::Document::Annotations
  annotate, text_annotation, link_annotation

Included via Prawn::Document::Destinations
  dests, add_dest, dest_xyz, dest_fit,  dest_fit_horizontally, 
  dest_fit_vertically, dest_fit_rect, dest_fit_bounds,
  dest_fit_bounds_horizontally, dest_fit_bounds_vertically

Included via Prawn::Graphics
  move_to, line_to, curve_to, rectangle, line_width=, line_width,
  line, horizontal_line, horizontal_rule, vertical_line, curve,
  circle_at, ellipse_at, polygon, stroke, stroke_bounds, fill,
  fill_and_stroke, fill_color (via Prawn::Document::Color), 
  stroke_color (via Prawn::Document::Color)

Included via Prawn::Images
  image
```

The above list of methods is almost embarrassingly scattershot, but it was due
to an oversight. The mistake I made was thinking that splitting different
aspects of functionality into modules was a valid way of respecting the single
responsibility principle. But this is deeply flawed thinking, because the end
result of pulling roughly 50 methods into a single object by mixing in 8
modules is a single object, `Prawn::Document`, having 60+ public methods
all sharing the same state and namespace. Any illusion of a physical separation
of concerns is all smoke and mirrors here.

Once an object gets this fat, thinking about the cohesiveness of the interface
is the most minor detail to be worried about. I've focused on the 60 public
methods here, but if we count private methods, they would easily exceed 100.
Sometimes folks think that private methods in mixins don't actually get mixed
into the base object, but that's an incorrect assumption, making this problem
much, much worse.

Having close to two hundred methods living in one space causes you to run into
really basic, fundamental problems such as namespace clashes on method names and
variables. It also makes data corruption downright easy, because it's hard to
keep track of how a couple hundred methods manipulate a common dataset. Once you
reach this point, you're back in procedural coding land where all manner of bad
things can happen.

Now that I've sufficiently kicked my own ass, I can tell you the solution to
this problem is simple, if not easy to refactor towards once you've already made
the mess: you just introduce more objects. To do so, we need to identify the
different concerns and group them together, putting abstraction barriers between
their implementations and the behaviors they provide.

An easy realization to make is that over time, Prawn's `Document` became two
different things at the conceptual level. When we see methods like
`page_xobjects`, `ref`, and `proc_set`, we know that there are some low level
tools in use here. But what about methods like move_up, move_down, text, image,
and others like them? These are clearly meant for something that resembles a
domain specific language, and Prawn does look gorgeous at the high level, just
see the simple example below to see what I mean.

```ruby
Prawn::Document.generate('hello.pdf') do 
  text "Hello Prawn!"
end 
```

With 20/20 hindsight, the solution to this problem is obvious: Produce a whole
layer of low level tooling that closely follows the PDF spec, creating objects
for managing things like a PDF-level page, the rendering of raw PDF strings,
etc. Make as many objects as necessary to do that, and then maybe provide a
facade that makes interacting with them a bit easier.

Then, for the higher level features, do the same thing. Have an object whose job
is to provide nice looking methods that rely on Prawn's lower level
objects to do the dirty work. Dedicate whole objects or even clusters of objects
to text, images, graphics, and any other cluster of functionality that
previously was mixed into Document directly. The objects might require a bit
more wiring, but the facade can hide that by doing things like the pseudo-code
below.

```ruby
# Facade method: wraps the given contents in a dedicated text element and
# asks that element to render itself on the current page.
def text(contents, options={})
  Prawn::TextElement.new(contents, options).render_on(current_page)
end
```

Naming the benefits of this over the previous design would take a long time, but
we've at least cut out those pesky namespace and data corruption concerns
while providing a cohesive API.

While I don't think that the scale of our design problem in Prawn is comparable
to what most Ruby hackers are likely to experience in their day to day work, it
does show just how bad things can get when you start dealing with very complex
systems. Prawn has improved a lot since its 0.4.1 release, but undoing the
damage that was done by neglecting this for so long has been a slow and painful
process for us.

The real lesson here is that you can't respect SRP without real abstraction
barriers. SRP is about more than just creating a cohesive API, you actually need
to create a physical separation of concerns at the implementation level of your
system.

Since it's very likely that you're experiencing this sort of issue on a smaller
scale in the projects you're working on, keeping the story about what happened
to me in Prawn in mind may help you learn from my mistakes instead of your own.

### Open/closed principle

The open/closed principle tells us that an object should be open for extension,
but closed for modification. This can mean a lot of different things, but the
basic idea is that when you introduce a new behavior to an existing system,
rather than modifying old objects you should create new objects which inherit
from or delegate to the target object you wish to extend. The theoretical payoff
is that taking this approach improves the stability of your application by
preventing existing objects from changing frequently, which also makes
dependency chains a bit less fragile because there are fewer moving parts to
worry about.

Personally, I feel that treating this principle as an absolute law would lead to
the creation of a lot of unnecessary wrapper objects that could make your
application harder to understand and maintain, so much that it might outweigh
the stability benefits you'd gain. But that doesn't mean these ideas don't have
their value; in fact, they provide an excellent alternative to extensive
monkeypatching of third party code.

To illustrate this, I'd like to talk about 
[i18n_alchemy](https://github.com/carlosantoniodasilva/i18n_alchemy), a project
by Carlos Antonio da Silva that was built as a student project for his Mendicant
University core course. The goal of this project was to make it easy to add
localizations for numeric, time, and date values in ActiveRecord.

Early on in the course, Carlos came to me with an implementation that
more-or-less followed the standard operating procedure for developing Rails
plugins. While Carlos shouldn't be faulted for following community trends here,
the weapon of choice was a shotgun blast into an `ActiveRecord::Base` object's
namespace, via a mixin which could be used on a per-model level. By including
this module, you would end up with behavior that looked a bit like this:

```ruby
some_model = SomeModel.where(something)
some_model.a_number     #=> <a localized value>
some_model.a_number_raw #=> <the original numeric value>
```

Now, there are pros and cons to this approach, but I felt pretty sure that we
could do better, and through conversations with Carlos, we settled on a much
better design that didn't make such far reaching changes to the model objects.
Before I explain how it works, I'd like to show an example of how i18n_alchemy
works now:

```ruby  
some_model = SomeModel.where(something)
some_model.a_number     #=> <the original numeric value>

localized_model = some_model.localized
localized_model.a_number #=> <a localized value>
```

In this new implementation, you do have to explicitly ask for a localized
object, but that small change gains us a lot. The module that gives us
`SomeModel#localized` only introduces that one method, rather than a hook that
gets run for every `ActiveRecord::Base` method. That means that
ordinary calls to models extended by i18n_alchemy still work as they always did.

Our localized model acts differently, but it's actually not an instance of
SomeModel at all. Instead, it is a simple proxy object that defines special
accessors for the methods that i18n_alchemy localizes, delegating everything
else to the target model instance.

This makes it possible for the consumer to choose when it'd be best to work with
the localized object, and when it'd be best to work with the model directly.
Unlike the first implementation which breaks the ordinary expected
behavior of an ActiveRecord model, this approach creates a new entity which can
have new behaviors while reusing old functionality.

We were both pretty proud of the results here, because it gives some of the
convenient feel of mixing in some new functionality into an existing Ruby object
without the many downsides. This of course is only a single example of how you
can use OCP in your own code, but I think it's a particularly good one.

### Liskov substitution principle

The idea behind Liskov substitution is that functions that are designed to operate
on a given type of object should work without modification when they operate on
objects that belong to a subtype of the original type. In many object oriented
languages, the type of an object is closely tied to its class, and so in those
languages, this principle mostly describes a rule about a relationship between a
subclass and a superclass. In Ruby, this concept is a bit more fluid, and
probably requires a bit more explanation up front.

When we talk about the type of an object in Ruby, we're concerned with
what messages that object responds to rather than what class that object is an
instance of. This seems like a subtle difference, but it has a profound
impact on how we think about things. In Ruby, type checking can range from very
strict to none at all, as shown by the examples below.

```ruby
  ## Different ways of type checking, from most to least coarse ##

  # verify the class of an object matches a specific class
  object.class == Array

  # verify object's class descends from a specific class
  object.kind_of?(Array)   

  # verify a specific module is mixed into this object
  object.kind_of?(Enumerable)
  
  # verify object claims to understand the specified message
  object.respond_to?(:sort)   

  # don't verify, trust object to either behave or raise an error
  object.sort                 
```

Regardless of the level of granularity of the definition, objects that are meant
to be treated as subtypes of a base type should not break the contracts of the
base type. This is a very hard standard to live up to when dealing with ordinary
class inheritance or module mixins, since you basically need to know the
behavior specifications for everything in the ancestry chain, and so the rule of
thumb is basically not to inherit from anything or mix in a module unless you're
fairly certain that the behavior you're implementing will not interfere with the
internal operations of your ancestors.

To demonstrate a bit of a weird LSP issue, let's think about what happens when
you subclass an `ActiveRecord::Base` object. Technically speaking, if we give
ourselves a pass for breaking the signatures of methods provided by Object, we'd
still need to keep track of all the behaviors `ActiveRecord::Base` provides, and
take care not to violate them. Here's a brief list of method names, but keep in
mind we'd also need to match signatures and return values.

```ruby
>> ActiveRecord::Base.instance_methods(false).sort
=> ["==", "[]", "[]=", "attribute_for_inspect", "attribute_names",
"attribute_present?", "attribute_types_cached_by_default", "attributes",
"attributes=", "attributes_before_type_cast", "becomes", "cache_key",
"clone", "colorize_logging", "column_for_attribute", "configurations",
"connection", "connection_handler", "decrement", "decrement!",
"default_scoping", "default_timezone", "delete", "destroy",
"destroy_without_callbacks", "destroy_without_transactions",
"destroyed?", "eql?", "freeze", "frozen?", "has_attribute?", "hash",
"id", "id=", "id_before_type_cast", "include_root_in_json", "increment",
"increment!", "inspect", "lock_optimistically", "logger",
"nested_attributes_options", "new_record?", "partial_updates",
"partial_updates?", "pluralize_table_names", "primary_key_prefix_type",
"quoted_id", "readonly!", "readonly?", "record_timestamps", "reload",
"reload_without_autosave_associations", "reload_without_dirty", "save",
"save!", "save_without_dirty", "save_without_dirty!",
"save_without_transactions", "save_without_transactions!",
"save_without_validation", "save_without_validation!", "schema_format",
"skip_time_zone_conversion_for_attributes", "store_full_sti_class",
"store_full_sti_class?", "table_name_prefix", "table_name_suffix",
"time_zone_aware_attributes", "timestamped_migrations", "to_param",
"toggle", "toggle!", "update_attribute", "update_attributes",
"update_attributes!", "valid?", "valid_without_callbacks?",
"write_attribute", "write_attribute_without_dirty"]
```

Hopefully your impression after reading this list is that LSP is basically
impossible to be a purist about, but let's try to come up with a plausible
violation that isn't some obscure edge case. For example, what happens if we're
building a database model for describing a linux system configuration, which has
a field called logger in it? You can certainly at least get away with the
migration for it without Rails complaining, using something like the code shown
below.

```ruby
# Migration that creates (and can remove) the linux_configs table.
class CreateLinuxConfigs < ActiveRecord::Migration
  # Creates the table with a text column named `logger` plus the timestamp
  # columns added by t.timestamps.
  def self.up
    create_table :linux_configs do |t|
      t.text :logger  # same name as a method ActiveRecord::Base already defines
      t.timestamps
    end
  end

  # Reverses the migration by dropping the table.
  def self.down
    drop_table :linux_configs
  end
end
```

The standard behavior of `ActiveRecord`'s models is to provide dynamic accessors
to a record's database fields, which means we should expect the following
behavior:

```ruby
config        = LinuxConfig.new
config.logger = "syslog-ng"
config.logger #=> "syslog-ng"
```

But because `ActiveRecord::Base` also implements a method called `logger`, and the
dynamic attribute lookup is just a method_missing hack, we end up with a
different behavior:

```ruby
config        = LinuxConfig.new
config.logger = "syslog-ng" 
config.logger #=> #<ActiveSupport::BufferedLogger:0x00000000b6de38 
              #     @level=0, @buffer={}, @auto_flushing=1, 
              #     @guard=#<Mutex:0x00000000b6dde8>,
              #     @log=#<File:/home/x/demo/log/development.log>>
```

If you've been following closely, you probably saw this coming from a mile away,
even if you couldn't predict the exact behavior. It's worth mentioning that even
Rails knows that this sort of setup will lead to bad things, but their checks
which raise an error when they spot this LSP violation apparently aren't
comprehensive. But to be fair, if we try to set this at the time our record was
initialized, or if we try to use write_attribute, we get a pretty decent error
message.

```ruby
>> config = LinuxConfig.new(:logger => "syslog-ng")
ActiveRecord::DangerousAttributeError: logger is defined by ActiveRecord
```

```ruby
>> config = LinuxConfig.new
=> #<LinuxConfig id: nil, logger: nil, created_at: nil, updated_at: nil>
>> config.write_attribute(:logger, "syslog-ng")
ActiveRecord::DangerousAttributeError: logger is defined by ActiveRecord
```

This sort of proactive error checking is actually more than we should expect
from most parent classes, `ActiveRecord::Base` just takes special consideration
because it is so widely used. You can't expect every object you might subclass
to even try to catch these sorts of violations, and it's not a great idea to
introduce this sort of logic into your own base classes without carefully
considering the context. Of course, that doesn't mean that there aren't measures
you can take to avoid LSP violations in code that you design yourself.

I don't want to go into too much detail here, but there are two techniques I
like to use for mitigating LSP issues. The first one is object composition, and
the second is defining per-object behavior. Just as an experiment, I've thrown
together a rethinking of how `ActiveRecord` could handle dynamic accessors in a
slightly more robust way.

```ruby
require "delegate"

# Builds per-record proxies whose attribute accessors are defined on the
# proxy object itself rather than on the record's class.
module DynamicFinderProxy

  # Makes build_proxy callable directly on the module.
  extend self

  # Wraps +record+ in a SimpleDelegator and gives that one proxy object a
  # reader and a writer for each name in record.attribute_names.
  #
  # The accessors call read_attribute / write_attribute, which the proxy does
  # not define itself, so those calls are delegated to the wrapped record.
  # Any other method the proxy lacks is delegated the same way.
  #
  # Returns the proxy.
  def build_proxy(record)
    proxy = SimpleDelegator.new(record)
    record.attribute_names.each do |a|
      # Define on the singleton class so only this proxy gains the accessors.
      proxy.singleton_class.instance_eval do
        define_method(a) { read_attribute(a) }
        define_method("#{a}=") { |v| write_attribute(a,v) }
      end
    end

    proxy
  end

end

# Bare-bones stand-in for an ActiveRecord-style base class. Attributes are
# kept in instance variables, and every read or write is announced on the
# record's logger.
class FakeActiveRecord

  # Builds the record by hand and returns it wrapped in a proxy produced by
  # DynamicFinderProxy, so callers never hold the raw record.
  def self.new
    record = allocate
    record.send(:initialize)
    DynamicFinderProxy.build_proxy(record)
  end

  # Called with names, remembers them as the column names of the receiving
  # class. Always returns the names currently stored.
  def self.column_names(*names)
    return @column_names if names.empty?

    @column_names = names
  end

  # The attribute names of a record are the column names of its class.
  def attribute_names
    self.class.column_names
  end

  # Logs the read, then returns the value stored for attribute +a+.
  def read_attribute(a)
    logger.puts "Reading #{a}"
    instance_variable_get "@#{a}"
  end

  # Logs the write, then stores +v+ as the value of attribute +a+.
  def write_attribute(a,v)
    logger.puts "Writing #{a}"
    instance_variable_set "@#{a}", v
  end

  # Where attribute reads and writes are reported.
  def logger
    STDOUT
  end
end

# Declares an attribute named "logger", the same name as the logger method
# that FakeActiveRecord defines.
class LinuxConfig < FakeActiveRecord
  column_names "logger", "crontab"
end

# LinuxConfig.new returns a proxy whose own accessors handle these calls, so
# the attribute value comes back rather than the record's STDOUT logger.
record = LinuxConfig.new
record.logger = "syslog-ng"   # logs "Writing logger"
p record.logger               # logs "Reading logger", then prints "syslog-ng"
```

Now, I'll admit that there is some deep voodoo in this code, but it at least
indicates to me that we should be thinking differently about our options in
Ruby. We have more than just vanilla inheritance to play with, and even ordinary
mixins have their limitations, so maybe we need a whole new set of design
principles that take Ruby's deeply dynamic nature into account? Or perhaps I've
just passed the midway point in a very long article and have decided to go off
on a little tangent to keep myself entertained. I'll let you be the judge.

### Interface segregation principle

I've seen a couple different interpretations of the interface segregation
principle, with the most narrow ones almost directly outlining the use case for
Java-style interfaces, which is to prevent code from specifying that an object
must be a specific type when all that is required is a certain set of methods to
have a meaningful implementation.

Ruby offers a lot of flexibility and its dynamic typing makes a lot of interface
segregation principle violations just go away on their own. That having been
said, we still see a lot of `is_a?()` and `respond_to?()` checks which are both
a form of LSP violation.

To protect against those violations, the best bet is to embrace duck typing as
much as possible. Since this article is already super long and we've already
covered duck typing extensively in issues
[#14](http://practicingruby.com/articles/43) and
[#15](http://practicingruby.com/articles/44) of Practicing Ruby, it would be
sufficient to simply re-read those articles if you need a refresher and then
promptly move on to the next principle. But in case you want to dig deeper, here
are a couple more articles related to this topic that you should definitely read
if you haven't seen them before. All three are about how to get around
explicitly naming classes in case statements, which is a form of LSP violation.

* [Ruby case statements and kind_of? (Sandi Metz)](http://sandimetz.com/2009/06/ruby-case-statements-and-kindof.html)

* [The Double Dispatch Dance (Aaron Patterson)](http://blog.rubybestpractices.com/posts/aaronp/001_double_dispatch_dance.html)

* [The Decorator Delegator Disco (Gregory Brown)](http://blog.rubybestpractices.com/posts/gregory/008-decorator-delegator-disco.html)

That should add an extra hour or so of homework for you. This is getting a bit
crazy though, so let's hit that last principle and call it a day.

### Dependency inversion principle

You probably already know about the values of dependency inversion (aka
dependency injection) if you've been working in Ruby for a while now. You also
probably know that unlike some other languages, there really isn't a need for DI
frameworks because Ruby implements all the necessary tools for good DI at the
language level. But in case you didn't get the memo, I'll go through a quick
example of how dependency inversion can come in handy.

Suppose we have a simple object, like a `Roster`, which keeps track of a list of
people, and we have a `RosterPrinter` which creates formatted output from that
list. Then we might end up with some code similar to what is shown below.

```ruby
# Keeps an ordered list of participants and can present itself as text.
class Roster
  def initialize
    @participants = Array.new
  end

  # Appends a participant to the roster.
  def <<(new_participant)
    @participants.push(new_participant)
  end

  # Full names of every participant, in the order they were added.
  def participant_names
    @participants.map(&:full_name)
  end

  # Formatted roster, as rendered by RosterPrinter.
  def to_s
    printer = RosterPrinter.new(participant_names)
    printer.to_s
  end
end

# Plain-text presentation of a list of participant names.
class RosterPrinter
  def initialize(participant_names)
    @participant_names = participant_names
  end

  # A "Participants:" header followed by one bulleted line per name.
  def to_s
    bullets = @participant_names.map { |name| "* #{name}" }
    "Participants:\n" + bullets.join("\n")
  end
end
```

The nice thing about this code is that it separates the presentation of a roster
from its data representation, bringing it in line with the single
responsibility principle. But the problem with it is that `Roster` and
`RosterPrinter` are needlessly coupled, which limits the value of
separating the objects in the first place. Modifying `Roster#to_s()` can
solve this problem.

```ruby
class Roster
  # other methods same as before

  # Renders the roster using the given printer class. Any object works as
  # +printer+ so long as it responds to new(participant_names) and the result
  # responds to to_s; RosterPrinter is used when no printer is passed.
  def to_s(printer=RosterPrinter)
    printer.new(participant_names).to_s
  end
end

# usage
roster.to_s 
```

This new code is functionally equivalent to our previous example when called
with no arguments, but opens a whole host of new opportunities. For example, we
can trivially swap in any printer object we'd like now.

```ruby  
# HTML presentation of a list of participant names. It is constructed and
# used the same way as RosterPrinter, so it can be passed to Roster#to_s.
class HTMLRosterPrinter
  # participant_names - Array of names to list.
  def initialize(participant_names)
    @participant_names = participant_names
  end

  # Returns a heading followed by an unordered list with one item per name.
  # Names are inserted as-is; escape them first if they may contain markup.
  def to_s
    # map returns an Array, which must be joined before String#+ can use it.
    items = @participant_names.map { |e| "<li>#{e}</li>" }.join
    "<h3>Participants</h3><ul>" + items + "</ul>"
  end
end

# usage
roster.to_s(HTMLRosterPrinter)
```

By injecting the printer object into `Roster`, we avoid resorting to
something as uncouth as creating a `Roster` subclass for the sole purpose of
wiring up the `HTMLRosterPrinter`.

Of course, the most common place that talk about dependency inversion comes up
is when folks are thinking about automated testing. While Ruby makes it possible
to mock out calls to pretty much any object, it's a whole lot cleaner to pass in
raw mock objects than it is to set expectations on real objects.

Dependency inversion can really come in handy, but it's important to provide
sensible defaults so that you don't end up forcing consumers of your API to do a
lot of tedious wiring. The trick is to make it so you can swap out
implementations easily; it's not as important for your code to have no opinion
about which implementation it should use. Folks sometimes forget this and as a
result their code gets quite annoying to work with. However, Ruby makes it easy
to provide defaults, so there is no real reason why this issue can't be averted.

### Reflections

This article is much longer than I expected it would be, but I feel like I've
just scratched the surface. An interesting thing about the SOLID principles is
that they all sort of play into each other, so you tend to get the most out of
them by looking at all five concepts at once rather than each one in isolation.

One thing I want to emphasize is that when I make use of SOLID or any other set
of design principles, I use them as a metric rather than a set of
constructive rules. I don't typically set out designing a system with all of
these different guidelines in mind, as that would give me a claustrophobic
feeling. However, when the time comes to sanity check a new design or make
incremental improvements to an old one during a refactoring session, SOLID
provides a good checklist for pinpointing areas of my code that might deserve
some rethinking.

Sometimes you break these rules by accident, and that's okay. Sometimes you
break them because you are making a conscious trade to avoid some other bad
thing from happening, and that's okay too. As long as you're regularly checking
your assumptions about things and actually caring about the overall design of
your system, you shouldn't feel guilty for not following these guidelines
perfectly. In fact, it is more dangerous to blindly follow design principles to
the letter than it is to completely ignore them.

We have much, much more design discussion to come, so hopefully you enjoyed this
article. :)

> **NOTE:** This article has also been published on the Ruby Best Practices blog. There 
[may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/055-issue-23-solid-design.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/024-connascence.md
================================================
My article on the [SOLID design
principles](http://practicingruby.com/articles/52) was inspired by a great talk
I saw by Sandi Metz at GoRuCo 2009. Coincidentally, this  article is inspired by
another great talk I saw in 2009, called [The Building Blocks of
Modularity](http://confreaks.net/videos/77-mwrc2009-the-building-blocks-of-modularity).
This talk was given by Jim Weirich at MWRC, and if you haven't seen it yet, I
urge you to stop what you're doing and watch it right now.

In the talk, Jim jokingly claims he's presenting on the "Grand Unified Theory of
Software Development". Personally, I think that isn't too far off the mark,
because connascence is a fundamentally simple concept when compared to things
like the SOLID principles or any of the other design concepts we'll be studying in
this series.

### Brief introduction to connascence for the uninitiated

Since I didn't know the concept of connascence even existed before seeing Jim's
talk, and because it's not a super common discussion topic even among design
geeks, we should at least steal some [content from
Wikipedia](http://en.wikipedia.org/wiki/Connascent_software_components) to frame
our discussion around:

><i>"Two software components are connascent if a change in one would require the other to be modified in order to maintain the overall correctness of the system. Connascence is a way to characterize and reason about certain types of complexity in software systems."</i>

If you haven't watched Jim's talk yet, I'll remind you to go ahead and do that
now. But if for some reason you can't or won't, you should know that the kinds of
complexity that connascence can be used to reason about all have
to do with coupling. The relationship between the concept of connascence to the
concept of coupling becomes a little more clear when you look at the various
kinds of connascence that can be found in software systems. Below I've listed
out the various kinds of connascence in order from weakest to strongest:

* <b>Name:</b> when multiple components must agree on the name of an entity.
* <b>Type:</b> when multiple components must agree on the type of an entity.
* <b>Meaning:</b> when multiple components must agree on the meaning of specific values.
* <b>Position:</b> when multiple components must agree on the order of values.
* <b>Algorithm:</b> when multiple components must agree on a particular algorithm.
* <b>Execution (order):</b> when the order of execution of multiple components is important.
* <b>Timing:</b> when the timing of the execution of multiple components is important.
* <b>Identity:</b> when multiple components must reference the same entity.

Knowing the various kinds of connascence gives us a metric for determining the characteristics and severity of the coupling in our systems. The idea is simple: The more remote the connection between two clusters of code, the weaker the connascence between them should be.

Good design principles encourage us to move from tight coupling to looser
coupling where possible. But connascence allows us to be much more specific
about what kinds of problems we're dealing with, which makes it easier to reason
about the types of refactorings that can be used to weaken the connascence
between components.

In this article, I will show how you can convert instances of Type, Meaning,
Position, and Algorithm-based
connascence down to Connascence of Name. While all forms of connascence are
worth studying, these ones are the most likely to appear in your daily work.

### Connascence of Name

Name based coupling exists when a name change in one place requires a code
change in other places. Being the weakest form of connascence, it's also by far
the most common. Every module, class, method and variable we create introduces
connascence of name, assuming it is actually used for something. As an example,
consider the following code:

```ruby
mailer = Mailer.new
mailer.deliver(:to      => "gregory.t.brown@gmail.com", 
               :from    => "fake@fake.com",
               :subject => "You have won a lifetime supply of...",
               :body    => "Dear Sir, I am pleased to inform...")
```

In just this script, we see an incredible amount of name based coupling. Any of
the following trivial changes to Mailer would cause all code that depends on it
to break immediately.

* Wrapping the Mailer class definition in a namespace, e.g. `FancyUnicorn::Mailer`

* Renaming the `deliver()` method to `send_message()`

* Renaming any of the keys in the hash passed to `deliver()`, i.e. changing
the `:to` key so that it reads `:recipient`

But the fact is, there isn't really any way around this sort of coupling in most
scenarios, and it's not necessarily a sign of a problem. That having been said,
the reason why naming things is so important in computer science is because even
loosely coupled, highly cohesive systems have copious amounts of name based
coupling, which have widespread effects that only increase as systems get more
complex.

Sometimes, it is possible to eliminate Connascence of Name and the coupling
that comes along with it. For example, consider this way of defining class
methods in Ruby:

```ruby
class Mailer
  def Mailer.configure(*args)
    #...
  end
end
```

There is a clear dependence in this code between the second line of code and the
first, in which if the first line changes, so too must the second line. We can
rewrite this to avoid that coupling, if we just take advantage of Ruby's `self`
keyword here:

```ruby
class Mailer
  def self.configure(*args)
    #...
  end
end
```

But while eliminating Connascence of Name is desirable when it is both possible
and convenient to do so, it's not always realistic. We need to accept that
because names don't change all that often, a little bit of CoN is often
harmless. In fact, when given the choice between CoN and other forms of
connascence, name based coupling is preferable. We will now take
a look at the other forms of connascence to see why that is the case.

### Connascence of Type

Folks like to think that Ruby is immune to typing issues, but that 
assumption is often far too optimistic. The following code is a 
direct example of Connascence of Type:

```ruby
# Returns the mean of +values+: the sum of the elements divided by the
# element count (converted to a Float).
#
# Raises ArgumentError unless +values+ is an Array.
def average(values)
   unless values.kind_of?(Array)
     raise ArgumentError
   end

   sum = values.inject(:+)
   sum / values.size.to_f
end
```

One might attempt to resolve the problem by moving away from strict class
checking and instead use a `respond_to?()` check:

```ruby
# Returns the mean of +values+: the sum of the elements divided by the
# element count (converted to a Float).
#
# Raises ArgumentError unless +values+ responds to both inject and size.
def average(values)
   quacks = values.respond_to?(:inject) && values.respond_to?(:size)
   raise ArgumentError unless quacks

   sum = values.inject(:+)
   sum / values.size.to_f
end
```

This certainly loosens the type coupling, but does not eliminate it. If we
accept the notion that the type of a Ruby object is defined by what that object
can do, `respond_to?()` is still a form of type check, done at the method level
instead of at the level of the class hierarchy. It can sometimes even result in
false negatives, because not all code which implements dynamic behavior through
`method_missing()` updates `respond_to?()` to add those methods. This can cause
code similar to the previous example to fail with certain kinds of proxy objects,
even though they implement all necessary behaviors.

To truly free ourselves from Connascence of Type, one option is to just remove
the guard clause and let Ruby bubble up with an exception for objects that don't
work as our code expects them to. But sometimes, we want to make sure our
debugging isn't harder than it needs to be. Here's an alternative that preserves
the error handling in a way that is free of type dependencies:

```ruby
# Returns the mean of +values+: the sum of the elements divided by the
# element count (converted to a Float).
#
# No type check is performed up front. Any NoMethodError raised while
# computing the result is translated into an ArgumentError that names the
# methods the collection is expected to implement.
def average(values)
   begin
     total = values.inject(:+)
     total / values.size.to_f
   rescue NoMethodError
     raise ArgumentError, "The average() function can only " \
                          "operate on collections which implement " \
                          "the inject() and size() methods"
   end
end
```

If this feels a bit overkill, it's because it probably is. But the general idea
of removing the `kind_of?()` and `respond_to?()` checks is a good one, because
it puts us squarely back in the realm of Connascence of Name. Our dependency is
now simply that the values object has a pair of methods with the names
`inject()` and `size()`.

### Connascence of Meaning

In its most basic form, Connascence of Meaning is all about magic values. For
example, consider a legacy access control system in
which an admin is given the value 0, a manager 1, and an ordinary user 2. You
could end up writing code like this:

```ruby
if user.access_level == 0
  shoot_nukes_at_moon
else
  raise AccessDeniedError
end
```

The trouble is, once you've littered your system with hard-coded numeric values,
you will have a hard time remembering what they do, and will have a hard time
hunting them down when they need to be changed. To fix this issue, we can modify
our hypothetical `User` object:

```ruby
class User
  # The only place that knows which numeric access level means which role.
  ACCESS_LEVELS = { 0 => :admin, 1 => :manager, 2 => :user }

  # True when this user's access_level maps to the :admin role.
  def admin?
    role = ACCESS_LEVELS[access_level]
    role == :admin
  end
end
```

We try to avoid repeating Connascence of Meaning even in the more local context
of the `User` class by storing the actual role mappings in a constant. We then
provide a convenience method `User#admin?` to be used externally, resulting in
newly minted caller code that looks like this:

```ruby
if user.admin?
  shoot_nukes_at_moon
else
  raise AccessDeniedError
end
```

Now I don't know about you, but I think I'd be much less likely to accidentally
nuke the moon if the code were written this way. We haven't totally 
eliminated the `Connascence of Meaning`, but we've moved it to a hyper-local 
context within a single constant on the User model. Because all of the 
calling code is now just exhibiting Connascence of Name, this is a great 
refactoring.

### Connascence of Position

Connascence of Position is something that we see every day in Ruby because
method parameters are ordered. If we go to our mailer example, we could have
just as easily written the `Mailer#deliver()` method to use explicitly ordered
parameters, similar to what is shown in the example below.

```ruby
class Mailer
  def deliver(to, from, subject, message)
    # ...
  end
end
```

APIs like this annoy the heck out of me, because the calling code typically
doesn't give any hints at why the arguments are specified in a particular order.
Take a look at how opaque things get when we just try to reproduce our previous
example using this slightly different API:

```ruby
mailer.deliver("gregory.t.brown@gmail.com", 
               "fake@fake.com",
               "You have won a lifetime supply of...",
               "Dear Sir, I am pleased to inform...")
```

Looking at this code, it's very difficult to determine who is the sender and who
is the recipient, and even more difficult to think about how you might introduce
default values into the mix. Every change to the ordering or length of the list
of arguments can lead to broken code in remote places in your codebase. For all
of these reasons, Rubyists tend to prefer hash-based pseudo-keyword arguments
for all but the most straightforward method signatures.

However, introducing keyword arguments isn't the only way to reduce CoP in
method signatures to CoN. Another alternative that is perhaps underused is to
simply create objects that provide all the necessary attributes that you would
typically use a hash for. In this case, we can envision a simple `Message`
object being introduced:

```ruby
# Describe the email as a standalone object, one named attribute at a time,
# then hand the finished object to the mailer.
message = Message.new
message.to      = "gregory.t.brown@gmail.com"
message.from    = "fake@fake.com"
message.subject = "You have won a lifetime supply of..."
message.body    = "Dear Sir, I am pleased to inform..."

mailer.deliver(message)
```

Assuming that the `Mailer#deliver` method just depends on the attribute readers
for those attributes, this is functionally equivalent to the hash based code but
offers a number of advantages. `Message` is now a reusable, independently
testable entity that can do things like validations internally. This moves some
of the error checking and simple transformation code that might be needed to use
a parameters hash into a more local setting. With a little creativity, it's
relatively easy to make the API look a little nicer by letting `Mailer#deliver`
create the message object for you.

```ruby
# The block receives the message object, so the caller only has to fill in
# its named attributes.
mailer.deliver do |message|
  message.to      = "gregory.t.brown@gmail.com"
  message.from    = "fake@fake.com"
  message.subject = "You have won a lifetime supply of..."
  message.body    = "Dear Sir, I am pleased to inform..."
end
```

This sort of API is fairly common in Ruby as well, but probably not as common as
it should be. So next time you're faced with the CoP problem when dealing with
method arguments, consider fixing it by putting a nice shiny new object in
place.

It's worth noting that Connascence of Position is certainly not limited to
method arguments in Ruby. Anywhere in which a change in position of some data
requires you to change code elsewhere, you've got a CoP issue, and should think
about how to reduce it if possible.

### Connascence of Algorithm

Connascence of Algorithm often looks and smells a bit like the DRY principle.
But there are many cases in which code that violates DRY doesn't have a CoA
problem, and some rare cases where the opposite is true. The key thing about CoA
is the dependency between two or more clusters of code.

The following example is a CoA example from the wild, from a programming quiz
site that we're working on as part of Mendicant University's admission process.
First, you can see a fairly DRY model which is meant to compare uploaded
solutions to the actual answer for a given puzzle.

```ruby
class Puzzle < ActiveRecord::Base
  # Stores the SHA1 digest of the given file's contents in the
  # fingerprint attribute.
  def file=(tempfile)
    write_attribute :fingerprint, sha1(tempfile)
  end

  # True when the SHA1 digest of the given file's contents equals the
  # stored fingerprint.
  def valid_solution?(tempfile)
    fingerprint == sha1(tempfile)
  end

  private

  # Hex-encoded SHA1 digest of whatever tempfile.read returns.
  def sha1(tempfile)
    Digest::SHA1.hexdigest(tempfile.read)
  end
end
```

Internally, this code is fairly free of CoA, particularly because the algorithm
for fingerprinting solutions has been extracted into the `Puzzle#sha1()` helper.
But because this is a private method, I ended up with tests that explicitly do
the hashing themselves to verify that the `Puzzle#file=()` method is working as
expected.

```ruby
test "must be able to create a fingerprint for a file" do
  tempfile = Tempfile.new("puzzle_sample")
  tempfile << "Sample Text"
  tempfile.rewind

  expected_fingerprint = Digest::SHA1.hexdigest(tempfile.read)
  tempfile.rewind

  puzzle = Puzzle.new(:file => tempfile)

  assert_equal expected_fingerprint, puzzle.fingerprint
end
```

This has an upside in that it sanity checks the exact behavior, ensuring that
the tempfile is actually hashed via SHA1. But since the focus of the test is
more on ensuring that a hash was generated rather than the way it was generated,
we might be able to improve this by extracting the fingerprinting code into its
own module.

```ruby
# Single home for the fingerprinting algorithm; callable directly on the
# module as Fingerprint[stream].
module Fingerprint
  extend self

  # Returns the hex-encoded SHA1 digest of the data read from +stream+.
  def [](stream)
    contents = stream.read
    Digest::SHA1.hexdigest(contents)
  end
end
```

Then, I could rewrite the code and tests to look like they do below:

```ruby
class Puzzle < ActiveRecord::Base
  # Stores Fingerprint[tempfile] in the fingerprint attribute.
  def file=(tempfile)
    write_attribute :fingerprint, Fingerprint[tempfile]
  end

  # True when Fingerprint[tempfile] equals the stored fingerprint.
  def valid_solution?(tempfile)
    fingerprint == Fingerprint[tempfile]
  end
end
```

```ruby
test "must be able to create a fingerprint for a file" do
  tempfile = Tempfile.new("puzzle_sample")
  tempfile << "Sample Text"
  tempfile.rewind

  expected_fingerprint = Fingerprint[tempfile]
  tempfile.rewind

  puzzle = Puzzle.new(:file => tempfile)

  assert_equal expected_fingerprint, puzzle.fingerprint
end
```

The end result would be that the algorithm for how I was generating digital
fingerprints for the solutions could change, and I would not need to update my
tests, as long as the names of everything stayed the same.

In this case, arguably just fully applying the DRY principle would lead us to
the same place, but the concept of connascence lets us think about the
consequences of DRY in a less abstract way. Like all the other types of
connascence, there is a lot more we could talk about here, but in the interest
of time, we'll skip the details for now.

### Reflections

While we dug deep into some heavy theory in [last week's SOLID
article](http://practicingruby.com/articles/52), I tried to keep the connascence
examples simple, practical, and common. But that is not to say that connascence
isn't every bit as deep a concept as SOLID, and your investigations should not
stop at the examples I've shown here.

In the two articles to follow this one, we'll be looking at particular patterns
and techniques that can help you design better code. Now that you're armed with
both the SOLID principles and the metrics of connascence, you have a solid basis
for thinking about these problems in more specific contexts. I encourage you to
re-read these first two articles as you continue on with this series, and get
back to me with any questions you might have.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. 
There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/056-issue-24-connascence.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/025-creational-design-patterns.md
================================================
In this issue and the next one, I look at several design patterns laid out by
the Gang of Four and explore their relevance to modern day Ruby programming. My
goal is not so much to teach the design patterns themselves, but instead give
practical examples using real code so that you can consider what their strengths
and weaknesses are.

In this article, I focus on the creational design patterns, all of which have to
do with instantiating new objects. They are listed below in no particular order:

  * Singleton
  * Multiton
  * Factory Method
  * Abstract Factory
  * Builder
  * Prototype  

An important thing to keep in mind is that while the original GoF book was
written with C++ and Smalltalk examples, none of the patterns were written with
Ruby's semantics in mind. This makes it necessary to use a little poetic license
to explore how these patterns apply in Ruby. That having been said, the examples
we'll look at attempt to preserve the spirit of the patterns they implement even
if they're not semantically identical.

### Singleton

<i>UPDATE 2011-12-20: Be sure to check out what [Practicing Ruby
2.8](http://practicingruby.com/articles/shared/jleygxejeopq) has to say about
implementing the Singleton pattern in Ruby. My feelings on this have changed over
time.</i>

The [Singleton pattern](http://en.wikipedia.org/wiki/Singleton_pattern) is used
in situations where a single instance of a class is all you need. Singleton
objects are meant to provide an effective way of organizing global state and
behavior, such as configuration data, logging support, or other similar needs.
This pattern is common enough that Ruby provides a module in its standard
library that makes it easier to implement.

In the code below, I've implemented a simple logger using Ruby's singleton
library, which on the surface should look quite like an ordinary Ruby class
definition.

```ruby
require "singleton"

# A logger built on the standard library's Singleton mixin. Messages are
# buffered in memory and only hit the disk when #write is called.
class SimpleLogger
  include Singleton

  # The buffered, already formatted log lines.
  attr_reader :output

  def initialize
    @output = []
  end

  # Buffers +message+ tagged as INFO.
  def info(message)
    output << formatted_message(message, "INFO")
  end

  # Buffers +message+ tagged as ERROR.
  def error(message)
    output << formatted_message(message, "ERROR")
  end

  # Writes all buffered lines, newline separated, to +filename+,
  # overwriting any existing content.
  def write(filename)
    File.open(filename, "w") do |file|
      file << output.join("\n")
    end
  end

  private

  # Prefixes the message with the current time and its type.
  def formatted_message(message, message_type)
    timestamp = Time.now
    "#{timestamp} | #{message_type}: #{message}"
  end
end
```

Nothing here looks out of the ordinary, but you can begin to see the impact of the Singleton mixin as soon as you try to create a SimpleLogger instance.

```ruby
>> logger = SimpleLogger.new
NoMethodError: private method `new' called for SimpleLogger:Class
  from (irb):2
```

Since the purpose of the singleton pattern is to allow for the creation of only a single instance, it makes sense that you might not be able to expect construction to work as normal. The example below demonstrates what you need to do instead.

```ruby
logger = SimpleLogger.instance

logger.error("Some serious problem")
logger.info("Something you might want to know")
logger.write("log.txt")
```

The class method `instance()` gets added by the Singleton mixin, and takes responsibility for initializing the SimpleLogger object. The first time this method is called, an instance of SimpleLogger is created, and initialize gets called as usual. Each subsequent call refers to that exact instance, preventing additional instances of this class from being created.

At this point, you might be wondering what the advantage of this approach is
over using ordinary class definitions and global variables, such as in the code
below:

```ruby
$LOGGER = SimpleLogger.new
$LOGGER.error("Some serious problem")
$LOGGER.info("Something you might want to know")
```

This is a reasonable question to ask, because this approach is just as straightforward and has similar strengths and weaknesses. If Ruby did not have an open class system, you could argue that it's less likely that you'd run into side effects with different definitions of SimpleLogger than you would with someone reassigning $LOGGER, but that clearly does not apply here. It does seem like it'd be marginally more likely to see a variable reassigned than a class stomped on, particularly if proper namespacing was used, but it's important to notice that in Ruby, this particular benefit of Singleton objects is marginal at best.

The real benefit that using the Singleton pattern provides in Ruby over its alternatives is that instantiation is lazily evaluated and the single instance limitation is enforced. The former provides a potential performance and memory bonus when the object never ends up getting used, and the latter helps prevent accidental object creation. Both of these things are nice to have, and it only takes a bit of extra effort to make them happen.

The standard library implementation of the Singleton pattern is reasonable, and fairly clever. However, I feel it probably leans a bit too closely toward a direct translation, so I typically use a different technique in my own code. Below you can see how I might write SimpleLogger if left to my own devices.

```ruby
# A logger implemented as a module that extends itself, so its methods are
# called directly on SimpleLogger and its state lives on the module object.
module SimpleLogger
  extend self

  # Buffers +message+ tagged as INFO.
  def info(message)
    output << formatted_message(message, "INFO")
  end

  # Buffers +message+ tagged as ERROR.
  def error(message)
    output << formatted_message(message, "ERROR")
  end

  # Writes all buffered lines, newline separated, to +filename+,
  # overwriting any existing content.
  def write(filename)
    File.open(filename, "w") do |file|
      file << output.join("\n")
    end
  end

  private

  # The line buffer, created on first use because there is no initialize
  # hook to set it up.
  def output
    @output ||= []
  end

  # Prefixes the message with the current time and its type.
  def formatted_message(message, message_type)
    timestamp = Time.now
    "#{timestamp} | #{message_type}: #{message}"
  end
end
```

This code preserves the lazy evaluation component while removing the concept of an instance entirely. The module itself becomes a singleton object, allowing you to call methods directly on it which affect the state stored on the module.

```ruby
SimpleLogger.error("Some serious problem")
SimpleLogger.info("Something you might want to know")
SimpleLogger.write("log.txt")
```

I like to use this approach because what I end up with is truly a single object, rather than a class whose job is to provide a single instance. I also like to be able to call my singleton methods directly on that object without having to retrieve the singular instance.

The downside of this code, and the reason why I showed both approaches instead of just my preferred one, is that it requires a much greater amount of Ruby knowledge to actually understand how it works. Additionally, through the loss of the `initialize()` hook, you need to either do something like create a setup method that you explicitly call before any other methods, or do lazy initialization of all data as I've done in the `output()` method. These idioms are fairly common, but again, take you a bit farther away from the look and feel of an 'ordinary class definition', which may be offputting to some.

No matter which approach you choose, it's worth keeping in mind that the Singleton pattern should be used in moderation. Due to Ruby's open class system, singleton objects are essentially nothing more than overengineered global variables. As with any form of global state, this makes them more difficult to test, easier to corrupt, and harder to isolate when it comes time to debug issues with them. However, when used sparingly and for the right reasons, they can be a good tool for managing global state and interactions.

### Multiton

The [Multiton pattern](http://en.wikipedia.org/wiki/Multiton) is an extension to the Singleton pattern which maps unique keys to specific instances of a class. The idea is that there should only be one instance of an object for each unique key in use, limiting the number of objects that need to be created. For a practical example, we can look at some code in the PDF generation library Prawn which implements this sort of behavior.

Because Prawn is a PDF generation library that operates at a very low level, we need to process and utilize font metrics information extensively. Since font files can become quite large, initializing one of our Font objects can be very computationally expensive. However, since this information is unique for each font that we use, there is no need to re-instantiate fonts once they have been initialized.

While the actual source code is more complex than what I'll show here, the basic idea of applying a multiton here is quite simple. We can start off by assuming that our `Font` class has an ordinary constructor which takes a font file and then processes it into a meaningful dataset. Initializing a font directly then looks something like this.

```ruby  
font = Font.new("/path/to/times.ttf")
```

On top of this basic interface, we layer a mapping mechanism that when used, looks something like the code below.

```ruby
Font.map("Times New Roman" => "/path/to/times.ttf")
Font["Times New Roman"] #=> A font instance
```

To bring all of the above together, we can take a look at a skeletal `Font` class which implements the Multiton pattern.

```ruby
# Multiton: at most one Font instance is created per mapped font name.
class Font
  class << self
    # Registered font name => font file path.
    def file_names
      @file_names ||= {}
    end

    # Font name => Font instance that has already been built.
    def instances
      @instances ||= {}
    end

    # Registers one or more name => file path mappings.
    def map(params)
      file_names.update(params)
    end

    # Returns the Font for +name+, building it on first access and reusing
    # the same instance afterwards.
    #
    # Raises KeyError when +name+ has not been registered via Font.map.
    # Using fetch here means an unmapped name fails fast instead of
    # building, and permanently caching, a Font created from a nil path.
    def [](name)
      instances[name] ||= new(file_names.fetch(name))
    end
  end

  def initialize(filename)
    puts "processing #{filename}"
  end

  # details omitted
end
```

While the mapping is a bit complex here because it's a two stage process, the core idea of the Multiton still shines through. What we have are lazy evaluated Font instances which do not get created until the first time `Font[some_font_name]` is called. Each subsequent call will result in the original instance being returned rather than a new instance of Font being created.

This basic pattern can be used any time you have a scenario in which each unique key can be mapped to exactly one object. This sort of structure can be a super effective caching technique, but should also be used with caution as it too introduces global state and added complexity that should not be taken lightly.

### Factory Method

The [Factory Method pattern](http://en.wikipedia.org/wiki/Factory_method) is used for putting a layer of abstraction on top of object creation so that directly working with its constructor is no longer necessary. This process can lead to more expressive ways of building new objects, and can also allow for the creation of new objects without explicitly referencing their class.

In a Mendicant University course, one of our students (Carol Nichols) ran into a design issue that we were able to improve by introducing factory methods. She was building an `AdjacencyMatrix` datastructure for storing graph data, and her original API looked like this:

```ruby
class AdjacencyMatrix
  def initialize(data, directed=false)
    #...
  end
end

undirected_matrix = AdjacencyMatrix.new(data) 
directed_matrix   = AdjacencyMatrix.new(data, true)
```

Using boolean switches as arguments isn't especially expressive, and so the suggestion was quickly made to move towards a hash-based argument allowing for the following usage.

```ruby
undirected_matrix = AdjacencyMatrix.new(data) 
directed_matrix   = AdjacencyMatrix.new(data, :directed => true)
```

Generally speaking, this isn't a bad strategy, but the more we thought about it, the more we realized that there isn't really a need for a dynamic option here. Consumers would always know whether a graph they were trying to represent was directed or undirected at the time they constructed their object, and the default call without the `:directed` argument still wasn't very expressive about this detail. After some discussion, we settled on the following design, which introduces the Factory method pattern.

```ruby
class AdjacencyMatrix
  # Factory method: builds a matrix for an undirected graph.
  def self.undirected(data)
    new(data)
  end

  # Factory method: builds a matrix for a directed graph.
  def self.directed(data)
    new(data, true)
  end

  # Consumers must go through the factory methods above.
  private_class_method :new

  def initialize(data, directed=false)
    #...
  end
end

undirected_matrix = AdjacencyMatrix.undirected(data) 
directed_matrix   = AdjacencyMatrix.directed(data)
```

While this code does still have its original wart intact internally, the interface that consumers interact with has been greatly improved. By making the `new()` method private at the class level, we force users to call the factory methods, and it becomes immediately clear what type of graph is being processed each time a new `AdjacencyMatrix` is constructed.

This type of scenario comes up often, and factory methods can be used
to simplify or at least hide the complexity of constructor calls by giving an
expressive name to a certain way of constructing a given object. However,
sometimes factories go even farther by completely decoupling the object creation
process from the class of the object being created. While I won't go into much
detail about this rare use case, we see this sort of factory very
often in test code, particularly when dealing with object mappers in web 
applications.

The following example from `factory_girl` demonstrates how code which does not
reference a particular class can be used to instantiate records 
via ActiveRecord:

```ruby
factory :user do
  first_name 'John'
  last_name  'Doe'
  admin false
end

FactoryGirl.build(:user) #=> an instance of the User model
```

I won't elaborate on how this works, but it should serve as food for thought,
and perhaps would be a good project to [dig into the
source](https://github.com/thoughtbot/factory_girl) so that you can get a sense
of how factory methods can simplify the object creation process while also making 
it more flexible.

### Abstract Factory

Due to the flexible nature of Ruby's type system, we don't need an actual language construct for abstract classes. With that in mind, when we note that an [Abstract Factory](http://en.wikipedia.org/wiki/Abstract_factory) is simply an abstract interface for concrete Factory objects to conform to, this pattern pretty much falls away. This is perhaps easier to show than to explain, so I will go ahead and build out a Ruby version of the example shown in the wikipedia article I've linked to above.

```ruby
# Concrete factory for OS X widgets. Shares its API (a button method) with
# WinGuiToolkit, so the two can be used interchangeably.
module OSXGuiToolkit
  extend self

  def button
    Button.new
  end

  class Button
    def paint
      puts "I'm an OSX Button"
    end
  end
end

# Concrete factory for Windows widgets; same API as OSXGuiToolkit.
module WinGuiToolkit
  extend self

  def button
    Button.new
  end

  class Button
    def paint
      puts "I'm a WINDOWS button"
    end
  end
end

# Receives its widget factory from the caller, so it never references a
# specific toolkit by name.
class Application
  def initialize(gui_toolkit)
    button = gui_toolkit.button
    button.paint
  end
end

# this check is a very quick hack, not reliable.
# RUBY_PLATFORM is used because the bare PLATFORM constant was removed in
# Ruby 1.9 and raises NameError on modern interpreters.
if RUBY_PLATFORM =~ /darwin/
  Application.new(OSXGuiToolkit)
else
  Application.new(WinGuiToolkit)
end
```

In this example, you'll see that we've eliminated the explicit Abstract Factory interface. Instead, what we've done is created two concrete object factories, `OSXGuiToolkit` and `WinGuiToolkit`, that implement a common API. We then create a simple Application stub class which shows that the GUI toolkit factory should be injected into the Application. The reason for this is seen in the final bit of code which determines which toolkit to use based on the platform the code is running on.

The notion of using dependency injection in combination with object factories is an interesting one to me. I honestly don't find myself writing code like this often at all (and so couldn't share a relevant example of my own), but I'm not sure if that's just because I don't think of it in times where it might be the right strategy to use. In any case, this hopefully demonstrates that the notion of an Abstract Factory in Ruby is a conceptual one that has no need for actual abstract classes or interface objects.

### Builder

The purpose of the [Builder pattern](http://en.wikipedia.org/wiki/Builder_pattern) is to create an abstract blueprint describing the steps of creating an object, and then allow many different implementations to actually carry out that process as long as they provide the necessary steps. This is once again a pattern that in Ruby is purely conceptual due to the lack of a need for explicit interfaces or abstract classes. However, unlike the Abstract Factory pattern, I have actually used the ideas behind builder in some real code of my own, and have an example handy that captures the spirit of the pattern.

A while back, I was experimenting with coming up with some generalized domain language for producing output in a number of different formats. While I had more complex goals in mind, the basic usage ended up looking something like what you see below.

```ruby
class ListFormatter < Fatty::Formatter
  # Plain text: one "* item" bullet per line.
  format :text do
    def render
      bullets = params[:data].map { |item| "* #{item}" }
      bullets.join("\n")
    end
  end

  # HTML: the same items wrapped in an unordered list.
  format :html do
    def render
      items = params[:data].map { |item| "<li>#{item}</li>" }
      "<ul>#{items.join}</ul>"
    end
  end
end
```

With some formats defined, the ListFormatter class can be used in the following manner.

```ruby
  data = %w[foo bar baz]

  # Render the text format first and HTML second, which is the order the
  # output listing below shows.
  [:text, :html].each do |format|
    puts ListFormatter.render(format, :data => data)
  end
```

OUTPUTS:

```
  * foo
  * bar
  * baz
  <ul><li>foo</li><li>bar</li><li>baz</li></ul>   
```

While this all may look a bit magical on the surface, the implementation is just a few lines of relatively dull dynamic Ruby code.

```ruby
module Fatty
  # Parent of every anonymous class created by Formatter.format. All it
  # contributes is the params accessor that render implementations read from.
  class Format
    attr_accessor :params
  end

  class Formatter
    # Registry mapping a format name to its anonymous Format subclass.
    # Stored in a class-level instance variable, so each formatter class
    # keeps its own table.
    def self.formats
      @formats ||= {}
    end

    # Defines a new format: the block becomes the body of a fresh
    # Fatty::Format subclass, registered under +name+.
    def self.format(name, options={}, &block)
      formats[name] = Class.new(Fatty::Format, &block)
    end

    # Builds a new instance of the requested format, hands it the params,
    # and returns whatever its render method produces.
    def self.render(format, params={})
      formats[format].new.tap { |builder| builder.params = params }.render
    end
  end
end
```

The bulk of this code looks like my Multiton example, albeit one that operates on anonymous Class objects. But instead of focusing on that detail, you should turn your attention to the `render()` method, which is where the builder process comes in.

Each time render is called, a new instance of an anonymous Format subclass is created, and then customized. The params attribute is set and the render method is called to return the finally constructed object, our output data. As long as each format object implements all the required steps, the formats can be used interchangeably, which is the key feature that the Builder pattern emphasizes.

This example perhaps would be more convincing if you took a look at it in its real setting, in which I do things like call a validations hook, mix in helper methods to the format instances, and otherwise perform more interesting operations. But since we're running on the long side already with this article, I'll invite you to investigate this on your own by checking out [the source code](https://github.com/sandal/fatty) I wrote a couple years ago for this experiment.

It's worth noting that this isn't a perfect example of Builder because I think the pattern doesn't apply directly to Ruby, and that the code I've demonstrated is a bit of a hack. But it might be a good conversation starter at least for those who are interested in investigating this pattern further, since it tries to attack a problem that the Builder pattern would be well suited for in a static language.

### Prototype 

I've included a reference to the [Prototype pattern](http://en.wikipedia.org/wiki/Prototype_pattern) here for completeness, but unfortunately am unsure whether or not it is remotely applicable to Ruby. The purpose of the Prototype pattern is to provide an alternative to manually constructing new objects by allowing for customization through the cloning of existing objects. Presumably this is a good idea when the setup of an object is more costly than it would be to create a copy of the initial post-processed data. However, I'm having a hard time seeing when this approach would be better than ordinary object composition combined with some side-effect-free operations done on the source data.

I think what's interesting about the Prototype pattern is that it gives you a different way of looking at object oriented programming which allows you to envision an object system without needing the concept of classes. There are two languages I know of that are specifically designed to implement that kind of environment, [Self](http://en.wikipedia.org/wiki/Self_%28programming_language%29) and [Io](http://en.wikipedia.org/wiki/Io_%28programming_language%29).

Because I feel out of my depth when it comes to suggesting a good use of this pattern in Ruby, I welcome readers to submit their own ideas. Personally, I feel like this pattern isn't a good fit for Ruby, but I could be wrong and would love to see evidence of my own ignorance!

### Reflections

Every example I've shown in this article reflects a design approach that I feel is elegant compared to the obvious alternatives. But each and every one of them requires a lot of explanation and assumes more than a fair bit of Ruby and object oriented programming knowledge. For this reason, it always baffles me when beginners or even some intermediate developers bother to learn about patterns at all. Since they don't really make sense to think about until you run into fairly complex problems, and when you do need to apply them you need to be very careful not to overengineer things, they seem like a tool to be used sparingly by strongly skilled developers rather than gratuitously by the masses.

That said, I think that it's nice to be able to recognize a pattern when you see it, regardless what your level of experience is. It's also good to have at least a working knowledge of how to implement patterns so that when someone is discussing design with you, you can have a common vocabulary to work with. For these reasons, the study of patterns might be relevant to all active developers.

While there are clearly lots of different ways to abstract object creation, there are even more ways to create interesting compositions of object clusters. In the next article we'll explore that topic by looking at the structural design patterns.
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/059-issue-25-creational-design-patterns.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/026-structural-design-patterns.md
================================================
In this two part series, I've been looking at the classical design patterns laid out by the Gang of Four and exploring their relevance to modern day Ruby programming. Rather than teaching the design patterns themselves, my goal is to give practical examples using real code so that you can consider what the strengths and weaknesses of these techniques are.

In this issue, I'll be focusing on the structural design patterns, all of which have to do with the relationships between clusters of code. The seven patterns listed below are the ones we'll focus on in this article.

  * Adapter
  * Bridge
  * Composite
  * Proxy
  * Decorator
  * Facade
  * Flyweight

An important thing to keep in mind is that while the original GoF book was written with C++ and Smalltalk examples, none of the patterns were written with Ruby's semantics in mind. This makes it necessary to use a little poetic license to explore how these patterns apply in Ruby. That having been said, the examples we'll look at attempt to preserve the spirit of the patterns they implement even if they're not semantically identical.

### Adapter 

An [Adapter](http://en.wikipedia.org/wiki/Adapter_pattern) is used when you want to provide a unified interface to a number of different objects that implement similar functionality. This pattern is easy to spot in the wild for a Ruby developer, as most of us have to use ActiveRecord in our day to day work. Under the hood, ActiveRecord talks to a large number of different databases, but wraps them up in a common interface so that its implementation code can avoid worrying about their individual differences. This is exactly the sort of thing the Adapter pattern is useful for.

Increasingly, Rubyists are finding this pattern to be useful in other settings as well. In particular, things like Intridea's [multi_json](https://github.com/intridea/multi_json) gem allow libraries and applications to be built against an abstract interface rather than requiring some particular JSON library that might conflict with other dependencies. Inspired by the ideas behind multi_json, a Mendicant University student (Mitko Kostov) built a similar adapter library for Markdown processors called [Marky](https://github.com/mytrile/marky). The base implementation is very simple, and gives us a good opportunity to look at one way to implement an Adapter in Ruby from the ground up.

The basic idea is that we want to be able to use a common interface while easily
configuring which backend is used. The following example shows how Marky might
be used:

```ruby  
# using RDiscount as a backend
Marky.adapter = :rdiscount

Marky.to_html("hello world") #=> "<p>hello world</p>"

# using BlueCloth as a backend
Marky.adapter = :bluecloth

Marky.to_html("hello world") #=> "<p>hello world</p>"
```

To make this work, it is necessary to map the name of an adapter to a class which wraps the underlying engine and implements a `to_html()` method. The code that actually wires up the adapter is shown below.

```ruby
module Marky
  extend self

  # Returns the adapter currently in use, selecting :rdiscount the first
  # time it is asked for one when nothing has been configured.
  def adapter
    self.adapter = :rdiscount unless @adapter
    @adapter
  end

  # Selects a backend by name. The name is used twice: as the file to
  # require ("adapters/<name>") and, capitalized, as the constant to look
  # up under Marky::Adapters. Anything other than a Symbol or String is
  # rejected.
  def adapter=(adapter_name)
    unless adapter_name.is_a?(Symbol) || adapter_name.is_a?(String)
      raise "Missing adapter #{adapter_name}"
    end

    require "adapters/#{adapter_name}"
    @adapter = Marky::Adapters.const_get(adapter_name.to_s.capitalize)
  end

  # Converts Markdown to HTML using whichever adapter is active.
  def to_html(string)
    adapter.to_html(string)
  end
end
```

While this uses a tiny bit of dynamic Ruby magic to look up the right module name, when we see the actual adapters, it all comes together.

```ruby
# adapters/bluecloth.rb

require "bluecloth"

module Marky
  module Adapters
    # Adapter exposing the BlueCloth engine through a module-level to_html().
    module Bluecloth
      # makes to_html callable directly on the module itself
      extend self
      # Builds a ::BlueCloth object from the given string and returns its to_html result.
      def to_html(string)
        ::BlueCloth.new(string).to_html
      end
    end
  end
end
```

```ruby
# adapters/rdiscount.rb

require "rdiscount"

module Marky
  module Adapters
    # Adapter exposing the RDiscount engine through a module-level to_html().
    module Rdiscount
      # makes to_html callable directly on the module itself
      extend self
      # Builds a ::RDiscount object from the given string and returns its to_html result.
      def to_html(string)
        ::RDiscount.new(string).to_html
      end
    end
  end
end
```

Since all the adapters implement a `to_html()` method that shares a common
contract, `Marky.to_html()` will work regardless of what adapter gets loaded.
The win here is that if libraries, applications and frameworks rely on
adapters rather than concrete implementations, it will be easier to swap
one engine out for another when necessary.

While not every problem domain needs the added level of indirection that Adapters introduce, they can come in handy when there are several competing implementations solving a common problem but you don't want to be forced to choose one over the other.

### Bridge

The idea behind a [Bridge](http://en.wikipedia.org/wiki/Bridge_pattern) is to place a physical separation between the interface of a given type of object and its implementation. In languages in which the type system gets in the way, this kind of pattern is important for reducing the proliferation of various type permutations by making hierarchies of interfaces and implementations orthogonal to one another. If that sounds like academic gibberish to you, there is a [SourceMaking article](http://sourcemaking.com/design_patterns/bridge) that might help sort the concepts out for you a bit.

I had a hard time thinking through where this pattern has its place in Ruby, mainly because there is a built in separation of interface and implementation in Ruby via duck typing semantics. The best example I could come up with was to port the painfully complex C++ example that SourceMaking uses in their article to something that is a bit more natural looking in Ruby. Read it over, and think about what it might be gaining us as you go.

```ruby
##Concrete Implementations

# Concrete implementation holding an hour and a minutes value.
class BasicTimeData
  def initialize(hour, minutes)
    @hour, @minutes = hour, minutes
  end

  # Returns the stored values as "Time is <hour>:<minutes>".
  def formatted_output
    format("Time is %s:%s", @hour, @minutes)
  end
end

# Concrete implementation holding an hour, a minutes value and a meridian marker.
class TimeWithMeridianData
  def initialize(hour, minutes, meridian)
    @hour, @minutes, @meridian = hour, minutes, meridian
  end

  # Returns the stored values as "Time is <hour>:<minutes> <meridian>".
  def formatted_output
    format("Time is %s:%s %s", @hour, @minutes, @meridian)
  end
end

##Bridge

# The bridge: gives including classes a to_s that is answered by their data object.
module TimeFormatter
  # Expects the including object to have set @time_data to something that
  # responds to formatted_output.
  def to_s
    @time_data.formatted_output
  end
end

## Abstract Objects linked to Concrete Implementations through Bridge

class BasicTime
  include TimeFormatter

  # Passes every constructor argument (and any block) straight through to
  # the BasicTimeData object that backs this time.
  def initialize(*args, &block)
    @time_data = BasicTimeData.new(*args, &block)
  end
end

class TimeWithMeridian
  include TimeFormatter

  # Passes every constructor argument (and any block) straight through to
  # the TimeWithMeridianData object that backs this time.
  def initialize(*args, &block)
    @time_data = TimeWithMeridianData.new(*args, &block)
  end
end

## Example Usage

time1 = BasicTime.new("10", "30")
time2 = TimeWithMeridian.new("10", "30", "PM")

# puts calls to_s on each argument, which TimeFormatter routes to the data objects
puts time1, time2
```

While it might just be due to the nature of the example, I feel this code is still quite contrived, and that hides its benefits. The takeaway here is mostly that it's possible for both `BasicTimeData` and `TimeWithMeridianData` to change without breaking `BasicTime` and `TimeWithMeridian`, as long as `TimeFormatter` is updated. Similarly, a change in the needs of `BasicTime` and `TimeWithMeridian` will not affect the concrete implementations of the data structures, as long as the bridge provides the necessary wiring.

The thing that I struggle with in this pattern is understanding what unique behaviors the `BasicTime` and `TimeWithMeridian` objects could implement that justify their existence. Is this pattern an artifact of static languages that isn't really needed in Ruby? Or am I just missing some important detail that could be cleared up with a good example or two? Feel free to leave a comment letting me know what you think.

### Composite 

The [Composite pattern](http://en.wikipedia.org/wiki/Composite_pattern) is useful when you want to treat a group of objects as if it were a single, unified object. To explore this pattern, we can look at some experimental code I wrote for Prawn which was designed to make it possible to treat all objects drawn in the document as compositions of primitive PDF instructions. We can start with an example of rendering a curve, working from the outside in.

When `curve()` is called on a Prawn drawing, the PDF content is generated as a
side effect and the user does not need to do anything with the return value of
that method. However, if someone wanted to play with the internals, they could
call `curve!()` instead and get themselves an object that implements a
`to_pdf()` method, as in the following example:

```ruby
# curve!() hands back the chunk object so that it can be inspected
chunk = canvas.curve!(:point1 => [100,100],
                      :point2 => [50,50], 
                      :bound1 => [60,90],
                      :bound2 => [60,90])

# print the PDF instructions the chunk produces
puts chunk.to_pdf

...........................................................OUTPUTS..

  100.000 100.000 m
  60.000 90.000 60.000 90.000 50.000 50.000 c
```

We can catch a glimpse of how this `to_pdf()` method is actually implemented via several composed primitive objects by taking a look at the source for the `curve!()` method.

```ruby
# Builds a :curve chunk whose content is two sub-chunks: a move to :point1
# followed by a curve to :point2 using the two bound points.
def curve!(params)
  chunk(:curve, params) do |c|
    start = move_to!(:point => c[:point1])
    sweep = curve_to!(:point  => c[:point2],
                      :bound1 => c[:bound1],
                      :bound2 => c[:bound2])

    [start, sweep]
  end
end
```

In the above, `chunk()` is a helper method which builds a `Prawn::Core::Chunk` object, which serves as the composite object in this system. We'll look at how chunks are implemented in a bit, but note that the block given to the `chunk()` method defines what the individual chunk is composed of. In this case, a curve consists of two subchunks, one responsible for generating a move instruction, and one for rendering a curve from the current drawing location to another point. The definitions for both of those methods are shown below.

```ruby
# Builds a :move_to chunk whose content is a single raw chunk holding the
# formatted move instruction for params[:point].
def move_to!(params)
  chunk(:move_to, params) { |c| raw_chunk("%.3f %.3f m" % c[:point]) }
end

# Builds a :line_to chunk whose content is a single raw chunk holding the
# formatted line instruction for params[:point].
def line_to!(params)
  chunk(:line_to, params) { |c| raw_chunk("%.3f %.3f l" % c[:point]) }
end
```

Similar to `curve!()`, these methods can be called directly, producing an object with a `to_pdf()` method, as shown below.

```ruby
# build only the move instruction and print its PDF text
chunk = canvas.move_to!(:point => [100,100])
puts chunk.to_pdf

# build only the curve instruction and print its PDF text
chunk2 = canvas.curve_to!(:point => [50,50],
                          :bound1 => [60,90],
                          :bound2 => [60,90])

puts chunk2.to_pdf

# combined output is identical to calling curve!() directly.
```

Through these two methods, we encounter the leaf nodes in our composition, `Prawn::Core::RawChunk` objects. These objects are where the actual text that is in our `to_pdf()` is stored. With that in mind, we can now look at the actual objects that this graphics system is built on.

```ruby
module Prawn
  module Core
    # Leaf node of the composition: wraps a literal piece of PDF text.
    class RawChunk
      def initialize(content)
        @content = content
        @command = :raw_pdf_text
        @params = {}
      end

      attr_accessor :content, :command, :params

      # A raw chunk's PDF output is simply its stored content.
      alias_method :to_pdf, :content
    end

    # Composite node: a named command plus its parameters, whose content is
    # computed on demand by the block given at construction time.
    class Chunk
      def initialize(command, params={}, &action)
        @command = command
        @params = params
        @action = action
      end

      attr_reader :command, :params, :action

      # Looks up a single parameter by key. The action block receives the
      # chunk itself, so this is what lets it write c[:point] rather than
      # c.params[:point].
      def [](key)
        params[key]
      end

      # Runs the action block, passing this chunk in, and returns its result.
      def content
        action.call(self)
      end

      # Walks down the composition and returns the PDF text. An Array result
      # is rendered element by element and joined with newlines; a single
      # Chunk or RawChunk is asked for its own to_pdf.
      #
      # Raises a RuntimeError when the action returns anything else.
      def to_pdf
        case results = content
        when Array
          results.map { |sub_chunk| sub_chunk.to_pdf }.join("\n")
        when Prawn::Core::Chunk, Prawn::Core::RawChunk
          results.to_pdf
        else
          raise "Bad Chunk: #{results.class} not supported"
        end
      end
    end
  end
end
```

From this we see that a chunk's content can be an array of children, a chunk, or a raw chunk object. The `to_pdf()` method is responsible for traversing downwards through the composition until it reaches the raw chunk objects which simply return the raw content. But because the APIs match, we can look at the chunks at any level of the system and burrow down to their raw data.

While this might be a bit of an intense example, it shows how the Composite pattern can be used to make a complex set of objects look and feel as if they were a single unified entity. It may be a bit much to take in on a first glance, but try to think of how you could apply these ideas to your own code and you might gain some useful insights.

### Proxy

A [Proxy](http://en.wikipedia.org/wiki/Proxy_pattern) is any object that acts as a drop-in replacement object that does a bit of work and then delegates to some other underlying object. This is another pattern that's easy to recognize for Rails developers, because it is used extensively in ActiveRecord associations support. The following example shows a typical interaction between a base model and one of its associations.

```ruby
quiz = Quiz.first
# the association reports Array as its class
quiz.questions.class #=> Array

quiz.questions.count #=> 10

# ...yet it also answers to create(), which a plain Array does not have
quiz.questions.create(:question_text => "Who is the creator of Ruby?",
                      :answer        => "Matz")

quiz.questions.count #=> 11

# array-style indexing reaches the record that was just created
quiz.questions[-1].answer #=> "Matz" 
```

While the questions association claims to be an array, it also provides some of the common helpers found in `ActiveRecord::Base`. If we stick to the core idea and ignore some of the Rails specific details, creating such an association proxy is fairly easy to do using Ruby's delegate standard library. The code below more or less does the trick.

```ruby
require "delegate"

class Quiz
  # Returns the memoized association proxy, creating it around an empty
  # array on first use and pointing it at Question if no class is set yet.
  def questions
    @questions ||= HasManyAssociation.new([])
    @questions.associated_class = Question unless @questions.associated_class
    @questions
  end
end

# Minimal record: keeps the params it was built with and exposes the answer.
class Question
  attr_reader :params

  def initialize(params)
    @params = params
  end

  # Returns the value stored under :answer in the params.
  def answer
    params[:answer]
  end
end

# Array proxy built with the delegate library. The constructor inherited
# from DelegateClass(Array) takes the array to wrap, so none is defined here.
class HasManyAssociation < DelegateClass(Array)
  attr_accessor :associated_class

  # Instantiates associated_class with the given params and appends the new
  # object to the wrapped array.
  def create(params)
    record = associated_class.new(params)
    self << record
  end
end

quiz = Quiz.new

# the association object stands in for a plain array of questions
questions = quiz.questions

# create() is the custom method the proxy adds
questions.create(:question_text => "Who is the creator of Ruby?",
                 :answer        => "Matz")
questions.create(:question_text => "Who is the creator of Perl?",
                 :answer        => "Larry Wall")

# indexing and map are ordinary array behavior, reached through the proxy
first, second = questions[0], questions[1]

p first.answer  #=> "Matz"
p second.answer #=> "Larry Wall"
p questions.map(&:answer).join(", ") #=> "Matz, Larry Wall"
```

Interestingly enough, while Ruby provides a standard library for building Proxy objects, most people tend to implement them in a different way, through the use of an explicit `method_missing()` hook on a blank slate object such as Ruby 1.9's BasicObject. For example, we could have written our HasManyAssociation code in the manner shown below and things would still work as expected.

```ruby
# Hand-rolled array proxy built on a blank slate object.
class HasManyAssociation < BasicObject
  attr_accessor :associated_class

  def initialize(array)
    @array = array
  end

  # Instantiates associated_class with the given params and appends the new
  # object to the wrapped array.
  def create(params)
    @array << associated_class.new(params)
  end

  # Any message this class does not define itself is forwarded, together
  # with its arguments and block, to the wrapped array.
  def method_missing(name, *args, &block)
    @array.send(name, *args, &block)
  end
end
```

Without looking at the source, I'm almost sure that Rails does something similar to this, because doing some_association.class returns Array rather than the name of the proxy object. This is the only noticeable difference between this approach and the DelegateClass approach.

Personally, I've written proxies in both ways, and I tend to prefer the `DelegateClass()` approach slightly, simply because it's more explicit and doesn't require me to explicitly define a `method_missing()` hook. But on the other hand, we can see that rolling your own proxy implementation is trivial in Ruby, and some may prefer the self contained nature of doing the delegation work yourself. It'd be interesting to hear what readers have to say on this topic, so please feel free to post to the mailing list if you prefer one approach over the other.

### Decorator

While there is a clear distinction between a [Decorator](http://en.wikipedia.org/wiki/Decorator_pattern) and a Proxy in static languages, in Ruby the two concepts almost merge, except that a Decorator is used for the purpose of adding / extending behavior of a target object, and a Proxy is a more general concept.

Since I've already written up a cool example of using decorators on this blog, I think what I'll do is point you over there in the interest of keeping this article from being even more incredibly long than it already is. Check out the [Decorator Delegator Disco](http://blog.rubybestpractices.com/posts/gregory/008-decorator-delegator-disco.html) if you want to see some interesting code samples that implement this pattern.

### Facade

The [Facade pattern](http://en.wikipedia.org/wiki/Facade_pattern) simplifies how users interact with a codebase by implementing an interface that hides many implementation details for the most common behaviors needed by consumers. Looking at it from the outside, a perfect example of this is the open-uri standard library. When using open-uri, it is possible to do a simple HTTP get request with just a single line of code, as shown below.

```ruby
require "open-uri"

# URI.open is used because a bare open() no longer accepts URLs on
# Ruby 3.0 and later (doing so was deprecated in Ruby 2.7).
puts URI.open("http://www.google.com").read
```

The purpose of open-uri is to make reading a resource via an HTTP GET request look and feel like opening an ordinary file. This is a very common use case, so open-uri makes it easy to work with. To see what this facade is hiding, we can write the equivalent code using the libraries it wraps, `Net::HTTP` and `URI`.

```ruby
require 'net/http'
require 'uri'

url = URI.parse('http://www.google.com')

# open a connection to the host by hand and issue the GET ourselves;
# the block's value (the response) becomes the value of start()
res = Net::HTTP.start(url.host, url.port) do |http|
  http.get('/index.html')
end

puts res.body
```

While the code above isn't especially difficult to read, it certainly feels like more work than the previous example. This is primarily because of the flexibility and functionality tradeoff between the two approaches. The open-uri library can afford to be much more high level and limited in scope because its sole purpose is to help make a single particular task easier. On the other hand, `Net::HTTP` and `URI` are both complex tools that can be used for a number of different things. The use of the Facade pattern allows for both kinds of objects to coexist peacefully within a single system.

It's worth keeping in mind that pretty much every DSL you encounter is a Facade of some form or another. If you're interested in seeing how simple interfaces can mask highly complex networks of objects, consider doing a source dive into your favorite tool that utilizes a domain specific language, such as Rake, RSpec, or Sinatra. You'll find a number of different techniques at work depending on which project you explore, but all have the common characteristic of providing a simplified way to interact with a deep system.

### Flyweight

The [Flyweight pattern](http://en.wikipedia.org/wiki/Flyweight_pattern) is a way to represent what would seem like a large amount of data in a lightweight way. This is one of those patterns that mostly goes away in Ruby due to built in language constructs, but it's worth taking a look at just for the sake of completeness.

The Wikipedia article linked above discusses font metrics as a common application of the flyweight pattern, in which you may want to associate each character in a string with a large amount of information describing how that character should be rendered. The basic idea is that it'd be far too inefficient memory-wise to create a new instance of font metrics data for every character in a document. So instead, using the Flyweight pattern, it is possible to map the index of characters in a string to a single instance for each unique character, vastly reducing the amount of memory consumed. This is a problem I've actually had to solve before within Prawn, but it's a bit of a tough one to demonstrate briefly if we attempt to show real code.

However, if we stub out the actual font metrics generation, it's easy to see that this problem can be solved by wrapping a simple Ruby hash that has an initializer block.

```ruby
class FontMetrics

  def initialize(string)
    @string = string
  end

  # Returns the metrics for the character found at +index+ in the string.
  def [](index)
    glyph = @string[index]
    glyph_data[glyph]
  end

  # Cache of metrics keyed by character. A missing key is filled in by the
  # hash's default block, so each distinct character is computed only once.
  def glyph_data
    @glyph_data ||= Hash.new do |cache, glyph|
      cache[glyph] = metrics_for(glyph)
    end
  end

  # stubbed out, but would typically refer to something
  # largish and time consuming to generate
  #
  def metrics_for(k)
    bbox = Array.new(4) { rand(100) }
    { :bbox => bbox }
  end

end

string = "Hello World"

metrics = FontMetrics.new(string)

# metrics_for uses rand, so the bbox numbers shown here are sample values
# and will differ from run to run; what matters is which lines match.
p metrics[0]  #=> {:bbox=>[86, 44, 88, 31]}
p metrics[2]  #=> {:bbox=>[52, 7, 38, 98]}
p metrics[3]  #=> {:bbox=>[52, 7, 38, 98]}
p metrics[-2] #=> {:bbox=>[52, 7, 38, 98]}

# indexes 2, 3 and -2 are all the letter "l", so they share one object
p metrics[2].object_id == metrics[3].object_id #=> true

p metrics[0] == metrics[1] #=> false (unless the two random bboxes happen to coincide)
p metrics[2] == metrics[3] #=> true
```

From the above code, we see that the `FontMetrics` object gives the appearance of having data for each and every character in the string, but checking the `object_id` for each proves that only one object per unique character has been created. While I suppose we could call this a Flyweight, I think that in Ruby we'd just say that we were caching the metrics data using a hash with an initializer. But perhaps using this vocabulary wouldn't hurt, if we want to keep our minds focused on the high level concepts.

### Reflections

After writing two articles on the topic, I'm finding myself getting sick of design patterns. But when I look back on the code I've shared with you, I realize that when I built these things, I never really thought consciously about what pattern they were, and it took me until the time of writing this article to put a name on them.

A major concern I have about classical patterns is that in order to see the resemblance of the code I've shared to the original GoF patterns, you really need to look at things sideways and squint a bit. It'd be great for me to be able to just say 'Use a flyweight here' and have it mean something, but if you say that to someone without a strong background in Ruby, you may end up with hundreds of lines of a Java-esque monstrosity.

To be sure, I'm not saying that this exploration has not been worthwhile. Forcing myself to think of how many of the classic GoF patterns might materialize themselves in Ruby has been a very interesting experience, because it's really making me think about how we build and design our code. But the problem of whether we can actually have a common vocabulary about concepts that really get distorted in translation makes me wonder about the merits of stacking up pattern definitions, at least without giving them new names.

I'm quite curious about what folks have been thinking as they read through the last two articles, in particular, I wonder if seeing me try to attack patterns from a purely pragmatic perspective has changed anything about the way readers look at these concepts. I'm also kind of curious if 'that guy' is out there, silently thinking to himself "Well... actually" after seeing my somewhat liberal interpretation of these classic patterns. Please leave a comment letting me know what you think!
  
> **NOTE:** This article has also been published on the Ruby Best Practices blog. There [may be additional commentary](http://blog.rubybestpractices.com/posts/gregory/060-issue-26-structural-design-patterns.html#disqus_thread) 
over there worth taking a look at.


================================================
FILE: articles/v1/README.md
================================================
These articles are from Practicing Ruby's first volume, which ran from 
2010-11-09 to 2011-02-28. The manuscripts in this folder correspond to the
following articles on practicingruby.com:

* [Issue 1.1: Ruby's method lookup path, Part 1](http://practicingruby.com/articles/shared/oxcpupjsuqik) (2010.11.09)
* [Issue 1.2: Ruby's method lookup path, Part 2](http://practicingruby.com/articles/shared/fvdxhnrylxqd) (2010.11.11)
* [Issue 1.3: Writing configurable applications, Part 1](http://practicingruby.com/articles/shared/xpylcrvtrexl) (2010.11.16)
* [Issue 1.4: Writing configurable applications, Part 2](http://practicingruby.com/articles/shared/psudcyhgnkcj) (2010.11.18)
* [Issue 1.5: Testing antipatterns; Testing private methods](http://practicingruby.com/articles/shared/hzufzwxrygux) (2010.11.25)
* [Issue 1.6: Meditations on bad and good code, Part 1](http://practicingruby.com/articles/shared/ykhqyaduhllv) (2010.12.01)
* [Issue 1.7: Meditations on bad and good code, Part 2](http://practicingruby.com/articles/shared/rhfxdyjqhgdy) (2010.12.03)
* [Issue 1.8: Uses for modules, Part 1](http://practicingruby.com/articles/shared/dptfgwedmytp) (2010.12.08)
* [Issue 1.9: Uses for modules, Part 2](http://practicingruby.com/articles/shared/qtegejtvzcja) (2010.12.10)
* [Issue 1.10: Uses for modules, Part 3](http://practicingruby.com/articles/shared/uecrbiznivfn) (2010.12.14)
* [Issue 1.10.a: Addendum to Uses for modules, Part 3](http://practicingruby.com/articles/shared/nwtojmfkvwjq) (2010.12.15)
* [Issue 1.11: Uses for Modules, Part 4](http://practicingruby.com/articles/shared/yhrmpichydpe) (2010.12.16)
* [Issue 1.12: Rapid Prototyping](http://practicingruby.com/articles/shared/zdzupcjigzhu) (2010.12.21)
* [Issue 1.13: Obfuscations of Christmas Past](http://practicingruby.com/articles/shared/enwcaticyuqv) (2010.12.23)
* [Issue 1.14: Duck typing in practice, Part 1](http://practicingruby.com/articles/shared/rvdcaomuyjzr) (2010.12.28)
* [Issue 1.15: Duck typing in practice, Part 2](http://practicingruby.com/articles/shared/pdcabdowgpji) (2010.12.31)
* [Issue 1.16: Interesting ruby hackers](http://practicingruby.com/articles/shared/ikbmonxlljuh) (2011.01.03)
* [Issue 1.17: Interesting ruby hacker-writers](http://practicingruby.com/articles/shared/mvzltpbdtdft) (2011.01.07)
* [Issue 1.18: Dirty little secrets about testing](http://practicingruby.com/articles/shared/vpqmwrhegmcj) (2011.01.14)
* [Issue 1.19: Thoughts on mocking, Part 1](http://practicingruby.com/articles/shared/ccivepfxynrq) (2011.01.19)
* [Issue 1.20: Thoughts on mocking, Part 2](http://practicingruby.com/articles/shared/wudydnfyhyun) (2011.01.22)
* [Issue 1.21: How to practice, Part 1](http://practicingruby.com/articles/shared/mplhlqnvdtwe) (2011.01.26)
* [Issue 1.22: How to practice, Part 2](http://practicingruby.com/articles/shared/pozccfaahxtp) (2011.01.28)
* [Issue 1.23: SOLID design principles](http://practicingruby.com/articles/shared/tvtqekhpyjla) (2011.02.05)
* [Issue 1.24: Connascence as a software design metric](http://practicingruby.com/articles/shared/lulytjojykvw) (2011.02.11)
* [Issue 1.25: Creational Design Patterns](http://practicingruby.com/articles/shared/mtulskvnxwwn) (2011.02.22)
* [Issue 1.26: Structural design patterns](http://practicingruby.com/articles/shared/fenavpjocftt) (2011.02.28)


================================================
FILE: articles/v2/001-ways-to-load-code.md
================================================
There are many ways to load Ruby code, and that has led to confusion over the years. In this article, I will give you the backstory behind several conventions seen in the wild and share some stories about how I use those conventions in my own code.

The topic of code loading breaks up naturally into two subtopics: loading code
within your own project and loading code from third-party libraries. People tend
to struggle more with loading code properly within their own projects than they
do with loading code from third-party libraries, so that's what I'll focus on
exclusively in this issue.

For now, I will focus on the basic mechanics of `load()`, `autoload()`,
`require()`, and `require_relative()`. I'll discuss how they work so you can
then think about how they can be used within your own projects.

### Kernel#load

Suppose we have a file called _calendar.rb_ that contains the code shown here:

```ruby
class Calendar
  def initialize(month, year)
    @month = month
    @year  = year
  end

  # A simple wrapper around the *nix cal command.
  def to_s
    IO.popen(["cal", @month.to_s, @year.to_s]) { |io| io.read }
  end
end

puts Calendar.new(8, 2011)
```

Given an absolute path to this file, the contents will be loaded and then
executed immediately:

```console
>> load "/Users/seacreature/devel/practicing-ruby-2/calendar.rb"
    August 2011
Su Mo Tu We Th Fr Sa
    1  2  3  4  5  6
 7  8  9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30 31
```

I can also just specify a path relative to my current working directory and get the same results. That means that if _calendar.rb_ is in the same directory from which I invoked my irb session, I'm able to call `load()` in the manner shown here:

```console
>> load "./calendar.rb"
    August 2011
Su Mo Tu We Th Fr Sa
    1  2  3  4  5  6
 7  8  9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30 31
```

An interesting thing about `load()` is that it does not do any checks to see
whether it has already loaded a file and will happily reload and reexecute a
file each time you tell it to. So, in practice, the implementation of `load()`
is functionally similar to the code shown here:

```ruby
def fake_load(file)
  eval File.read(file)
  true
end
```

The main benefit of indiscriminately reloading and reexecuting code is that you
can make changes to your files and then `load()` them again within a single
session without having to restart the program that's loading the code. So, for
example, if we changed _calendar.rb_ to output August 2012 instead of August
2011, we could just load it again without restarting irb. But we'd also be
greeted with some warnings in the process:


```console
>> load "./calendar.rb"
/Users/seacreature/devel/practicing-ruby-2/calendar.rb:2: 
warning: method redefined; discarding old initialize
/Users/seacreature/devel/practicing-ruby-2/calendar.rb:2: 
warning: previous definition of initialize was here
/Users/seacreature/devel/practicing-ruby-2/calendar.rb:8: 
warning: method redefined; discarding old to_s
/Users/seacreature/devel/practicing-ruby-2/calendar.rb:8:
warning: previous definition of to_s was here
    August 2012
Su Mo Tu We Th Fr Sa
          1  2  3  4
 5  6  7  8  9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30 31
```

If you remember that Ruby classes and modules are permanently open to
modification, these warnings should make a lot of sense. The first time we
called `load()`, it defined the `initialize()` and `to_s()` methods for the
`Calendar` class. The second time we called `load()`, that class and its methods
already existed, so it redefined them. This is not necessarily a sign of a bug,
but Ruby warns you of the possibility that it might be.

Ultimately, these warnings are Ruby telling you that there is probably a better
way for you to do what you're trying to do. One interesting way to get around
the problem is to use `Kernel#load()`'s wrap functionality.  Rather than telling
you directly how it works, I'm going to show you by example and see if you can
guess what's going on.

Suppose we kill our irb session and fire up a new one; we're now back to a blank
slate. We then run the following code and see the familiar calendar output:

```console
>> load "./calendar.rb", true
    August 2012
Su Mo Tu We Th Fr Sa
          1  2  3  4
 5  6  7  8  9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30 31
```

Then we decide that we want to look a little deeper into the future so that we
know what to plan for in AD 2101. We reload the code using the same command as
before:

```console
>> load "./calendar.rb", true
    August 2101
Su Mo Tu We Th Fr Sa
    1  2  3  4  5  6
 7  8  9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30 31
```

This time, we don't see any warnings, so obviously something has changed. Here's
a clue:

```console
>> Calendar
NameError: uninitialized constant Object::Calendar
  from (irb):2
  from /.../.rvm/rubies/ruby-1.9.2-p180/bin/irb:16:in `<main>'
```

Surely the `Calendar` class must have been defined *somewhere*, because the
program worked as expected. So what is going on here? Take a look at the
following code; it should give you a clearer picture of what is happening:

```ruby
def fake_load(file)
  Module.new.module_eval(File.read(file))
  true
end
```

In this implementation, our approximation of `load()` is evaluating the loaded
code in the context of an anonymous module, which essentially wraps everything
in its own namespace. This step prevents any of the constants defined in the loaded
code from being defined within the global namespace, including any class or
module definitions.

The existence of this option is a hint that although `load()` is suitable for
code loading, it is geared more to implementing customized runners for Ruby code
than to simply loading the classes and modules in your projects. So if you've
been using `load()` on a daily basis, you might be using the wrong tool for the
job at least some of the time. It should be clear by the end of this article why
that is the case.

Now that we have looked at the most simple code loading behavior Ruby has to
offer, we will jump straight into the deep end and explore one of its most
complex options: loading code on demand via `Kernel#autoload`.

### Kernel#autoload

Regardless of whether you've used it explicitly in your own projects, the
concept of automatically loading code on demand should be familiar to anyone
familiar with Rails. In Rails, none of the classes or modules you define are
loaded until the first time they are referenced in your running program. There
are two main benefits to this design: faster startup time and delayed loading of
optional dependencies.

Rails uses its own customized code to accomplish this result, but the basic idea
is similar to what can be done with Ruby's `autoload()` method. To illustrate
how `autoload()` works, let's revisit our `Calendar` class that we began
building while discussing `load()`. This time, we have a file called
_calendar.rb_ that contains only the definition of the `Calendar` class, not the
code that actually calls methods on it:

```ruby
class Calendar
  def initialize(month, year)
    @month = month
    @year  = year
  end

  # A simple wrapper around the *nix cal command.
  def to_s
    IO.popen(["cal", @month.to_s, @year.to_s]) { |io| io.read }
  end
end
```

The following irb session demonstrates the behavior of `autoload()`. 

```console
>> autoload(:Calendar, "./calendar.rb") #1
=> nil
>> defined?(Calendar)                   #2
=> nil
>> puts Calendar.new(8,2011)            #3
    August 2011
Su Mo Tu We Th Fr Sa
    1  2  3  4  5  6
 7  8  9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30 31

=> nil
>> defined?(Calendar)                   #4
=> "constant"
```

In our first step, we set up the `autoload()` hook, instructing Ruby to load the
file _calendar.rb_ at the time that the first constant lookup happens for the
Calendar constant. In the second step, we check to ensure that `autoload()` does
not actually load the file for you automatically by verifying that Calendar has
not yet been defined. Then, in our third step, we build and output our Calendar.
Last, we see that the constant is now defined.

This exposes us to some cool Ruby voodoo while also raising a lot of questions.
It may help to approximate how `autoload()` might be implemented in order to
wrap your head around the idea. Although the following code is evil and should
never be used for anything but educational purposes, it simulates the load on
demand behavior nicely.

```ruby
$load_hooks = Hash.new

module Kernel
  def fake_autoload(constant_name, file_name)
    $load_hooks[constant_name] = file_name
  end
end

def Object.const_missing(constant)
  load $load_hooks[constant]
  const_get(constant)
end

fake_autoload :Calendar, "./calendar.rb"
p defined?(Calendar)
puts Calendar.new(8,2011)
p defined?(Calendar)
```

After reading the previous example code and playing with it a bit, remember the
dependency on `const_missing()` and forget pretty much everything else about the
implementation. The real `autoload()` handles a lot more cases than this trivial
example gives it credit for.

With the `const_missing()` dependency in mind, try to guess what will happen
when the following code is run:

```ruby
class Calendar; end

autoload :Calendar, "./calendar.rb"
p defined?(Calendar)
puts Calendar.new(8,2011)
p defined?(Calendar)
```

If you guessed that it didn't output a nicely formatted calendar, you guessed
correctly. Below you can see that when I run this script, none of the code in
_calendar.rb_ ever gets loaded, so the default `Object#initialize` and
`Object#to_s` are being called instead:

```console
"constant"
<Calendar:0x0000010086d6b0>
"constant"
```

Because `autoload()` does not check to see whether a constant is already defined
when it registers its hook, you do not get an indication that the _calendar.rb_
file was never loaded until you actually try to use functionality defined in
that file. Thus `autoload()` is safe to use only when there is a single, uniform
place where a constant is meant to be defined; it cannot be used to
incrementally build up class or module definitions from several different source
files.

This sort of rigidity is frustrating, because unlike `load()`, which does not care
how or where you define your code, `autoload()` is much more opinionated. What
you've seen here is a single example of the constraints it puts on you, but it
is easy to imagine other scenarios in which `autoload()` can feel like a brittle
way to load code. I'll leave it up to you to try to figure out some of those
issues, but feel free to ask me for some hints if you get stumped.

In the context of Rails—particularly when working in development mode, in which
the whole environment gets reloaded on every request—some form of automatic
loading makes sense. However, outside of that environment, the drawbacks of
`autoload()` tend to outweigh the benefits, so most Ruby projects tend to avoid
it entirely by making heavy use of `require()`.

### Kernel#require()

If you've written any code at all outside of Rails, odds are you've used
`require()` before. It is actually quite similar to `load()` but has a few
additional features that come in handy. To illustrate how `require()` works,
let's revisit our original _calendar.rb_ file, the one that had a bit of code to
be executed at the end of it.

```ruby
class Calendar
  def initialize(month, year)
    @month = month
    @year  = year
  end

  # A simple wrapper around the *nix cal command.
  def to_s
    IO.popen(["cal", @month.to_s, @year.to_s]) { |io| io.read }
  end
end

puts Calendar.new(8, 2011)
```

If we attempt to load this code twice via `require()`, we immediately see an
important way in which it differs from `load()`.

```console
>> require "./calendar.rb" #1
    August 2011
Su Mo Tu We Th Fr Sa
    1  2  3  4  5  6
 7  8  9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30 31

=> true
>> require "./calendar.rb" #2
=> false
```

When I ran `require()` the first time, the familiar calendar output greeted me,
and then the function returned a true value. The second time I ran it, nothing
happened and the function returned false. This is a feature, and not a bug. The
following code is a crude approximation of what is going on under the hood in
`require()`:

```ruby
$LOADED_BY_FAKE_REQUIRE = Array.new

def fake_require(file)
  full_path = File.expand_path(file)
  return false if $LOADED_BY_FAKE_REQUIRE.include?(full_path)

  load full_path
  $LOADED_BY_FAKE_REQUIRE << full_path

  return true
end
```

This behavior ensures that each file loaded by `require()` is loaded exactly
once, even if the `require()` calls appear in many places. Therefore, updates to
those files will not take effect once they have already been loaded. Although this
behavior makes `require()` less suitable than `load()` for quick exploratory
code loading, it does prevent programs from needlessly reloading the same code
again and again, similar to how `autoload()` works once a constant has been
loaded.

Another interesting property of `require()` is that you can omit the file
extension when loading your code. Thus `require("./calendar")` will work just as
well as `require("./calendar.rb")`. Though this may seem like a small feature,
the reason it exists is that Ruby can load more than just Ruby files.
When you omit an extension on a file loaded with `require()`, it will attempt to
load the file with the ".rb" extension first, but will then cycle through the
file extensions used by C extensions as well, such as ".so", ".o", and ".dll".
Despite being an obscure property, it's one that we often take for
granted when we load certain standard libraries or third-party gems. This
behavior is another detail that separates `require()` from `load()`, as the
latter can work only with explicit file extensions.

The main benefit of using `require()` is that it provides the explicit,
predictable loading behavior of `load()` with the caching functionality of
`autoload()`. It also feels natural for those who use RubyGems, as the standard
way of loading libraries distributed as gems is via the patched version of
`Kernel#require()` that RubyGems provides.

Using `require()` will take you far, but it suffers from a pretty irritating
problem—shared by `load()` and `autoload()`—with the way it looks up files. The
`require_relative()` method is meant to solve that problem, so we'll take a look at it
now.

### Kernel#require_relative()

Each time I referenced files using a relative path in the previous examples, I
wrote the path to explicitly reference the current working directory. If you're
used to using Ruby 1.8, this may come as a surprise to you. If you've been using
Ruby 1.9.2, it may or may not appear to be the natural thing to do. However, now
is the time when I confess that it's almost always the wrong way to go about
things.

Ruby 1.9.2 removes the current working directory from your path by default for
security reasons. So, in our previous example, if we attempted to write
`require("calendar")` instead of `require("./calendar")`, it would fail on Ruby
1.9.2 even if we invoked irb in the same folder as the _calendar.rb_ file.
Explicitly referencing the current working directory works on both Ruby 1.8.7
and Ruby 1.9.2, which is why this convention was born. Unfortunately, it is an
antipattern, because it forces us to assume that our code will be run from a
particular place on the file system.

Imagine a more typical directory structure, such as this:


```console
lib/
  calendar.rb
  calendar/
    month.rb
    year.rb
bin/
  ruby_calendar.rb
```

We could have a _bin/ruby_calendar.rb_ file that looks like this code:

```ruby
require "lib/calendar"

case ARGV.size
when 2
  puts Calendar::Month.new(ARGV[0], ARGV[1])
when 1
  puts Calendar::Year.new(ARGV[0])
else
  raise "Invalid arguments"
end
```

Similarly, our _lib/calendar.rb_ file might include `require()` calls such as
these:

```ruby
require "lib/calendar/year"
require "lib/calendar/month"
```

Now if we run _bin/ruby_calendar.rb_ from the project root, things will work as
expected.

```bash
$ ruby bin/ruby_calendar.rb 2011
# ...
```

But if we ran this file from any other directory, it'd fail to work as expected
because the relative paths would be evaluated relative to wherever you executed
the files from, not relative to where the files live on the file system. That
is, if you execute _ruby_calendar.rb_ in the _bin/_ folder, it would look for a file
called _bin/lib/calendar.rb_.

One way to solve this problem is to use the same mechanism that the Ruby
standard library and RubyGems use: modify the load path.

In _bin/ruby_calendar.rb_, we rewrite our code to match this:

```ruby
$LOAD_PATH.unshift("#{File.dirname(__FILE__)}/../lib")
require "calendar"

case ARGV.size
when 2
  puts Calendar::Month.new(ARGV[0], ARGV[1])
when 1
  puts Calendar::Year.new(ARGV[0])
else
  raise "Invalid arguments"
end
```

Because we've added the _lib/_ folder to the lookup path for all `require()`
calls in our application, we can modify _lib/calendar.rb_ to match the
following:

```ruby
require "calendar/year"
require "calendar/month"
```

This approach makes it possible to run the _ruby_calendar.rb_ program from any
location within the file system, as long as we tell ruby where to find it. That
means you can run it directly from within the _bin/_ folder, or even with an
absolute path.


```bash
# NOTE: this is common in cron jobs.
$ ruby /Users/seacreature/devel/ruby_calendar/bin/ruby_calendar.rb
```

This approach works, and was quite common in Ruby for some time. Then, people
began to get itchy about it, because it is definitely overkill. It effectively
adds an entire folder to the `$LOAD_PATH`, giving Ruby one more place it has to
look on every require and possibly leading to unexpected naming conflicts
between libraries.

The solution to that problem is to not mess with the `$LOAD_PATH` in your code.
Therefore, you expect either that the `$LOAD_PATH` variable will be properly set
by the `-I` flag when you invoke ruby or irb, or that you have to write code
that dynamically determines the proper relative paths to require based on your
current working directory. The latter approach requires less effort from the end
user but makes your code ugly. Below you'll see what people resorted to on Ruby
1.8 before a better solution came along:


```ruby
# bin/ruby_calendar.rb
require "#{File.dirname(__FILE__)}/../lib/calendar"

case ARGV.size
when 2
  puts Calendar::Month.new(ARGV[0], ARGV[1])
when 1
  puts Calendar::Year.new(ARGV[0])
else
  raise "Invalid arguments"
end

# lib/calendar.rb
require "#{File.dirname(__FILE__)}/calendar/year"
require "#{File.dirname(__FILE__)}/calendar/month"
```

Using this approach, you do not add anything to the `$LOAD_PATH` but instead
dynamically build up relative paths by referencing the `__FILE__` variable and
getting a path to the directory it's in. This code will evaluate to different
values depending on where you run it from, but in the end, the right path will
be produced and things will just work.

Predictably, people took efforts to hide this sort of ugliness behind helper
functions, and one such function was eventually adopted into Ruby 1.9. That
helper is predictably called `require_relative()`. Using `require_relative()`,
we can simplify our calls significantly while preserving the "don't touch the
`$LOAD_PATH` variable" ethos:


```ruby
# bin/ruby_calendar.rb
require_relative "../lib/calendar"

case ARGV.size
when 2
  puts Calendar::Month.new(ARGV[0], ARGV[1])
when 1
  puts Calendar::Year.new(ARGV[0])
else
  raise "Invalid arguments"
end

# lib/calendar.rb
require_relative "calendar/year"
require_relative "calendar/month"
```

This code looks and feels like it would work in the way that we'd like to think
`require()` would work. The files we reference are relative to the file in which
the actual calls are made, rather than the folder in which the script was
executed. For this reason, it is a much better approach than pretty much
anything I've shown so far.

Of course, it is not a perfect solution. In some cases, it does not work as
expected, such as in Rackup files. Additionally, because it's a Ruby 1.9
feature, it's not built into Ruby 1.8.7. The former issue cannot be worked
around, but the latter can be. I'll go into a bit more detail about both of
these issues in the recommendations section, which is coming up right now.

### Conventions and Recommendations

If you remember one thing from this article, it should be that whenever it's
possible to use `require_relative()` and there isn't an obviously better
solution, it's probably the right tool to reach for. It has the fewest 
dark corners and pretty much just works.

That said, take my advice with a grain of salt. I no longer actively
maintain any Ruby 1.8 applications, nor do I have to deal with code that must
run on both Ruby 1.8 and 1.9. If I were in those shoes again, I'd weigh
out four different possible ways of approaching things:

1) Explicitly use `require()` with the `File.dirname(__FILE__)` hack

2) Write my own `require_relative()` implementation leaning on the previous
   hack that gets defined only if `require_relative()` isn't already
   implemented

3) Add a dependency for Ruby 1.8 only on the `require_relative` gem

4) Assume that `$LOAD_PATH` is set for me via the `-I` flag on execution,
   or some other means, and then write ordinary require calls 
   relative to the _lib/_ folder in my project.

I can't give an especially good picture of when I'd pick one of those options
over the other, because it's been about a year since I've last had to think
about it. But any of those four options seem like at least reasonable ideas. I
would *not* employ the common but painfully ugly
`require("./file_in_the_working_dir.rb")` hack in any code that I expected to
use for anything more than a spike or demonstration.

Whether using `require_relative()` explicitly, or one of the workarounds listed
above, I like to use some form of relative require whenever I can. Occasionally,
I do use `load()`, particularly in spikes where I want to  reload files into an
irb session without restarting irb.  But I don't think that `load()` ends up in
production code of mine unless there is a very good reason to use it. A possible
good reason would be if I were building some sort of script runner, such as what
you could find in Rails when it reloads your development environment or in
autotest. In the autotest case in particular, in which your test files are
reloaded each time you make an edit to any of your files in your project, it
seems that using `load()` with its obscure second parameter is a good idea. But
these are not tools I'd expect to be building on a daily basis, so `load()`
remains somewhat of an obscure tool for me.

I never use `autoload()`. I've just not run into the issues that some folks in
Rails experience regarding slow startup times of applications in any way that
has mattered to me. I feel like the various gotchas that come along with using
`autoload()` and the strict conventions it enforces are not good things to
impose on general-purpose uses of Ruby. I don't know whether I think that it
makes sense in the context of Rails, but that's a very different question than
whether it should be used in ordinary Ruby applications and libraries. It makes
at least some sense in Rails, but in most Ruby applications, it does not. The
only time I might think about looking into `autoload()` is if I had some sort of
optional dependency that I wanted to be loaded only on demand. I have never
actually run into that issue, and I've found that the following hack provides a
way to do optional dependencies that seems to work just fine:

```ruby
begin
  require "some_external_dependency"
  require "my_lib/some_feature_that_depends_on_dependency"
rescue LoadError
  warn "Could not load some_external_dependency."+
       " Some features are disabled"
end
```

But really, optional dependencies are things I very rarely need to think about.
There are valid use cases for them, but unless something is very difficult to
install or your project is specifically meant to wrap various mutually exclusive
dependencies, I typically will just load up all my dependencies regardless of
whether the user ends up using them. This policy has not caused me problems,
but your mileage will vary depending on the type of work you are doing.

On a somewhat tangential note, I try to avoid things like dynamic require calls
in which I walk over a file list generated from something like `Dir.glob()` or
the like. I also avoid using `Bundler.require()`, even when I use bundler. The
reason I avoid these things is because I like to be able to control the exact order
in which my files and my dependencies are being loaded. It's possible to not
have to worry about this sort of thing, but doing so requires a highly
disciplined way of organizing your code so that files can be loaded
independently. 

### Questions / Feedback 

I hope this background story about the various ways to load code along with the
few bits of advice I've offered in the end here have been useful to you. I am
happy to answer whatever questions you have; just leave a comment below.


================================================
FILE: articles/v2/002-sticky-problems.md
================================================
One project that I've always wanted to work on is the creation of a generic table data structure for Ruby. I've partially solved this problem in a dozen contexts before, but I've never come up with something I've been happy enough with to extract into its own gem. 

Every time I've attempted to work on this project in the past, I've set myself up for failure by thinking of the seemingly endless amount of things that a generic table structure would need to implement. Recently, I approached the problem from a different angle and ended up feeling a lot happier with the way things went. In this article, I share the lessons I learned that helped me attack this very sticky problem.

### Lesson 1: Work on specific cases before attempting to generalize

In the past, I had always been bogged down by thinking of all the possible ways my table structure was going to be used. This time around, I forced myself to think of a single, specific use case to focus on first. Instantly, the idea of performing some manipulations on sales data came to mind, because I hate the reporting features PayPal gives me.

Typically, I'd start by creating some fake data that was themed to fit this scenario, but lately I've been experimenting more and more with trying to work with real data whenever it isn't too inconvenient. I've had mixed results with that approach, but this time around, a few minutes of cleanup work got me from a nastily formatted CSV file with way too much information to a nice array of arrays in JSON format that's concise enough to let us see the entire contents of the file, as shown here: 

    [["Date","Payments Received","Amount Received",
      "Payment Fees","Net Amount"],
     ["6/30/2011","7","52.00","-3.93","48.07"],
     ["6/29/2011","14","152.00","-8.98","143.02"],
     ["6/28/2011","5","40.00","-2.73","37.27"],
     ["6/27/2011","0","0.00","0.00","0.00"],
     ["6/26/2011","2","12.00","-0.99","11.01"],
     ["6/25/2011","1","4.00","-0.46","3.54"],
     ["6/24/2011","0","0.00","0.00","0.00"],
     ["6/23/2011","1","16.00","-0.76","15.24"],
     ["6/22/2011","2","12.00","-0.95","11.05"],
     ["6/21/2011","0","0.00","0.00","0.00"],
     ["6/20/2011","0","0.00","0.00","0.00"],
     ["6/19/2011","1","16.00","-0.76","15.24"],
     ["6/18/2011","0","0.00","0.00","0.00"],
     ["6/17/2011","1","4.00","-0.42","3.58"],
     ["6/16/2011","0","0.00","0.00","0.00"],
     ["6/15/2011","0","0.00","0.00","0.00"],
     ["6/14/2011","2","36.00","-1.69","34.31"],
     ["6/13/2011","0","0.00","0.00","0.00"],
     ["6/12/2011","0","0.00","0.00","0.00"],
     ["6/11/2011","1","4.00","-0.46","3.54"],
     ["6/10/2011","0","0.00","0.00","0.00"],
     ["6/9/2011","1","4.00","-0.46","3.54"],
     ["6/8/2011","0","0.00","0.00","0.00"],
     ["6/7/2011","1","4.00","-0.46","3.54"],
     ["6/6/2011","0","0.00","0.00","0.00"],
     ["6/5/2011","2","20.00","-1.22","18.78"],
     ["6/4/2011","4","52.00","-3.23","48.77"],
     ["6/3/2011","9","100.00","-6.13","93.87"],
     ["6/2/2011","8","72.00","-4.79","67.21"],
     ["6/1/2011","8","136.00","-6.67","129.33"]]

My next step was to come up with a question about this data that would be easy to answer by visual inspection, and trivial to represent using nothing more than primitive Ruby objects. The question I settled on was, "How many payments were received on 6/14/2011?"

    data = JSON.parse(File.read("sales.json"))
    row = data.find { |x| x[0] == "6/14/2011" }
    p row[1] #=> "2"

If instead I'd chosen a question that required too much thought to answer, the judgmental side of my brain would have kicked in too early and derailed my efforts to get even the smallest start on the problem. However, by picking an extremely simple problem to work on, I managed to turn the judge's voice in my head into a collaborator rather than an interrogator.

The judge looked at these three lines of code and instantly started in with his criticisms.

JUDGE: _"This is just terrible! If the order of the columns in your data changes, this code is going to break! Also, your output is clearly supposed to be numeric, but you're getting back a string. And that JSON call looks ugly too, and your mother is a . . . "_

Addressing all his points right away would have been a bad idea because it would have led me into a spiral of self-doubt. Instead, I just looked for one thing we could agree on so that we had some common ground to start from. The criticism about the column order dependency was a pretty good one, so I decided to work with that.

Whenever I think of good APIs that I've seen for table interactions, the approach ActiveRecord 3 takes always comes to mind. It seemed to fit this particular problem well, so I cautiously asked the judge for his opinion on the following code.

    table = Table.new(data)
    row   = table.where("Date" => "6/14/2011").first

    p row["Payments Received"] #=> "2"

JUDGE: _"Hmm . . . that `first()` call looks weird, but this is a LOT better than your last attempt. I won't be convinced until I see an implementation, though. Also, do you really think baking in the assumption that there are headers in the first row of your data is a good idea?"_

His point about headers was a good one, so—knowing that this was the closest thing I could get to approval from the judge—I started writing some tests that took his suggestion into account.

    describe "Table" do
      it "must be able to search for matching records" do
        fixture_dir = "#{File.dirname(__FILE__)}/fixtures"
        json_data   = File.read("#{fixture_dir}/sales.json")
        
        names, *data  = JSON.parse(json_data)
        table         = Table.new(data, names)

        expected_payments = "2"
        
        match = table.where("Date" => "6/14/2011").first
        match["Payments Received"].must_equal(expected_payments)
      end
    end

Before I even got a chance to run these tests, the judge snapped at me.

JUDGE: _"Whoa, that fixture loading code looks disgusting. Do you really think that you can get away with that while I'm watching?"_

He was right, of course, so I opened up my test helper file and wrote a little method to hide the messy code and isolate it to one place:

    def json_fixture(filename)
      test_dir = File.dirname(__FILE__)
      JSON.parse(File.read("#{test_dir}/fixtures/#{filename}.json"))
    end

Using this helper file, I was able to make my tests look a whole lot better.
   
    describe "Table" do
      it "must be able to search for matching records" do
        names, *data  = json_fixture("sales")
        table         = Table.new(data, names)

        expected_payments = "2"
        
        match = table.where("Date" => "6/14/2011").first
        match["Payments Received"].must_equal(expected_payments)
      end
    end

I looked to the judge for approval and got a half-hearted shrug, which was good enough for me. By the time I finished writing these tests, I already had an idea in mind for how to implement a solution.

    class Table
      def initialize(data, attribute_names)
        @data = data.map { |e| Hash[attribute_names.zip(e)] }
      end

      def where(conditions)
        @data.select do |row|
          conditions.all? { |k,v| row[k] == v }
        end
      end
    end

Even though this code passed the test, the judge could no longer contain himself, and fired off another burst of scathing criticism.

JUDGE: _"This code is a giant hack. You store each row in a hash, but hashes are meant for unordered content and a row is necessarily ordered. Yes, I know that in Ruby 1.9 you can iterate over hashes in insert order, but that's going to come back and bite you later. What if you want to introduce a column rename operation in the future? There is no way to do that with your current code without either changing the iteration order or generating entirely new hashes for the entire structure. Even worse, hash keys must be unique. What if you have a table with two column names that are the same? To make matters worse, the code reeks of primitive obsession. Unless you introduce a `Row` object soon, every feature you add to `Table` is going to get more and more complicated because it will have two concerns: representing an ordered list of rows and manipulating the data within those rows. But who says that users are going to want to work with just rows? What if they want to organize their data by columns instead? Oh, and as I was saying about your mom . . . "_

This rant was too much to take in all at once, and I felt overwhelmed. I knew that responding directly to his criticisms line by line would only fan the flames. The points he made about problems I might run into later could have very well been valid, but thinking about them at this particular point in time would have sent me down a deadly path of feature creep. I needed to take a step back and consider the big picture.

### Lesson 2: Seek ways to defer tough design decisions

After catching my breath, I realized that I could address many of the judge's points without actually making any major decisions about implementation details. I could do this by introducing a `Record` object. This object would initially have a core implementation similar to my previous example but would make it much easier to introduce changes down the line. The following test describes what I was shooting for:

    describe "Record" do
      # Every example works against the same single row of sales data.
      before do
        names   = ["Date", "Payments Received", "Amount Received",
                   "Payment Fees", "Net Amount"]
        data    = ["6/14/2011", "2", "36.00", "-1.69", "34.31"]

        @record = Record.new(data, names)
      end

      it "must allow access to attributes by name" do
        @record["Payments Received"].must_equal("2")
        @record["Payment Fees"].must_equal("-1.69")
      end

      it "must support conditional matching" do
        @record.matches?("Date" => "6/14/2011").must_equal(true)
        @record.matches?("Date" => "6/12/2011").must_equal(false)
      end
    end

To pass these tests, I pushed logic down from the `Table` class into a newly created `Record` class:

    # A single row of data whose values are looked up by attribute name.
    class Record
      # values          - the row's values
      # attribute_names - the name for each value, in the same order
      def initialize(values, attribute_names)
        @data = {}

        attribute_names.each_with_index do |name, index|
          @data[name] = values[index]
        end
      end

      # Returns the value stored under the given attribute name.
      def [](key)
        @data[key]
      end

      # True when every name => value pair in conditions equals the value
      # this record holds under that name.
      def matches?(conditions)
        conditions.each do |name, expected|
          return false unless self[name] == expected
        end

        true
      end
    end

The judge was eerily silent as this test went green. Perhaps he was waiting to see what my next move would be, or maybe he had just run out of jokes about my mom. Nonetheless, I took this as my cue to go update my `Table` code so that it would use `Record` objects instead of hashes:

    # A collection of Record objects built from raw row data.
    class Table
      # Wraps each row of data in a Record, handing it the attribute names.
      def initialize(data, attribute_names)
        @data = data.map { |e| Record.new(e, attribute_names) }
      end

      # Returns the records for which Record#matches? accepts the conditions.
      def where(conditions)
        @data.select { |record| record.matches?(conditions) }
      end
    end

Just after I reran my `Table` tests and saw that they were passing, the judge said something that I had to ask him to repeat, because it was so surprising.

JUDGE: _"Not bad."_

He refused to elaborate, but I think I finally figured out why he approved of this newer version of my code. After thinking about his previous barrage of complaints, I realized that this new `Table` implementation did not raise any of the same questions. When you look at the problem at hand and then at the object that directly solves that problem, the object looks natural, well focused, and unsurprising. A `Table` is a collection of `Record` objects. The `Table#where` method selects from that collection the records that match the conditions. These explanations are very easy to follow and directly line up with the code itself.

The thing that made this new `Table` code "not bad" in the eyes of the judge is that it is a proper abstraction, whereas my previous implementation was just an indirect wrapper over some primitive operations. My newer code was written with the changing needs for our `Record` object in mind, which is what made all the difference.

### Lesson 3: Let real use cases be your guide, not imagined scenarios

In his various outbursts, the judge pointed out lots of different things that he felt my `Table` code should do. Here is a short list of them, for those who haven't been keeping track:

* Should be able to set column data types (i.e., convert a column that contains numeric strings into numeric values)
* Should be able to deserialize an array-of-arrays data structure from JSON
* Should provide a way to match a single record rather than calling `first()` on the array returned by `Table#where`
* Should preserve data ordering explicitly
* Should support both by column and by row access
* Should be able to rename columns
* Should be able to support multiple columns with the same name

On their own, all these ideas sound like good ones. But taken together, we're talking about a lot of additional work for a nonspecific gain. I know from previous experience that going down this path will lead to a very complex, very bloated object. Take a look at my `Ruport::Data::Table` implementation from several years ago if you want to see just how complicated this sort of thing can get:

https://github.com/ruport/ruport/blob/master/lib/ruport/data/table.rb 

This time around, I'm going to take a very different approach, adding features to my `Table` object only when I have a real project that depends on that feature. Even when there is something the API doesn't support, I will try to work around the problem and see how much pain it causes me. Only after something causes me a lot of pain in one place or a little pain spread across several places will I add new features.

I am very interested to see what kind of code is produced from this sort of aggressive use-case-driven development. But testing this idea is something that we're going to need to do collectively as homework, because not enough time passed between when I started this experiment and when I published the article you are reading now. 

If you'd like to help, please take a look at the following repository on Github and follow the instructions in the README.

http://github.com/sandal/waffle

### Reflections

Hopefully, by following in my footsteps, you were able to notice some similarities to your own struggles with sticky projects. As this article was just a story about an approach that seems to have worked for me, your mileage will probably vary. Still, I'd love to hear what you think of the ideas I mentioned here, especially if you have a different way of dealing with this kind of problem.


================================================
FILE: articles/v2/003-closure-double-edged-sword.md
================================================
Every `Proc` object is a closure, which means that each code block you write captures references to data from its surrounding scope for later use. Although that sounds highly academic, it has some very practical benefits that you're probably already aware of, as well as some drawbacks that you may or may not know about.

### Closures make block-based APIs feel natural

The closure property of `Proc` objects is what makes the following snippet of code possible:

```ruby
# Minimal wrapper around the data it is given, used to demonstrate closures.
class Vector
  def initialize(data)
    @data = data
  end

  # Returns the result of multiplying each element of the data by num.
  def *(num)
    # `num` is a local of this method, not of the block; the block can
    # still read it because it closes over the enclosing scope.
    @data.map { |e| e * num }
  end
end

>> Vector.new([1,2,3]) * 7
=> [7, 14, 21]
```

In this example, when we call `@data.map` and provide it with a code block to execute, we have no trouble accessing the `num` variable. However, this local variable is not defined within the block's local scope; it's defined within the enclosing scope (the `Vector#*` method). To see that these are truly two different scopes, check out the following examples, which clarify the relationship between the `Proc` object's code and its enclosing scope.

```ruby
# The block reads `y`, a local variable of the enclosing method.
def proc_can_see_outer_scope_locals
  y = 10
  lambda { p defined?(y) }.call
end

# The block assigns to the enclosing method's `y`; the new value is
# visible in the method after the block has run.
def proc_can_modify_outer_scope_locals
  y = 10
  lambda { y = 20 }.call
  p y
end

# Here `y` is first assigned inside the block, so it is local to the block
# and is not defined in the method once the block has returned.
def proc_destroys_block_local_vars_on_exit
  lambda { y = 10 }.call
  p defined?(y)
end

proc_can_see_outer_scope_locals          #=> "local-variable"
proc_can_modify_outer_scope_locals       #=> 20
proc_destroys_block_local_vars_on_exit   #=> nil
```

The first example demonstrates that a `Proc` object's code can access the local variables of its enclosing scope, which is exactly what is going on in our `Vector` example. The second example is an answer to a question that arises naturally from the first example, which is whether the `Proc` object's code can modify the contents of the local variables that are defined in its enclosing scope. The third example simply verifies that once the `Proc` object's code has been called, any variables set up within its own code block are wiped out and are not visible from the outer scope.

### Closures make memory management complicated

Though they take some getting used to, the behaviors provided by the closure property in `Proc` objects are relatively easy to understand and have many practical benefits. However, they do give rise to a complex behavior that sometimes leads to surprising results. Check out the following example for a bit of a head trip.

``` ruby
# Returns a lambda that increments a counter and returns its new value.
# The count lives in `x`, a local variable that the lambda closes over.
def new_counter
  x = 0
  lambda { x += 1 }
end

counter_a = new_counter
counter_a.call
counter_a.call

p counter_a.call #=> 3

# A second call to new_counter sets up a separate `x`, so counting restarts.
counter_b = new_counter
p counter_b.call #=> 1

# counter_a is unaffected by counter_b and carries on from its own count.
p counter_a.call #=> 4
```

In the example code, we see that the two `Proc` objects returned by the `new_counter()` method are referencing two different locations in memory. This behavior is a bit confusing because we can usually count on methods to clean up after themselves once they wrap up whatever they are doing. But because the purpose of a `Proc` object is in part to be able to delay the execution of code, it's impossible for the `new_counter()` method to do this cleanup task for us. So here's what happens: `counter_a` gets a reference to the local variable `x` that was set up the first time we called `new_counter()`, and `counter_b` gets a reference to a different local variable `x` that was set up the second time we called `new_counter()`.

If used correctly, this behavior can be a feature. It's not one that you or I would use every day, but because this approach can be used to maintain state in a purely functional way, it is at least academically interesting. However, in most ordinary use cases, it is much more likely that this behavior is going to cause a memory leak than that it will do anything helpful for you, as it leads to lots of seemingly throwaway data stored in local variables getting dangling references that prevent that data from being garbage collected.

### Not all closure-based memory leaks are so obvious

Capturing references to locals from the enclosing scope for longer than necessary isn't the only way that you can cause leaks with `Proc` objects. Every `Proc` object also creates a reference to the object that it was defined within, creating a leak that is even harder to notice. Let's take a look at an example of how that can come back to bite you.

Suppose we have a configurable logger module and we want to record a message to the logs each time a new `User` object is created. If we were going for something simple and straightforward, we might end up with code similar to what you see here:

```ruby
# Simple logger that appends each message to `output` as soon as it is logged.
module Logger
  # Makes the module's instance methods callable on the module itself
  # (Logger.log, Logger.output).
  extend self

  # The object that messages are appended to with `<<`.
  attr_accessor :output

  # Appends msg, followed by a newline, to output.
  def log(msg)
    output << "#{msg}\n"
  end
end

# Records a log message every time an instance is created.
class User
  def initialize(id)
    @id = id
    # The message string is built and written immediately.
    Logger.log("Created User with ID #{id}")
  end
end
```

But if we wanted to be a bit more fancy, we could build a logger that delayed the writing of the logs until we explicitly asked for them to be written. We could use `Proc` objects for lazy evaluation, giving us a potential speed boost whenever we didn't actually need to view our logs.

```ruby
# Logger that queues log actions as blocks and runs them only when
# explicitly flushed, so no message is built unless the logs are wanted.
#
# `flush` does not empty the queue, so every queued block (and whatever it
# closes over) stays referenced after flushing.
module LazyLogger
  # Makes the module's instance methods callable on the module itself.
  extend self

  # The object handed to each callback when the log is flushed.
  attr_accessor :output

  # Queues the given block; it is called with `output` on the next flush.
  def log(&callback)
    @log_actions ||= []
    @log_actions << callback
  end

  # Calls every queued callback with `output`, in the order it was logged.
  # Flushing before anything has been logged is a no-op rather than an
  # error, because the queue is initialised here as well as in `log`.
  def flush
    @log_actions ||= []
    @log_actions.each { |e| e.call(output) }
  end
end

# Queues a log action every time an instance is created.
class User
  def initialize(id)
    @id = id
    # The block is only stored here; it runs when LazyLogger.flush is called.
    # It is created inside this instance, so it holds a reference to it.
    LazyLogger.log { |out| out << "Created User with ID #{id}" }
  end
end
```

Although this code may look simple, it has a subtle memory leak. The leak can be verified via the following simple script, which shows that 1000 users still exist in the system, even though the objects were created as throwaway objects:

```ruby
LazyLogger.output = ""
# Create 1000 users without keeping a reference to any of them.
1000.times { |i| User.new(i) }

# Request a garbage collection run, then count the User instances that remain.
GC.start
p ObjectSpace.each_object(User).count #=> 1000
```

If instead we use our more vanilla code that does not use `Proc` objects, we see that for the most part*, the garbage collector has done its job.

```ruby
Logger.output = ""
# Create 1000 users without keeping a reference to any of them.
1000.times { |i| User.new(i) }

# Request a garbage collection run before counting the remaining instances.
GC.start

# (*): I expected below to be 0, but GC clearly ran. Weird.
p ObjectSpace.each_object(User).count #=> 1
```

Our `LazyLogger` leaks because when `LazyLogger.log` is called with a block from within `User#initialize`, a new `Proc` object is created that holds a reference to that user object. That `Proc` object ends up getting stored in the `@log_actions` array in the `LazyLogger` module and needs to be kept alive at least until `LazyLogger.flush` is called in order for everything to work as expected. Thus our `User` objects that we expected to get thrown away still have live references to them, so they don't end up getting garbage collected.

These kinds of problems can be very easy to run into and very hard to work around. In fact, I've been having trouble figuring out how to preserve the `LazyLogger` behavior in a way that'd plug the leak or at least mitigate it somewhat. In this particular case, it'd be possible to call `clear` on the `@log_actions` array whenever `flush` is called, and that would free up the references to the `User` instances. But that approach still ends up keeping unnecessary references alive longer than you might want, and the pattern doesn't necessarily apply generally to other scenarios.

### Reflections

Because we use code blocks so freely and tend to ignore the closure property, many Ruby applications and libraries have memory leaks in them. Even fairly experienced developers (myself included) don't necessarily design with these issues in mind. Those who do have firm memory constraints to deal with are forced to use a variety of awkward techniques to overcome this problem. 

One possible way to avoid closure-based memory leaks is to use `Method` objects in place of `Proc` objects wherever the closure properties are not required. Another option is to create `Proc` objects in a different context to avoid accidental references to objects that you want garbage collected. In fact, I recently needed to use the latter approach in order to make use of `ObjectSpace.define_finalizer`. Although that's a bit of an obscure topic, it's a good example of what we've just been talking about, so I recommend checking out [this article by Mike Perham](http://www.mikeperham.com/2010/02/24/the-trouble-with-ruby-finalizers/). 

I don't want to give much more advice on handling memory management, because it's not an area in which I'm particularly strong. I welcome any corrections to what I've said here, if you find that I've made a mistake anywhere.

### Questions/Discussion Points

This article wasn't especially long, but the material is quite dense, and I don't want to push my luck by covering too many concepts at once. That said, I've provided a few exercises for those who want to dig a bit deeper and would be happy to continue discussing the topic in general now that we have a starting point. Leave a comment if something is on your mind!

* Find a good example of an API that allows you to use a `Method` object in place of a `Proc` object, or create your own. Investigate the performance and memory differences between the two approaches by writing benchmarks.

* Use two different approaches to implement your own `attr_accessor` function: one using the closure-based `define_method`, and another using `module_eval` with a string. Compare the performance characteristics of calling the dynamically defined methods and try to reason about why one is faster than the other, if they are not comparable to one another.

* Share your thoughts on when you need to worry about the downsides of closures, and when you don't. Come up with some metrics for determining what issues to look out for.

* CHALLENGE: Explain why my `ObjectSpace` example showed 1 `User` instance, not 0!


================================================
FILE: articles/v2/004-roll-your-own-enumerable-and-enumerator.md
================================================
When I first came to Ruby, one of the things that impressed me the most was the killer features provided by the `Enumerable` module. I eventually also came to love `Enumerator`, even though it took me a long time to figure out what it was and what one might use it for.

As a beginner, I had always assumed that these features worked through some dark form of magic that was buried deep within the Ruby interpreter. With so much left to learn just in order to be productive, I was content to postpone learning the details about what was going on under the hood. After some time, I came to regret that decision, thanks to David A. Black.

David teaches Ruby to raw beginners not only by showing them what `Enumerable` can do, but also by making them implement their own version of it! This is a profoundly good exercise, because it exposes how nonmagical the features are: if you understand `yield`, you can build all the methods in `Enumerable`. Similarly, the interesting features of `Enumerator` can be implemented fairly easily if you use Ruby's `Fiber` construct.

In this article, we're going to work through the exercise of rolling your own subset of the functionality provided by `Enumerable` and `Enumerator`, discussing each detail along the way. Regardless of your skill level, an understanding of the elegant design of these constructs will undoubtedly give you a great source of inspiration that you can draw from when designing new constructs in your own programs.

### Setting the stage with some tests

I've selected a small but representative subset of the features that `Enumerable` and `Enumerator` provide and written some tests to nail down their behavior. These tests will guide my implementations throughout the rest of this article and serve as a roadmap for you if you'd like to try out the exercise on your own.

If you have some time to do so, try to get at least some of the tests to go green before reading my implementation code and explanations, as you'll learn a lot more that way. But if you're not planning on doing that, at least read through the tests carefully and think about how you might go about implementing the features they describe.

```ruby
# A list that keeps its elements in sorted order and exposes them through
# `each`, which is all that FakeEnumerable needs from it.
class SortedList
  include FakeEnumerable

  def initialize
    @data = []
  end

  # Adds an element, re-sorts the list, and returns the list itself so that
  # calls can be chained (list << 3 << 13).
  def <<(new_element)
    @data.push(new_element)
    @data.sort!

    self
  end

  # Yields each element in sorted order, or returns a FakeEnumerator over
  # this method when called without a block.
  def each
    return FakeEnumerator.new(self, :each) unless block_given?

    @data.each { |item| yield(item) }
  end
end

require "minitest/autorun"

# Behavior expected from the FakeEnumerable methods, exercised through a
# SortedList.
describe "FakeEnumerable" do
  before do
    @list = SortedList.new

    # will get stored internally as 3,4,7,13,42
    @list << 3 << 13 << 42 << 4 << 7
  end

  it "supports map" do
    @list.map { |x| x + 1 }.must_equal([4,5,8,14,43])  
  end

  it "supports sort_by" do
    # ascii sort order
    @list.sort_by { |x| x.to_s }.must_equal([13, 3, 4, 42, 7])
  end

  it "supports select" do
    @list.select { |x| x.even? }.must_equal([4,42])
  end

  it "supports reduce" do
    # the three calling styles: operator symbol, block only, and an
    # initial value plus block
    @list.reduce(:+).must_equal(69)
    @list.reduce { |s,e| s + e }.must_equal(69)
    @list.reduce(-10) { |s,e| s + e }.must_equal(59)
  end
end

# Behavior expected from the FakeEnumerator objects returned when an
# iterator is called without a block.
describe "FakeEnumerator" do
  before do
    @list = SortedList.new

    @list << 3 << 13 << 42 << 4 << 7
  end

  it "supports next" do
    enum = @list.each

    enum.next.must_equal(3)
    enum.next.must_equal(4)
    enum.next.must_equal(7)
    enum.next.must_equal(13)
    enum.next.must_equal(42)

    # all five elements have been consumed at this point
    assert_raises(StopIteration) { enum.next }
  end

  it "supports rewind" do
    enum = @list.each

    4.times { enum.next }
    enum.rewind

    # after rewinding, the third call to next yields the third element
    2.times { enum.next }
    enum.next.must_equal(7)
  end

  it "supports with_index" do
    enum     = @list.map
    expected = ["0. 3", "1. 4", "2. 7", "3. 13", "4. 42"]  

    enum.with_index { |e,i| "#{i}. #{e}" }.must_equal(expected)
  end
end
```

If you do decide to try implementing these features yourself, get as close to the behavior that Ruby provides as you can, but don't worry if your implementation is different from what Ruby really uses. Just think of this as if it's a new problem that needs solving, and let the tests guide your implementation. Once you've done that, read on to see how I did it.

### Implementing the `FakeEnumerable` module

Before I began work on implementing `FakeEnumerable`, I needed to get its tests to a failure state rather than an error state. The following code does exactly that:

```ruby
# Skeleton only: every method is an empty stub that returns nil, so that
# the methods exist but do nothing yet.
module FakeEnumerable
  def map 
  end

  def select
  end

  def sort_by
  end

  # Accepts any number of arguments and ignores them.
  def reduce(*args)
  end
end
```

I then began working on implementing the methods one by one, starting with `map`. The key thing to realize while working with `Enumerable` is that every feature will build on top of the `each` method in some way, using it in combination with `yield` to produce its results. The `map` feature is possibly the most straightforward nontrivial combination of these operations, as you can see in this implementation:

```ruby
# Returns a new array holding the block's return value for each element
# yielded by `each`.
def map 
  out = []

  each { |e| out << yield(e) }

  out
end
```

Here we see that `map` is simply a function that builds up a new array by taking each element and replacing it with the return value of the block you provide to it. We can clean this up to make it a one liner using `Object#tap`, but I'm not sure if I like that approach because it breaks the simplicity of the implementation a bit. That said, I've included it here for your consideration and will use it throughout the rest of this article, just for the sake of brevity.

```ruby
# Returns a new array holding the block's return value for each element
# yielded by `each`. `tap` yields the empty array to its block and then
# returns that same array, which saves naming and returning it separately.
def map
  [].tap { |out| each { |e| out << yield(e) } }
end
```

Implementing `select` is quite easy as well. It builds on the same concepts used to implement `map` but adds a conditional check to see whether the block returns a `true` value. For each new yielded element, if the value returned by the block is logically true, the element gets added to the newly built array; otherwise, it does not.

```ruby
# Returns a new array of the elements for which the block returns a
# truthy value.
def select
  [].tap { |out| each { |e| out << e if yield(e) } }
end
```

Implementing `sort_by` is a little more tricky. I cheated and looked at the API documentation, which (perhaps surprisingly) describes how the method is implemented and even gives a reference implementation in Ruby. Apparently, `sort_by` uses a [Schwartzian transform](http://en.wikipedia.org/wiki/Schwartzian_transform) to convert the collection we are iterating over into tuples containing the sort key and the original element. It then uses `Array#sort` to put these in order, and it finally uses `map` on the resulting array to convert the array of tuples back into an array of the elements from the original collection. That's definitely more confusing to explain than it is to implement in code, so just look at the following code for clarification:

```ruby
# Returns an array of the elements ordered by the key that the block
# computes for each one.
#
# Implemented as a Schwartzian transform: pair every element with its key,
# sort the pairs with Array#sort, then strip the keys off again. Only the
# keys are compared (they must respond to `<=>`). Comparing whole pairs
# would fall through to the elements themselves whenever two keys tie, and
# raise an ArgumentError if those elements cannot be compared.
def sort_by
  map { |a| [yield(a), a] }.sort { |x, y| x[0] <=> y[0] }.map { |a| a[1] }
end
```

The interesting thing about this implementation is that `sort_by` is dependent on `map`, both on the current collection being iterated over as well as on the `Array` it generates. But after tracing it down to the core, this method is still expecting the collection to implement only the `each` method. Additionally, because `Array#sort` is thrown into the mix, your sort keys need to respond to `<=>`. But for such a powerful method, the contract is still very narrow.

Implementing `reduce` is a bit more involved because it has three different ways of interacting with it. It's also interesting because it's one of the few `Enumerable` methods that isn't necessarily designed to return an `Array` object. I'll let you ponder the following implementation a bit before providing more commentary, because reading through it should be a good exercise. 

```ruby
# Combines the elements yielded by `each` into a single value.
#
# Three calling styles are supported:
#
#   reduce(:+)                    # operator given as a Symbol
#   reduce { |s,e| s + e }        # block only; the first element seeds the result
#   reduce(-10) { |s,e| s + e }   # explicit initial value plus a block
#
# Returns the accumulated value, or nil if nothing was yielded and no
# initial value was given.
def reduce(operation_or_value=nil)
  case operation_or_value
  when Symbol
    # convert things like reduce(:+) into reduce { |s,e| s + e }
    return reduce { |s,e| s.send(operation_or_value, e) }
  when nil
    acc    = nil
    seeded = false
  else
    acc    = operation_or_value
    seeded = true
  end

  each do |a|
    # An explicit flag, rather than a nil check on acc, records whether the
    # accumulator holds a value yet. That keeps nil usable both as a
    # first element and as a result returned by the block.
    if seeded
      acc = yield(acc, a)
    else
      acc    = a
      seeded = true
    end
  end

  return acc
end
```

First, I have to say I'm not particularly happy with my implementation; it seems a bit too brute force and I think I might be missing some obvious refactorings. But it should have been readable enough for you to get a general feel for what's going on. The first paragraph of code is simply handling the three different cases of `reduce()`. The real operation happens starting with our `each` call.

Without a predefined initial value, we set the initial value to the first element in the collection, and our first yield occurs starting with the second element. Otherwise, the initial value and first element are yielded. The purpose of `reduce()` is to perform an operation on each successive value in a list by combining it in some way with the last calculated value. In this way, the list gets reduced to a single value in the end. This behavior explains why the old alias for this method in Ruby was called `inject`: a function is being injected between each element in the collection via our `yield` call. I find this operation much easier to understand when I'm able to see it in terms of primitive concepts such as `yield` and `each` because it makes it possible to trace exactly what is going on.

If you are having trouble following the implementation of `reduce()`, don't worry about it. It's definitely one of the more complex `Enumerable` methods, and if you try to implement a few of the others and then return to studying `reduce()`, you may have better luck. But the beautiful thing is that if you ignore the `reduce(:+)` syntactic sugar, it introduces no new concepts beyond what is used to implement `map()`. If you think you understand `map()` but not `reduce()`, it's a sign that you may need to brush up on your fundamentals, such as how `yield` works.

If you've been following along at home, you should at this point be passing all your `FakeEnumerable` tests. That means it's time to get started on our `FakeEnumerator`.

### Implementing the `FakeEnumerator` class

Similar to before, I needed to write some code to get my tests to a failure state. First, I set up the skeleton of the `FakeEnumerator` class.

```ruby
# Skeleton only: every method is an empty stub that returns nil, so that
# the methods exist but do nothing yet.
class FakeEnumerator
  def next
  end

  def with_index
  end

  def rewind
  end
end
```

Then I realized that I needed to go back and at least modify the `FakeEnumerable#map` method, as my tests rely on it returning a `FakeEnumerator` object when a block is not provided, in a similar manner to the way `Enumerable#map` would return an `Enumerator` in that scenario.

```ruby
module FakeEnumerable
  # With a block, returns a new array holding the block's return value for
  # each element yielded by `each`. Without one, returns a FakeEnumerator
  # that wraps this object's `map`.
  def map
    return FakeEnumerator.new(self, :map) unless block_given?

    [].tap { |results| each { |element| results << yield(element) } }
  end
end
```

Although, technically speaking, I should have also updated all my other `FakeEnumerable` methods, it's not important to do so because our tests don't cover it and that change introduces no new concepts to discuss. With this change to `map`, my tests all failed rather than erroring out, which meant it was time to start working on the implementation code.

But before we get started, it's worth reflecting on the core purpose of an `Enumerator`, which I haven't talked about yet. At its core, an `Enumerator` is simply a proxy object that mixes in `Enumerable` and then delegates its `each` method to some other iterator provided by the object it wraps. This behavior turns an internal iterator into an external one, which allows you to pass it around and manipulate it as an object. 

Our tests call for us to implement `next`, `rewind`, and `with_index`, but before we can do that meaningfully, we need to make `FakeEnumerator` into a `FakeEnumerable`-enabled proxy object. There are no tests for this because I didn't want to reveal too many hints to those who wanted to try this exercise at home, but this code will do the trick:

```ruby
# Proxy object that turns one iterator method of another object into its
# own `each`, and so gains every FakeEnumerable method on top of it.
class FakeEnumerator
  include FakeEnumerable

  # target - the object whose iterator is being wrapped
  # iter   - the name of the iterator method to call on target
  def initialize(target, iter) 
    @target = target
    @iter   = iter
  end

  # Calls the wrapped iterator method on the target, passing the block along.
  def each(&block)
    @target.send(@iter, &block) 
  end

   # other methods go here...
end
```

Here we see that `each` uses `send` to call the original iterator method on the target object. Other than that, this is the ordinary pattern we've seen in implementing other collections. The next step is to implement our `next` method, which is a bit tricky.

What we need to be able to do is iterate once, then pause and return a value. Then, when `next` is called again, we need to be able to advance one more iteration and repeat the process. We could do something like run the whole iteration and cache the results into an array, then do some sort of indexing operation, but that's both inefficient and impractical for certain applications. This problem made me realize that Ruby's `Fiber` construct might be a good fit because it specifically allows you to jump in and out of a chunk of code on demand. So I decided to try that out and see how far I could get. After some fumbling around, I got the following code to pass the test:

```ruby
# loading the fiber stdlib gives us some extra features, including Fiber#alive?
require "fiber" 

class FakeEnumerator
  # Returns the next element of the iteration, raising StopIteration once
  # there are no elements left.
  def next
    # Created on the first call and then reused, so the fiber remembers how
    # far into `each` the iteration is between calls.
    @fiber ||= Fiber.new do
      # Pause at every element, handing it back to the caller of `resume`.
      each { |e| Fiber.yield(e) }

      # Without this, the final `resume` would return the value of `each`.
      raise StopIteration
    end

    if @fiber.alive?
      @fiber.resume 
    else
      # The fiber has already finished, so signal the end without resuming it.
      raise StopIteration
    end
  end
end
```

This code is hard to read because it isn't really a linear flow, but I'll do my best to explain it using my very limited knowledge of how the `Fiber` construct works. Basically, when you call `Fiber.new` with a block, the code in that block isn't executed immediately. Instead, execution begins when `Fiber#resume` is called. Each time a `Fiber.yield` call is encountered, control is returned to the caller of `Fiber#resume` with the value that was passed to `Fiber.yield` returned. Each subsequent `Fiber#resume` picks up execution back at the point where the last `Fiber.yield` call was made, rather than at the beginning of the code block. This process continues until no more `Fiber.yield` calls remain, and then the last executed line of code is returned as the final value of `Fiber#resume`. Any additional attempts to call `Fiber#resume` result in a `FiberError` because there is nothing left to execute.

If you reread the previous paragraph a couple of times and compare it to the definition of my `next` method, it should start to make sense. But if it's causing your brain to melt, check out the [Fiber documentation](http://ruby-doc.org/core-1.9/classes/Fiber.html), which is reasonably helpful. 

The very short story about this whole thing is that using a `Fiber` in our `next` definition lets us keep track of just how far into the `each` iteration we are and jump back into the iterator on demand to get the next value. I prevent the `FiberError` from ever occurring by checking to see whether the `Fiber` object is still alive before calling `resume`. But I also need to make it so that the final executed statement within the `Fiber` raises a `StopIteration` error as well, to prevent it from returning the result of `each`, which would be the collection itself. This is a kludge, and if you have a better idea for how to handle this case, please leave me a comment.

The use of `Fiber` objects to implement `next` makes it possible to work with infinite iterators, such as `Enumerable#cycle`. Though we won't get into implementation details, the following code should give some hints as to why this is a useful feature:

```ruby
>> row_colors = [:red, :green].cycle
=> #<Enumerator: [:red, :green]:cycle>
>> row_colors.next
=> :red
>> row_colors.next
=> :green
>> row_colors.next
=> :red
>> row_colors.next
=> :green
```

As cool as that is, and as much as it makes me want to dig into implementing it, I have to imagine that you're getting tired by now. Heck, I've already slept twice since I started writing this article! So let's hurry up and finish implementing `rewind` and `with_index` so that we can wrap things up.

I found a way to implement `rewind` that is trivial, but something about it makes me wonder if I've orphaned a `Fiber` object somewhere and whether that has weird garbage collection implications. But nonetheless, because our implementation of `next` depends on the caching of a `Fiber` object to keep track of where it is in its iteration, the easiest way to rewind back to the beginning state is to simply wipe out that object. The following code gets my `rewind` tests passing:

```ruby
def rewind
  @fiber = nil
end
```

Now only one feature stands between us and the completion of our exercise: `with_index`. The real `with_index` method in Ruby is much smarter than what you're about to see, but for its most simple functionality, the following code will do the trick:

```ruby
def with_index
  i = 0
  each do |e| 
    out = yield(e, i)
    i += 1
    out
  end
end
```

Here, I did the brute force thing and maintained my own counter. I then made a small modification to control flow so that rather than yielding just the element on each iteration, both the element and its index are yielded. Keep in mind that the `each` call here is a proxy to some other iterator on another collection, which is what gives us the ability to call `@list.map.with_index` and get `map` behavior rather than `each` behavior. Although you won't use it every day, knowing how to implement an around filter using `yield` can be quite useful.

With this code written, my full test suite finally went green. Even though I'd done these exercises a dozen times before, I still learned a thing or two while writing this article, and I imagine there is still plenty left for me to learn as well. How about you?

### Reflections

This is definitely one of my favorite exercises for getting to understand Ruby better. I'm not usually big on contrived practice drills, but there is something about peeling back the magic on features that look really complex on the surface that gives me a great deal of satisfaction. I find that even if my solutions are very much cheap counterfeits of what Ruby must really be doing, it still helps tremendously to have implemented these features in any way I know how, because it gives me a mental model of my own construction from which to view the features.

If you enjoyed this exercise, there are a number of things that you could do to squeeze even more out of it. The easiest way to do so is to implement a few more of the `Enumerable` and `Enumerator` methods. As you do that, you'll find areas where the implementations we built out today are clearly insufficient or would be better off written another way. That's fine, because it will teach you even more about how these features hang together. You can also discuss and improve upon the examples I've provided, as there is certainly room for refactoring in several of them. Finally, if you want to take a more serious approach to things, you could take a look at the tests in [RubySpec](https://github.com/rubyspec/rubyspec) and the implementations in [Rubinius](https://github.com/rubinius/rubinius). Implementing Ruby in Ruby isn't just something folks do for fun these days, and if you really enjoyed working on these low-level features, you might consider contributing to Rubinius in some way. The maintainers of that project are amazing, and you can learn a tremendous amount that way.

Of course, not everyone has time to contribute to a Ruby implementation, even if it's for the purpose of advancing their own understanding of Ruby. So I'd certainly settle for a comment here sharing your experiences with this exercise.


================================================
FILE: articles/v2/005-regression-testing.md
================================================
The most simple and unambiguous benefit that automated testing provides is that it can be used to minimize the impact of regressions. If each bug fix is accompanied by a corresponding test, it makes it very likely that known defects will be noticed immediately whenever they resurface. Over the lifetime of a project, regression tests end up forming a safety net that makes refactoring easier and maintenance work less scary.

In this article, we will look at a couple of real bugs that were fixed in practicingruby.com and the tests that are meant to prevent them from recurring. Working on these fixes taught Jordan and me two valuable lessons that I think are worth sharing. For some, they will serve as evidence of why you should be writing regression tests; for others, they will serve more as a reminder of why the time you're investing into your tests is worthwhile.

_As a bit of a warning to those expecting sage advice in this article, I am in no way an expert at Rails testing, and even my Ruby testing skills are probably average. But this newsletter is about practicing Ruby, not just pontificating about it. I'm not afraid to show some of my own weak spots from time to time, as long as we can all learn something from it._

### LESSON 1: More granular tests make for better regression suites  

We now have support for Github-style mentions in our comments here on practicingruby.com, but the initial version of this feature was much more fragile than the one that is now in place. When Jordan first demonstrated the feature to me, I immediately tried to break it by manually testing how it dealt with case sensitivity, punctuation, and mentions of users who were not subscribed to Practicing Ruby. I succeeded at demonstrating bad behavior for two out of three of these cases, so Jordan quickly coded up a fix and an accompanying test for that fix. The test he wrote is shown here:

```ruby
test "returns an array of valid users mentioned" do
  person = Factory(:user, :github_nickname => "PerSon")
  frank = Factory(:user, :github_nickname => "frank-pepelio")

  comment = Factory(:comment,
    :body => "I mention @person: and @FRank-pepelio but @noexist isn't there")

  mentioned_users = comment.mentioned_users

  assert_equal 2, mentioned_users.count
  assert mentioned_users.include?(person)
  assert mentioned_users.include?(frank)
end
```

Seeing this test go green was sufficient evidence for me that after the fix, the feature was working as I'd expect it to. We committed it, and for the moment, the problem was solved. But when I took a second look at this test while preparing this article, I decided that I should rewrite it. Can you guess why?

If you guessed that it was because I have some deep-rooted hatred for Frank Pepelio, unfortunately that's not correct. But if instead you guessed that it was because I thought this test was covering too many different behaviors at once, you were spot on. I came up with the following tests as a suitable replacement.

```ruby
test "returns an array of valid users mentioned" do
  person = Factory(:user, :github_nickname => "PerSon")
  frank = Factory(:user, :github_nickname => "frank-pepelio")

  comment = Factory(:comment,
    :body => "I mention @PerSon and @frank-pepelio")

  mentioned_users = comment.mentioned_users

  assert_equal 2, mentioned_users.count
  assert mentioned_users.include?(person)
  assert mentioned_users.include?(frank)
end

test "omits mentioned users that do not have a matching user record" do
  frank = Factory(:user, :github_nickname => "frank-pepelio")

  comment = Factory(:comment,
    :body => "I mention @frank-pepelio and @noexist")

  mentioned_users = comment.mentioned_users

  assert_equal [frank], mentioned_users
end

test "match mentioned users without case sensitivity" do
  frank = Factory(:user, :github_nickname => "frank-pepelio")

  comment = Factory(:comment,
    :body => "I mention @FRANK-pepelio")

  mentioned_users = comment.mentioned_users

  assert_equal [frank], mentioned_users
end

test "allows user mentions to include punctuation" do
  frank = Factory(:user, :github_nickname => "frank-pepelio")
  person = Factory(:user, :github_nickname => "person")

  comment = Factory(:comment, :body => "@person, @frank-pepelio: YAY!")
  mentioned_users = comment.mentioned_users

  assert_equal 2, mentioned_users.count
  assert mentioned_users.include?(person)
  assert mentioned_users.include?(frank)
end
```

These tests are a whole lot more verbose than the one Jordan wrote, even though they cover the same set of behaviors. However, the extra effort of writing tests this way gains us quite a bit. Our new tests are much more granular, making the expectations for this feature more explicit and easier to understand for those reading the tests. More important, though, the increased granularity extends to feedback provided by test failures. If we make a change that causes our mentions to no longer be case insensitive, we get a failure in the specific test about case sensitivity, rather than in a catch-all test designed to fully define the behavior of the feature. 

The value of your test suite as a safety net against regressions is directly related to the granularity of your tests. For any given feature, the more complex it is, the greater the importance of granular testing. Additionally, the more features you have, the more difficult it will be to keep track of the edge cases of any single feature unless you break out each of its behaviors explicitly. Because of the multiplicative effect between these two things, most of the time it's worth it to make your tests more granular, even if doing so takes a bit more time, effort, and code.

### LESSON 2: Testing your critical paths is important

Part of the process of rolling out our new notification features (including mentions) was to set up background job processing for sending emails. We decided to try out [MailHopper](https://github.com/cerebris/mailhopper), but in order to do so, we needed to upgrade to Rails 3.1. The upgrade was relatively straightforward and Jordan did a fair bit of manual testing to make sure that notification emails were still being sent and that the application as a whole was still working as expected.

Unfortunately, the one area he forgot to check was the code I wrote for linking MailChimp subscriptions to GitHub accounts. This may be some of the most simple code in the application, but I still managed to use a couple of deprecated APIs without realizing it. If we had some integration tests in place, we probably would have caught these issues, but because we didn't, they didn't surface until several hours after we deployed the Rails 3.1 upgrade.

The issues themselves were downright trivial. In one of our HAML-based views, I had written `- form_for` instead of `= form_for`, which upon upgrading to Rails 3.1 prevented the form from being rendered. In the process of fixing this issue, I stumbled across another problem. It used to be possible to call `ActionMailer` methods by prefixing them with `deliver_`; in Rails 3.1, though, you use an ordinary method call that returns a `Mail` object that you're expected to explicitly call `deliver` on. Because my code was using the old syntax, it ended up raising a `NoMethodError` rather than sending an email once we did the upgrade. It took only a few minutes to fix these issues, but the fact that they cropped up at all was a sign of a deeper problem.

In the early stages of building out practicingruby.com, we avoided writing tests because the code was very simple and because doing a manual check of all the features took no more than a few minutes before each deploy. I wanted to focus on content generation, and I didn't want to overwhelm Jordan with too much work on Practicing Ruby because it would take his focus off of Mendicant University. But, as we fast-forward a couple weeks, the application is now complex enough that not writing tests is costing us time rather than saving us time. Even if we still don't necessarily need 100% test coverage, we need to make sure that we've got the critical paths through this application covered so that the site can remain a good experience for our readers.

With that in mind, I set out to write integration tests that cover the process of linking your GitHub account to your MailChimp subscription from end to end. I started with the easy case of when your GitHub email address matches the one you subscribed to this newsletter with. The code below is my first rough draft of a test that walks through that process.

```ruby
test "github autolinking" do
  Factory.create(:user, :email => "gregory.t.brown@gmail.com")

  OmniAuth.config.add_mock(:github, {
    :uid => '12345',
    :user_info => {
      :nickname => 'sandal',
      :email    => "gregory.t.brown@gmail.com"
    }
  })

  visit community_url

  auth_link = Authorization.find_by_github_uid("12345").
                            authorization_link


  assert_equal authorization_link_path(auth_link), current_path

  refute_empty ActionMailer::Base.deliveries

  mail = ActionMailer::Base.deliveries.first
  ActionMailer::Base.deliveries.clear

  assert_equal ["gregory.t.brown@gmail.com"], mail.to

  visit "/sessions/link/#{auth_link.secret}"
  assert_equal articles_path, current_path
end
```

I was initially expecting this test to pass, but upon running, it ended up with a failure. I had forgotten that my previous manual testing was only of the more complex workflow, which has you explicitly enter your email address. The fact that the same confirmation email gets sent by our application in two different ways is a sign that there is some violation of DRY going on, but the more immediate realization I had was that the setup costs of testing these different scenarios manually were causing both Jordan and me to cut corners and to not test all the paths we should have been testing. Fortunately, rewriting another `ActionMailer` call got this test to go green. 

No longer feeling confident in my ability to weed out these errors from memory, I did a quick projectwide search with ack on the word "deliver" to confirm that this was indeed the last instance of this particular bug. After I was sure that was the case, I was able to move onto the workflow that caused me to detect this bug in the first place: our manual MailChimp-to-GitHub linking process. It is more or less the same set of steps, but involves filling out a form field before the confirmation email gets sent. 

```ruby
test "github manual linking" do
  Factory.create(:user, :email => "gregory.t.brown@gmail.com")

  OmniAuth.config.add_mock(:github, {
    :uid => '12345',
    :user_info => {
      :nickname => 'sandal',
      :email    => "test@test.com"
    }
  })

  visit community_url

  auth_link = Authorization.find_by_github_uid("12345").
                            authorization_link


  assert_equal edit_authorization_link_path(auth_link), current_path
  fill_in "authorization_link_mailchimp_email", 
          :with => "gregory.t.brown@gmail.com"
  click_button("Link this email address to my Github account")

  refute_empty ActionMailer::Base.deliveries
  mail = ActionMailer::Base.deliveries.first
  ActionMailer::Base.deliveries.clear

  assert_equal ["gregory.t.brown@gmail.com"], mail.to

  auth_link.reload

  visit "/sessions/link/#{auth_link.secret}"
  assert_equal articles_path, current_path
end
```

Because I had already fixed the issues that would have prevented these tests from passing, this test went green without any additional modifications to the application. However, when writing regression tests, it's important to be able to verify that your tests are able to detect the defects they're meant to protect against. To do so, I went ahead and reverted each of my fixes one by one, then reapplied them to confirm that without each fix, the tests fail, and with them, the tests pass. This check isn't quite a substitute for writing tests before writing code but does at least help ensure that your tests are valid.

Now that the immediate concern of having some tests to accompany my bug fixes was resolved, I turned my eye to the messy code I had just written and found it in dire need of refactoring. Because I typically only ever worked far down the Rails stack in my consulting work, proper integration testing is a skill I never picked up. But just my sense of Ruby coding in general made me realize that I could do some simple extractions to at least hide some of the nasty stuff this code was doing. The following tests show what I ended up with.

```ruby
test "github autolinking" do
  email = "gregory.t.brown@gmail.com"
  uid   = "12345"

  create_user(:email => email)
  login(:nickname => "sandal", :email => email, :uid => uid)

  visit community_url
  get_authorization_link(uid)

  assert_confirmation_sent(email)

  assert_activated
end

test "github manual linking" do
  mailchimp_email = "gregory.t.brown@gmail.com"
  github_email    = "test@test.com"
  uid             = "12345"

  create_user(:email => mailchimp_email)
  login(:nickname => "sandal", :email => github_email, :uid => uid)

  visit community_url
  get_authorization_link(uid)

  assert_email_manually_entered(mailchimp_email)

  assert_confirmation_sent(mailchimp_email)

  assert_activated
end
```

To get the code to look like this, I didn't do anything fancy. I just did ordinary method extractions by pushing paragraphs of code down into functions. In the event that you want to see exactly what changes I made, you can check out [this gist](https://gist.github.com/3829d5bfc124c3640c5b), which contains the entire test file. The end result provides a fairly nice high-level description of each step of these two scenarios.

Writing these tests took me longer than I would have liked, in part because I'm not that comfortable with integration testing in Rails, but also because there are a lot of moving parts to consider. However, because this is a multistep process that involves nontrivial setup, repeated manual testing would quickly end up taking up more of my time than writing these tests did. For that reason, I definitely suggest writing up some integration tests for whatever critical paths you have in your applications.

To make a long story short, the more annoying your workflows are to test manually, the more important it is for you to write automated tests that do the job for you. Also, your critical paths through your application really ought to be covered with automated tests so that they don't just go up in smoke without you noticing. The good news is that your integration tests needn't be bulletproof in order to be useful.

### Reflections

I am trying to reach two audiences at once with this article: those who'd benefit from seeing how bug fixes can be accompanied by tests to help prevent regressions, and those who have experience with Rails testing who can discuss strategies for how to improve the tests I wrote.

To be brutally honest, this newsletter is called Practicing Ruby and not Practicing Rails for a reason: I'm not really a Rails developer. However, I imagine that most of our readers have at least some experience with Rails and that many of you may be much stronger at testing Rails applications than I am. My hope is that you will take this opportunity to learn by teaching and tell me a thing or two about how I could have made my integration tests better.

In particular, I am made uneasy by the large amount of global state that I'm having to manage in my integration tests. Is this a standard practice? If not, what are the alternatives? Am I supposed to use more mock objects to avoid these situations? Should I be designing my applications differently to make them easier to test? If so, what changes could I make to my implementation code to make these tests easier to write? These are all questions that ran through my mind as I was working on this article, and I'd appreciate any links to good resources that might help me answer them, or some advice based on your own experiences.


================================================
FILE: articles/v2/006-learning-new-things-step-by-step.md
================================================
One of the lessons I always try to teach programmers of various skill levels is that it is very important to work in small steps. This approach is especially important when you're learning a new tool or technique, due to all the unexpected issues that can crop up in uncharted territory. Most folks seem to conceptually understand the value of working in small iterations, yet still bite off more than they can chew on a consistent basis because the question of "how small is small enough?" is a hard one to answer.

In this article, I have written up the steps I took to familiarize myself with the game library [Ray](http://mon-ouie.github.com/projects/ray.html). Although I am somewhat familiar with vector graphics, I've never built an arcade game before in any programming language, so it's genuinely new territory for me. Regardless of whether you have experience with this sort of programming, you should thus be able to follow along in my footsteps and have a similar experience to mine.

## Building a simple arcade game in 13 steps

Originally, I'd planned to build a fairly complete Pac-Man clone, but then I realized that process would be a bit too complicated to explain in a single article. So I decided to instead go with a more basic rule set that would still keep some of the Pac-Man style gameplay intact.

The game I came up with is called "Goodies and Baddies," and the rules are very simple. You play as a small red rectangle on a 640x480 screen, and you move around using your keyboard's arrow keys. There are 20 goodies (white rectangles) randomly distributed around the playing area, and your job is to collect them all. However, you need to avoid being captured by the 5 baddies (blue rectangles), who will chase you around the screen as you try to collect the goodies. Touching one of the baddies will cause you to lose, but if you can collect all 20 goodies without getting captured, you win!

After establishing this set of rules, I set out to implement the game. I took notes as I worked and was able to identify 13 distinct steps that I took as I worked towards the final goal. They are listed here and serve as a good blueprint for trying this out at home if you have the time to do so:

* Step 1: Render Ray's "Hello World" example
* Step 2: Render a red 20x20 square to the screen
* Step 3: Get the red square to follow the mouse pointer
* Step 4: Move the square to the left using the left arrow
* Step 5: Allow all arrow keys to move the square
* Step 6: Make the square move a bit faster
* Step 7: Display 20 randomly placed 10x10 white squares
* Step 8: Keep the red square from leaving the screen
* Step 9: Remove white squares when they get covered up 
* Step 10: Display "You win" when all white squares are gone.
* Step 11: Add five randomly placed 15x15 blue squares
* Step 12: Display "You lose" on collision with a blue square
* Step 13: Make the blue squares follow the red square

Those who can't follow along by running my code should still be able to walk through the process virtually by looking at my implementation code while examining the screenshots and videos I've provided. The videos were recorded without any sound and are simply visual aids to make it easier to understand what the code in this article is actually doing.

What follows is a detailed report of my progress on each step. Those wishing to implement the game themselves before reading how I built it should stop reading now and head on over to the [Ray documentation](http://mon-ouie.github.com/projects/ray.html).

## Step 1: Render Ray's "Hello World" example

A "Hello World" example is typically the most simple program that can be written using any system. It is not designed to teach you how a given library or framework is meant to be used but is instead meant to provide a smoke test to make sure that there are no obvious issues with your environment before you take on more serious work.

Getting a "Hello World" example to run is not necessarily a sign that you will have smooth sailing from there on out, but failing to get one to run raises major red flags. That's why I chose running Ray's "Hello World" as our first step, even though we don't need to mess with rendering text until much later in the process.

### Implementation

The following code was taken directly from Ray's website and is simple enough that it's pretty obvious what's going on, even if you haven't worked with the library before.

```ruby
require 'ray'

Ray.game "Hello world!" do
  register { add_hook :quit, method(:exit!) }

  scene :hello do
    @text = text "Hello world!", :angle => 30, :at => [100, 100], :size => 30
    render { |win| win.draw @text }
  end

   scenes << :hello
end
```

### Results

When I ran the "Hello World" example, here is what was rendered in a little 640x480 window on my screen:

<div align="center">
<img width="640" src="http://i.imgur.com/pYJSi.png" alt="" title="Hosted by imgur.com" />
</div>

Though not particularly exciting, it serves the purpose of verifying that the library can at least be loaded up and successfully complete a trivial task. Because Ray has some external dependencies that must be manually installed, this test is especially important.

If we look a little more carefully at the rendered content and compare it to our implementation code, we get a few hints about how Ray works. For example, we can infer that the default background color is black and the default text color is white. We can also infer that it displays the first _scene_  by default without explicitly telling it which scene to render. We also see that it looks like Ray's coordinate system places y=0 at the top of the screen. This placement is pretty common for graphics systems, but it's always good to get the question of "Which way is up?" out of the way as early as possible.

It wouldn't be hard to come up with more questions that might be answerable by tweaking this example a bit, but when I first start learning a new library, I try not to be too adventurous. So rather than getting bogged down in the details, I revisited the documentation to figure out how to render a rectangle to the screen.

## Step 2: Render a red 20x20 square to the screen

Rendering text was a nice start, but because most of this game hinges on manipulating polygons, not words, it was important to test out some basic drawing operations right away. Because Ray's documentation includes a whole section on polygons, this next step was quite easy to work through.

### Implementation

The simple program here shares the same boilerplate code as the previous "Hello world" example but simply swaps out the text rendering code with some polygon manipulation code.

```ruby
require 'ray'

Ray.game "Test" do
  register { add_hook :quit, method(:exit!) }

  scene :square do
    @rect = Ray::Polygon.rectangle([0, 0, 20, 20], Ray::Color.red)
    @rect.pos = [200, 200]

    render do |win|
      win.draw @rect
    end
  end

  scenes << :square
end
```

### Results

The following screenshot shows what was rendered to the screen after I made this small change:

<div align="center">
<img src="http://i.imgur.com/3iXgj.png" alt="" title="Hosted by imgur.com" />
</div>

After comparing the results to the implementation code, it became clear to me that in order to use Ray effectively, I'd need to begin thinking in terms of vector graphics and matrix transformations. In particular, the example demonstrates that Ray represents its drawable objects using an abstract coordinate system for points and edges and then translates those coordinates to determine where they end up being rendered on the screen. This is why we define the square with a top-left corner of (0,0) and then later explicitly set the position to (200,200).

Knowing the math behind 2D transformations is not essential for completing this exercise, but a basic background in those concepts wouldn't hurt. I kept forgetting that this was how Ray worked under the hood while working on this article, which caused some of my debugging sessions to drag on longer than they should have. If you're following along at home and attempting to do each step before reading how I did it, it might not hurt for you to brush up on [the basic math involved in 2D graphics](http://www.willamette.edu/~gorr/classes/GeneralGraphics/Transforms/transforms2d.htm) before continuing with the exercise.

Once I got a square rendered on the screen, the next step was to make it move.

## Step 3: Get the red square to follow the mouse pointer

Even though the final plans called for this to be a game you play using the arrow keys on your keyboard instead of a mouse, the `on :mouse_motion` example in Ray's documentation was staring me in the face and provided too much instant gratification to skip over.

### Implementation

This code shows the changes that I made to make the square follow the mouse pointer around the screen. If you are trying to run these examples as you read along, simply replace the scene code from step 2 with this new implementation. All the other boilerplate code will remain the same throughout the rest of this article.

```ruby
scene :square do
  @rect = Ray::Polygon.rectangle([0, 0, 20, 20], Ray::Color.red)
  @rect.pos = [200,200]

  on :mouse_motion do |pos|
    @rect.pos = pos
  end

  render do |win|
    win.draw @rect
  end
end
```

### Results

This video shows the red square following the mouse pointer around the screen:

<iframe width="640" height="480" src="//www.youtube.com/embed/ywwcj1KH-1o?rel=0" frameborder="0" allowfullscreen></iframe>

Once I got this code working, I was able to get a rough sense of how Ray handles its main event loop. The `on()` method allows you to define observers for various events. Any matching callbacks get triggered on each tick, before the `render` code gets executed. The `:mouse_motion` event was an easy one to start with because it simply yields the position of the mouse pointer on each tick, but the general concept could be applied just as well to key press events.

But before messing with handling keyboard interaction, I decided to take a quick glance at what kind of object the `on :mouse_motion` observer was yielding. I thought it was possible that these would be just simple two-element arrays, but after doing a few printline statements, realized that they were `Ray::Vector2` objects. A [brief source dive](https://github.com/Mon-Ouie/ray/blob/master/lib/ray/vector.rb) brought me up to speed on what to expect from this sort of object; then I moved on to the next step.

## Step 4: Move the square to the left using the left arrow

I initially tripped up on this step because I didn't understand that the `:key_press` event gets triggered only when the key is initially pressed and does not trigger repeatedly while a key is held down. However, once I found the matching `:key_release` event and an example that used both of them, I was able to make some progress by implementing some simple transactional logic.

### Implementation

The following code uses an instance variable `@moving_left` to track whether the square needs to continue moving left. Whenever `@moving_left` is true, it uses vector addition to translate the current position of the rectangle.

```ruby
scene :square do
  @rect = Ray::Polygon.rectangle([0, 0, 20, 20], Ray::Color.red)
  @rect.pos = [200,200]

  on :key_press, key(:left) do
    @moving_left = true
  end

  on :key_release, key(:left) do
    @moving_left = false
  end

  render do |win|
    win.draw @rect
    @rect.pos += [-1,0] if @moving_left
  end
end
```

### Results

The following video shows the red square creeping slowly to the left each time I hold down the left arrow key:

<iframe width="640" height="480" src="//www.youtube.com/embed/7k64QyBiq_0?rel=0" frameborder="0" allowfullscreen></iframe>

After I got this step working, I investigated a couple more things about Ray through experimentation. My tinkering caused me to discover that the `key()` method actually converts the symbolic value `:left` into a `Ray::Key` object, which is a simple container that looks up the key code for you. I also found out that the position of a drawable object appears to be immutable, so you can't do things like `@rect.pos.x -= 1` and expect it to work. Instead, you need to do vector addition and then assign a new position object. This design decision would have made a lot more sense to me if I had kept the mathematical underpinnings of vector graphics in mind while working in this step, but instead, it just led me to scratch my head for a while.

## Step 5: Allow all arrow keys to move the square

I could have repeated the general approach I took in step 4 to get all my arrow keys working, but it would have been tedious. If I had read the documentation a little more closely before starting step 4, I would have seen that Ray's author pretty much says exactly that in one of his examples.

### Implementation

The following code uses the conditionless callback `always` to run some code on each tick and checks whether a key is being held down by calling the aptly named `holding?` method that I overlooked in step 4.

```ruby
# Step 5 scene: the red square can be moved with any of the four arrow keys.
scene :square do
  @rect = Ray::Polygon.rectangle([0, 0, 20, 20], Ray::Color.red)
  @rect.pos = [200,200]

  # `always` runs on each tick; `holding?` reports whether a key is currently
  # held, so no press/release bookkeeping is needed. The four checks are
  # independent, which is what allows diagonal movement with two keys held.
  always do
    @rect.pos += [-1, 0] if holding?(:left)
    @rect.pos += [1, 0] if holding?(:right)
    @rect.pos += [0, -1] if holding?(:up)
    @rect.pos += [0, 1] if holding?(:down)
  end

  render do |win|
    win.draw @rect
  end
end
```

### Results

After making this change, the red square was able to move in all directions, as shown in the following video. Moving diagonally simply requires holding down two keys at once (i.e., holding up and left moves northwest across the screen).

<iframe width="640" height="480" src="//www.youtube.com/embed/zW98M-tdzfo?rel=0" frameborder="0" allowfullscreen></iframe>

The main thing that I noticed was that moving the red square around was tedious because it was moving so slowly. I investigated a few options, including changing Ray's default frame rate, but my wife quickly talked me into doing something much simpler.

## Step 6: Make the square move a bit faster

This step involved tweaking the distance traveled by the red square on each tick, thus increasing its speed.

### Implementation

In the following code, I changed the distance that the red square moves when a key is held down from 1 to 2, effectively doubling its speed.

```ruby
# Step 6 scene: same as before, but the square moves 2 units per tick.
scene :square do
  @rect = Ray::Polygon.rectangle([0, 0, 20, 20], Ray::Color.red)
  @rect.pos = [200,200]

  # Each held arrow key shifts the square by 2 instead of 1, doubling its speed.
  always do
    @rect.pos += [-2, 0] if holding?(:left)
    @rect.pos += [2, 0] if holding?(:right)
    @rect.pos += [0, -2] if holding?(:up)
    @rect.pos += [0, 2] if holding?(:down)
  end

  render do |win|
    win.draw @rect
  end
end
```

### Results

This video shows the faster-moving rectangle. Jumping a distance of two pixels at a time still looks like smooth motion, so this approach definitely was simpler than any of the other ideas I had in mind.

<iframe width="640" height="480" src="//www.youtube.com/embed/B4_KZonH3qw?rel=0" frameborder="0" allowfullscreen></iframe>

This was the first time that I started feeling the desire to refactor things: updating four values when I could have updated one seemed a bit tedious. However, I try to keep a semistrict policy of not refactoring unless I am in deep pain for the first few hours of working with a new tool. The reason I do this is to allow my mind to work in a purely creative mode, avoiding invoking the inner "judge" that I talked about in [Practicing Ruby 2.2](http://practicingruby.com/articles/2). Take this note as fair warning, though: there will be more repetitive code to come before this exercise is completed!

At this point, I had a red square moving at a speed that looks comparable to how things tend to move in old-school arcade games. Because the novelty value of moving a little square around in a void wears off pretty quickly, the next step was to introduce some other game objects into the mix.

## Step 7: Display 20 randomly placed 10x10 white squares

In this step, I introduced the goodies that our red rectangle is meant to collect. Researching collision detection at this point would only complicate things, so instead I focused on the visual aspect of things as well as some simple bounds testing.

### Implementation

The following code generates 20 random squares and renders them completely within the visible area on the screen. It does not introduce any new Ray concepts, so it should be pretty easy to follow.

```ruby
# Step 7 scene: adds 20 randomly placed 10x10 goodies alongside the red square.
scene :square do
  @rect = Ray::Polygon.rectangle([0, 0, 20, 20], Ray::Color.red)
  @rect.pos = [200,200]

  # Upper bounds for the random offsets: window size minus a 20-unit margin.
  max_x = window.size.width - 20
  max_y = window.size.height - 20

  # Build the goodies. The +10 shift together with the margin above is meant
  # to keep every 10x10 square fully inside the visible area.
  @goodies = 20.times.map do
    x = rand(max_x) + 10
    y = rand(max_y) + 10
    
    # No color is passed here; the article shows these rendering as white.
    g = Ray::Polygon.rectangle([0,0,10,10])
    g.pos = [x,y]

    g
  end

  always do
    @rect.pos += [-2, 0] if holding?(:left)
    @rect.pos += [2, 0] if holding?(:right)
    @rect.pos += [0, -2] if holding?(:up)
    @rect.pos += [0, 2] if holding?(:down)
  end

  # Goodies are drawn first, then the red square.
  render do |win|
    @goodies.each { |g| win.draw(g) }
    win.draw @rect
  end
end
```

### Results

The following screenshot demonstrates what this effect ended up looking like. It's almost like a starry night! 

<div align="center">
<img src="http://i.imgur.com/yLqJW.png" alt="" title="Hosted by imgur.com" />
</div>

Adding bounds checking to make sure the white squares would be rendered within the visible area of the screen reminded me that I should have done something similar to prevent the red square from moving beyond the edge of the screen as well.

## Step 8: Keep the red square from leaving the screen

The next step was to implement a rudimentary means of keeping the red square from completely disappearing from the screen.

### Implementation

The following code checks to make sure that the top-left corner of the red square never exits the screen by updating its position only if the new location is within the screen's dimensions. I show only the updated `always` callback because it was the only thing that changed.

```ruby
# Step 8: apply each 2-unit move only if the square's position (its top-left
# corner) would still be inside the window afterwards. Only that one corner
# is tested, so the square can still partly leave the right/bottom edges.
always do
  # left edge
  if @rect.pos.x - 2 > 0
    @rect.pos += [-2, 0] if holding?(:left)
  end

  # right edge
  if @rect.pos.x + 2 < window.size.width
    @rect.pos += [2, 0] if holding?(:right)
  end

  # top edge
  if @rect.pos.y - 2 > 0
    @rect.pos += [0, -2] if holding?(:up)
  end

  # bottom edge
  if @rect.pos.y + 2 < window.size.height
    @rect.pos += [0, 2] if holding?(:down)
  end
end
```

### Results

The following video shows bounds checking behavior that is slightly different than the previous implementation code; my original code used (-10,-10) rather than (0,0) as the abstract origin for my rectangle. If you run the code yourself, your rectangle will get closer to the edge at times than what this video shows.

<iframe width="640" height="480" src="//www.youtube.com/embed/FDbvirfhywY?rel=0" frameborder="0" allowfullscreen></iframe>

In retrospect, this code was a bit buggy, as it really should have been looking at all the corners of the square, not just the top-left corner. But because it was good enough to keep the red square from completely sailing off into the void, I decided to save the fix as a problem for later. Putting it off would be a bad idea if I were writing production code, but thankfully the rules for spiking are different.

The next step was to get over my tensions about this buggy and unrefactored code and get my red square to interact with the white squares.

## Step 9: Remove white squares when they get covered up 

In this step, we finally need to think about collision detection: specifically, how to determine when one rectangle is contained within another. It turns out that Ray provides some helpers for this, but it took a source dive for me to find them, and a lot of experimentation to figure out how exactly to use them.

### Implementation

The following code uses the `Array#to_rect` core extension that Ray provides for creating `Ray::Rect` objects. This object provides basic collision detection routines, including an `inside?` method that can be used to determine whether one rectangle is completely contained within another. On each tick, any of the white squares that are contained within the bounds of the red square get removed.

```ruby
always do
  # same code as in step 8 goes here

  # Drop every goodie whose 10x10 rectangle lies completely inside the
  # red square's 20x20 rectangle.
  @goodies.reject! { |e|
    # Polygons are not Ray::Rect objects, so build one from the position.
    goodie = [e.pos.x, e.pos.y, 10, 10].to_rect
    
    goodie.inside?([@rect.pos.x, @rect.pos.y, 20, 20])
  }
end
```

### Results

The following video demonstrates collecting goodies. To make things a bit more challenging, I made it so that you must completely cover the white squares rather than simply touching them.

<iframe width="640" height="480" src="//www.youtube.com/embed/MmLlCMS0a7s?rel=0" frameborder="0" allowfullscreen></iframe>

Once I figured out how to use `Ray::Rect`, implementing this functionality was relatively straightforward. However, my early confusion about `Ray::Polygon.rectangle` made me think that it returned a `Ray::Rect` object, which it does not. After digging through the source for both `Polygon` and `Rect` at both the Ruby level and the C level, I could not find an easy way to automatically convert a rectangular polygon into a `Rect` object, maybe because Ray is still a pretty young library, or maybe because of a design decision. 

Rather than dwelling on that question, I just manually instantiated `Ray::Rect` objects via `Array#to_rect` so that I could keep moving on. This is the exact point at which I thought that perhaps I should introduce some sort of data model for my game objects that I could implement `to_rect` on to remove some of this duplication, but I once again brushed those tensions aside in favor of moving on to something new.

## Step 10: Display "You win" when all white squares are gone

In this step, I introduced the winning game condition, which is removing all the white squares from the screen.

### Implementation

Only a minor modification to the `render` callback was needed to complete this step. We simply check whether the array of white squares is empty, and if so, render the phrase "YOU WIN" to the screen similar to the way we rendered "Hello World" in step 1.

```ruby
# Step 10: once no goodies remain, show the win message instead of the board.
render do |win|
  if @goodies.empty?
    win.draw text("YOU WIN", :at => [100,100], :size => 60)
  else
    @goodies.each { |g| win.draw(g) }
    win.draw @rect
  end
end
```

### Results

The following video demonstrates that the game can now be won. You may want to fast-forward a bit, as it takes a while to collect all those white squares.

<iframe width="640" height="480" src="//www.youtube.com/embed/G2gqOCoK4_o?rel=0" frameborder="0" allowfullscreen></iframe>

This was a really simple step, so there isn't much more to say about it. The next step was to introduce baddies into the game. 

## Step 11: Add five randomly placed 15x15 blue squares

In this step, I placed some blue squares in random locations around the screen to serve as our baddies. As in step 7, I focused on the visual aspect of things and didn't immediately jump into collision detection or movement rules.

### Implementation

The following code shows the changes that needed to be made to get the blue squares onto the screen. They are very similar to those in step 7, but if you want to see the full context, you can view [a snapshot of the game's source code for this step](https://github.com/elm-city-craftworks/goodies_and_baddies/blob/41110cc71d3f94231754313fec47d1ad6a87e902/game.rb) on github.

```ruby
# Step 11 scene: adds five randomly placed 15x15 blue baddies.
scene :square do
  # same code as step 10 goes here

  # NOTE(review): max_x / max_y come from the step 7 setup (window size minus
  # 20), which was sized for the 10x10 goodies; with a +15 shift and a 15x15
  # square, a baddie may extend past the right/bottom edge. Confirm whether
  # that is acceptable.
  @baddies = 5.times.map do
    x = rand(max_x) + 15
    y = rand(max_y) + 15
    g = Ray::Polygon.rectangle([0,0,15,15], Ray::Color.blue)
    g.pos += [x,y]
    g
  end
  
  always do
    # ... same as step 10 goes here
  end

  # While goodies remain, draw goodies, then baddies, then the red square.
  render do |win|
    if @goodies.empty?
      win.draw text("YOU WIN", :at => [100,100], :size => 60)
    else
      @goodies.each { |g| win.draw(g) }
      @baddies.each { |g| win.draw(g) }
      win.draw @rect
    end
  end
end
```

### Results

The following screenshot shows what the randomized blue squares look like:

<div align="center">
<img src="http://i.imgur.com/Q9mwQ.png" alt="" title="Hosted by imgur.com" />
</div>

This step was pretty much a direct repeat of what I did in step 7, so there aren't a whole lot of interesting things to discuss here. The next step was to get these blue squares to be more than just pretty drawings by making them deadly.

## Step 12: Display "You lose" on collision with a blue square

In this step, I introduce a losing condition, which marks the point where my program actually becomes a functional game, even if it's a very boring one.

### Implementation

Revisiting the `Ray::Rect` source code, I found that it also provides a simple `collide?` method that tells you whether any part of a given rectangle intersects with another. The following code uses this feature to make it so that even if a single point of a blue rectangle touches the red one, the game ends in a loss. If this excerpt is too hard to follow without the surrounding context, check out [the source code of the game at this step](https://github.com/elm-city-craftworks/goodies_and_baddies/blob/5595b3fe43decd1f06f1376cc2bd1cfd9c24ec06/game.rb) on github.

```ruby 
# Step 12 scene: touching any baddie ends the game in a loss.
scene :square do
  # same code as in step 11

  always do
    # same code as in step 11

    # `||=` latches the flag: once a collision has been seen it stays set.
    # collide? is true when any part of the two rectangles intersects.
    @game_over ||= @baddies.any? { |e|
      baddie = [e.pos.x, e.pos.y, 15, 15].to_rect
      baddie.collide?([@rect.pos.x, @rect.pos.y, 20,20])
    }
  end

  # The win check comes first, so an empty goodie list shows "YOU WIN"
  # even if @game_over is also set.
  render do |win|
    if @goodies.empty?
      win.draw text("YOU WIN", :at => [100,100], :size => 60)
    elsif @game_over
      win.draw text("YOU LOSE", :at => [100,100], :size => 60)
    else
      @goodies.each { |g| win.draw(g) }
      @baddies.each { |g| win.draw(g) }
      win.draw @rect
    end
  end
end
```

### Results

This video shows that the game ends in failure as soon as the red square touches a blue square:

<iframe width="640" height="480" src="//www.youtube.com/embed/4W37HjwBHiw?rel=0" frameborder="0" allowfullscreen></iframe>

In this step, I explicitly built even more `Ray::Rect` objects, pushing me even closer to the breaking point—a point at which refactoring was not simply desirable but absolutely necessary. But with only one step left to implement before completing the exercise, I pressed on.

## Step 13: Make the blue squares follow the red square

This final step makes the game a whole lot more interesting and even somewhat fun. There are lots of ways that you could code the movement rules for the baddies, but I went with the simplest one: proceed in a straight line toward the red square on each tick.

### Implementation

This code should be fairly self-explanatory, as it does not introduce any new Ray concepts. It uses a simple algorithm for moving each blue square towards the red square that randomizes the distance traveled on each tick by choosing a number between 0 and 2.5. The [final source code for the game](https://github.com/elm-city-craftworks/goodies_and_baddies/blob/cdf0fe1b59fe2416886e94da6e45c2affc1dc111/game.rb) is available on github.

```ruby
# Step 13 scene: baddies chase the red square.
scene :square do
  # same code as in step 12

  always do
    # same code as in step 12

    # On each tick, nudge every baddie toward the red square along each axis
    # by a random distance between 0 and 2.5.
    @baddies.each do |e|
      # horizontal: move right if the baddie is left of the square, else left
      if e.pos.x < @rect.pos.x
        e.pos += [rand*2.5,0]
      else
        e.pos -= [rand*2.5,0]
      end

      # vertical: move down if the baddie is above the square, else up
      if e.pos.y < @rect.pos.y
        e.pos += [0, rand*2.5]
      else
        e.pos -= [0, rand*2.5]
      end
    end
  end

  render do |win|
    # same code as in step 12
  end
end
```

### Results

The following video shows a complete run of the game, ending in victory. Before you try it out yourself and end up frustrated, please note that I recorded about 20 losses before getting conditions favorable enough for me to win.

<iframe width="640" height="480" src="//www.youtube.com/embed/290MSnc72Jg?rel=0" frameborder="0" allowfullscreen></iframe>

At this point, I accomplished my goal of having a fairly interesting playable game in 13 small steps. If I wanted to go further, I would first go back and comprehensively refactor this code, and I would also study Ray in a more detailed fashion. However, I was thrilled to be able to get this far without doing that.

## Reflections

Hopefully, seeing my process of learning new things has been useful to you. Everyone says you should work in baby steps, but it is my experience that many intermediate developers have a much different idea of what a 'small step' is than more skilled developers tend to have. Even with my level of experience, I consistently find that the programmers that I look up to have a much more refined sense of simplicity and focus than I do. 

One of the most beneficial aspects of taking things one step at a time is that doing so isolates the risk of running into unknown-unknowns and lets you handle them individually. There were many times when holes in my own understanding of how Ray works combined with holes in its documentation caused me to get confused or frustrated. However, the feeling of struggling with a single issue is much more manageable than thinking about dozens of potential blockers simultaneously.

There is also something to be said for instant gratification. The smaller your steps are, the sooner you see some measurable progress. Each successful step forward gives you a small feeling of satisfaction that motivates you to take on the next challenge. This feeling is a key reason why many people like doing test-driven development, and it can be applied to a broad range of practices.

The one thing that I often reevaluate while working in this style is to what extent I should be refactoring as I go. Writing about my process today made me even more uncertain about whether it makes sense to let the code get so ugly just for the sake of preventing judgmental thoughts from arising. However, I feel like the question of whether to refactor as you go is largely a matter of personal preference. That said, I'm very curious to hear what your experience was like while working through this exercise, as well as what you thought of the approach I took. So what do you think?


================================================
FILE: articles/v2/007-unobtrusive-ruby.md
================================================
When Mike Burns outlined his vision of [Unobtrusive Ruby](http://robots.thoughtbot.com/post/10125070413/unobtrusive-ruby), I initially thought it was going to be a hit with the community. However, a lack of specific examples led to a critical backlash and caused the post to generate more heat than light. This is unfortunate, because the ideas he outlined are quite valuable and shouldn't be overlooked.

In this article, I share my own interpretation of what Unobtrusive Ruby means, based on the points that Mike outlined. I can't guarantee that my take on this is what Mike had in mind, but it should be interesting to those who wanted a better-defined roadmap than what he provided. To get the most out of this article, I recommend going back and [reading what Mike wrote](http://robots.thoughtbot.com/post/10125070413/unobtrusive-ruby) before continuing. Try to think about what these concepts mean to you, and then compare them to what I've outlined here.

The following guidelines are the ones Mike laid out, but I've replaced his explanations with my own in the hopes that attacking these ideas from a second angle will be useful.

## Take objects, not classes 

Because Ruby lets us pass classes around like any other object, we have a way to do dependency injection that isn't as common in other languages. For example, we can write something like the following code:

```ruby
# Keeps a list of participants and prints their names through a printer class.
class Roster
  def initialize
    @participants = []
  end

  # Appends a participant to the roster.
  def <<(new_participant)
    @participants << new_participant
  end

  # Full names of all participants, in the order they were added.
  def participant_names
    @participants.map(&:full_name)
  end

  # +printer+ is a class: it is instantiated with the participant names and
  # the resulting object is asked to print itself.
  def print(printer=RosterPrinter)
    printer.new(participant_names).print
  end
end

# Writes a bulleted list of participant names to standard output.
class RosterPrinter
  def initialize(participant_names)
    @participant_names = participant_names
  end

  # Emits a "Participants:" header followed by one "* name" line per entry.
  def print
    bullets = @participant_names.map { |name| "* #{name}" }
    puts "Participants:\n#{bullets.join("\n")}"
  end
end
```

This feels clever, but this form of dependency injection is more brittle than it
needs to be. A [recent conversation with Derick
Bailey](http://blog.rubybestpractices.com/posts/gregory/055-issue-23-solid-design.html#comment-317367342)
on my [SOLID
article](http://blog.rubybestpractices.com/posts/gregory/055-issue-23-solid-design.html)
about a very similar example made me realize just how much we tend to pass
classes around unnecessarily when we could be directly passing the objects our
class depends on. With that in mind, Derick helped me refactor the previous example to the more flexible design shown here. 

```ruby
# Keeps a list of participants and prints them through a printer object.
class Roster
  def initialize
    @participants = []
  end

  # Appends a participant to the roster.
  def <<(new_participant)
    @participants << new_participant
  end

  # Full names of all participants, in the order they were added.
  def participant_names
    @participants.map(&:full_name)
  end

  # +printer+ is an already-built object; all it needs is a #print method
  # that accepts the list. The stored participants are handed over as-is.
  def print(printer=RosterPrinter.new)
    printer.print(@participants)
  end
end

# Stateless printer: writes a bulleted list of names to standard output.
class RosterPrinter
  # Emits a "Participants:" header followed by one "* name" line per entry.
  def print(participant_names)
    bullets = participant_names.map { |name| "* #{name}" }
    puts "Participants:\n#{bullets.join("\n")}"
  end
end
```

Although this is a subtle change, it has a major impact on the way `Roster` and its printer object relate to one another. The `Roster#print` method originally had a dependency on both the constructor of its printer object and its `print()` instance method. Our new code reduces that coupling by depending only on the existence of a `print()` method in the general case. The following examples demonstrate the added flexibility that this new approach offers us.

```ruby
# does not provide a .new() method
module FunctionalPrinter
  # Module-level printer: emits a "Participants:" header followed by one
  # "* name" line per entry.
  def self.print(participant_names)
    bullets = participant_names.map { |name| "* #{name}" }
    puts "Participants:\n#{bullets.join("\n")}"
  end
end

require "prawn"

# has a different constructor than RosterPrinter
class PDFPrinter
  # Creates a new Prawn document and remembers the output filename.
  def initialize(filename)
    @document = Prawn::Document.new
    @filename = filename
  end

  # Adds a "Participants" heading and one "- name" line per entry to the
  # document, then renders it to the filename given at construction.
  def print(participant_names)
    @document.text("Participants", :size => 16)

    participant_names.each do |e|
      @document.text("- #{e}")
    end

    @document.render_file(@filename)
  end
end

roster = Roster.new
# Roster#<< returns the underlying participant array, so the chained calls
# after the first one append to that array directly.
roster << "Gregory Brown" << "Jia Wu" << "Jordan Byron"

# No argument: falls back to the default printer.
puts "USING DEFAULT"
roster.print

# A module with a module-level print method works as a printer too.
puts "USING FUNCTIONAL PRINTER"
roster.print(FunctionalPrinter)

# So does an object built with a different constructor signature.
puts "USING PDF PRINTER (see roster.pdf)"
roster.print(PDFPrinter.new("roster.pdf"))
``` 

Both `FunctionalPrinter` and `PDFPrinter` demonstrate corner cases that our original code did not account for. By following the guideline of accepting objects rather than classes as the arguments to our methods, our new design can accommodate these types of objects without modification of the `Roster#print` code. As you can see from these examples, this way makes life easier for us. 

## Never require inheritance

Some libraries that we use strongly encourage us to create subclasses of the objects they provide; others flat-out force us to do so. `ActiveRecord` is an example of a library that is almost useless without inheritance but is also very complicated and would be hard to use as an example. The following code is a much simpler example of a tool that expects you to use subclasses of its provided `Plugin` class to get the job done.

```ruby
module Inspector
  # Runs every registered plugin class against +data+, using a fresh
  # instance of each class.
  def self.analyze(data)
    Plugin.registered_plugins.each { |e| e.new.analyze(data) }
  end

  # Base class for plugins. Subclassing it (directly or indirectly)
  # registers the new class with Inspector.
  class Plugin
    # Record the subclass in Plugin's own list. Using the bare
    # `registered_plugins` here would store subclasses of a subclass on the
    # intermediate class, where Inspector.analyze never looks.
    def self.inherited(base)
      super
      Plugin.registered_plugins << base
    end

    # Lazily created list of registered plugin classes.
    def self.registered_plugins
      @registered_plugins ||= []
    end

    # Subclasses must override this.
    def analyze(data)
      raise NotImplementedError
    end
  end
end

# Registered automatically by inheriting from Inspector::Plugin.
class WordCountPlugin < Inspector::Plugin
  # Prints the number of space-separated words in +data+.
  def analyze(data)
    word_count = data.split(/ /).length
    puts "Content contained #{word_count} words"
  end
end

# Registered automatically by inheriting from Inspector::Plugin.
class WordLengthPlugin < Inspector::Plugin
  # Prints the length of the longest space-separated word in +data+.
  def analyze(data)
    longest = data.split(/ /).map { |e| e.length }.max
    puts "Longest word contained #{longest} characters"
  end
end

Inspector.analyze("This is a test of the watcher plugins")

## OUTPUTS
# Content contained 8 words
# Longest word contained 7 characters
```

Using the `inherited` hook is a clever way to implicitly register plugins, but this approach comes with a number of downsides. For example, you are forced to make all your plugins inherit from `Inspector::Plugin`, which means your class can't be a subclass of anything else. Additionally, you need to use a class even if a module would make more sense. This is a direct consequence of the "interface taking classes rather than ordinary objects" issue and cannot be easily avoided. If you throw in things like having to be aware of possible Liskov Substitution Principle violations, it becomes clear that an API that forces you to use subclassing is not exactly flexible.

This code shows a much more flexible alternative that completely removes the dependency on class inheritance:

```ruby
module Inspector
  # Calls analyze(data) on every registered plugin object.
  def self.analyze(data)
    registered_plugins.each { |e| e.analyze(data) }
  end

  # Lazily created list of plugins; callers append to it to register.
  def self.registered_plugins
    @registered_plugins ||= []
  end
end

module WordCountPlugin
  # Prints the number of space-separated words in +data+.
  def self.analyze(data)
    word_count = data.split(/ /).length
    puts "Content contained #{word_count} words"
  end
end

module WordLengthPlugin
  # Prints the length of the longest space-separated word in +data+.
  def self.analyze(data)
    longest = data.split(/ /).map { |e| e.length }.max
    puts "Longest word contained #{longest} characters"
  end
end

# Registration is explicit: append each plugin to the list, then analyze.
Inspector.registered_plugins << WordCountPlugin << WordLengthPlugin
Inspector.analyze("This is a test of the watcher plugins")
```

Now any object that has a valid `analyze` method will work as a plugin, as long as you explicitly register it with `Inspector`. This approach results in much cleaner-looking code for this trivial implementation, but the general strategy can also be used in situations where inheritance is still a part of the picture. The following code shows plugins that inherit from a parent class coexisting with plugins built from scratch.

```ruby
module Inspector
  # Calls analyze(data) on every registered plugin object.
  def self.analyze(data)
    registered_plugins.each { |e| e.analyze(data) }
  end

  # Lazily created list of plugins; callers append to it to register.
  def self.registered_plugins
    @registered_plugins ||= []
  end

  # Optional base class offering a small DSL (verify / analyze) for plugins.
  class Plugin
    # Registers a validation block to be run against the data.
    def self.verify(&block)
      validations << block
    end

    # Lazily created list of validation blocks, stored on the class it is
    # called on.
    def self.validations
      @validations ||= []
    end

    # Defines the instance-level #analyze: validate first, then run the block.
    def self.analyze(&block)
      define_method :analyze do |data|
        validate(data) 
        block.call(data)
      end
    end

    # Raises (bare `raise`, no custom message) unless every validation block
    # of this object's class returns a truthy value for +data+.
    def validate(data)
      raise unless self.class.validations.all? { |v| v.call(data) }
    end
  end
end

# Plugin built on the Inspector::Plugin DSL.
class WordCountPlugin < Inspector::Plugin
  # Only string data is accepted.
  verify { |data| data.is_a?(String) }

  # Prints the number of space-separated words in the data.
  analyze do |data|
    word_count = data.split(/ /).length
    puts "Content contained #{word_count} words"
  end
end

module WordLengthPlugin
  # same as before, not inheriting from anything
end

# The class-based plugin is registered as an instance (its #analyze is an
# instance method); the module-based plugin is registered as-is.
Inspector.registered_plugins << WordCountPlugin.new << WordLengthPlugin
Inspector.analyze("This is a test of the watcher plugins")
```

This small change makes a big difference in flexibility and leaves some of the decision making up to your users rather than forcing them down a particular path. Using the default approach of inheriting from `Inspector::Plugin` will feel nearly identical to an inheritance-only approach to the user. However, if more customizations are needed, this design provides an easy way to build plugins from scratch. Because it is so easy to implement this pattern, it is probably worth doing even if your immediate needs don't call for such flexibility.

## Inject dependencies

I covered dependency injection and the dependency inversion principle at length in [Practicing Ruby 1.23](http://blog.rubybestpractices.com/posts/gregory/055-issue-23-solid-design.html). Rather than repeating those details here, I'd like you to read that article as well as the comments from [Derick Bailey](http://blog.rubybestpractices.com/posts/gregory/055-issue-23-solid-design.html#comment-317367342). My conversation with Derick taught me a thing or two about the subtle distinctions between dependency injection (a technique) and dependency inversion (a design principle) that are hard to summarize but still well worth working through if you want to solidify your understanding of these two very important concepts.

As an example of some practical code that uses dependency injection, let's look at the HighLine command-line library. In ordinary usage, HighLine reads its input from `STDIN` and writes its output to `STDOUT`. You can install HighLine via RubyGems and run the following example to get a feel for how it works.

```ruby
require "highline"

# No arguments: HighLine uses its default input and output streams.
console = HighLine.new
name    = console.ask("What is your name?") 
console.say("Hello #{name}")
```

In ordinary cases, this is exactly what we want: ordinary input and output from your console. However, to test HighLine, we made it possible to inject input and output objects to use in place of `STDIN` and `STDOUT`. The next example shows the use of `StringIO` objects, which is what we use in our unit tests. 

```ruby
require "stringio"
require "highline"

# Canned input that stands in for what a user would type.
input   = StringIO.new("Gregory\n")
input.rewind # set seek pos back to 0

# Collects everything HighLine writes.
output  = StringIO.new

# Inject both streams in place of the defaults.
console = HighLine.new(input, output)
name = console.ask("What is your name?")
console.say("Hello #{name}!")

# Rewind before reading so the captured text is read from the start.
output.rewind
puts output.read
```

Interestingly enough, this 'feature' of HighLine has caused it to be used in a number of contexts that we didn't anticipate. For example, it is occasionally used in GUI programs for its input validation features and is sometimes used in noninteractive scripts for its text formatting features. If we had directly worked against `STDIN` and `STDOUT`, these ways of using HighLine would not be possible without ugly hacks.

## Unit tests are fast

Personally, I am not obsessive about unit test performance. Many Rubyists care a lot about this and advocate the heavy use of mock objects to speed up your tests. Mike points out in his article that the combination of "mock objects, a lack of inheritance, and injected dependencies" will make your tests fast, and that's basically true. 

Dependency injection does facilitate the use of mock objects, and our HighLine example demonstrates why that is the case. A lack of inheritance might imply that your method call chains are shorter and that you have fewer dependencies, but it's not as strong of a metric as he makes it seem. Eventually, object composition can end up being more or less the same as inheritance in complexity, and in those cases you may still end up with slow tests. Composed object systems are easier to decouple, which is probably what he's getting at here.

For some practical examples of how you can use mocks to decouple your tests from your code and speed things up a bit, see [Practicing Ruby 1.20](http://blog.rubybestpractices.com/posts/gregory/052-issue-20-thoughts-on-mocking.html). If you read through that article, you'll find out why I don't care so much about limiting my dependencies to the object under test. But when all else is considered equal, fewer dependencies mean less code, which probably means faster tests.

Having a performant test suite is certainly ideal; it's just a matter of weighing the costs of fine-tuning your test suite versus the benefits that faster test runs provide. Personally, I felt that Mike somewhat overemphasized this particular point, but many well-known Rubyists would disagree with me on this one.

## Require no interface

Mike suggests that your library code should allow your users to name their methods however they want and should be designed to consume rather than be consumed. He then recognizes that this design may not always be possible and concedes that if you need the user to implement certain features, you should limit this functionality to one or two methods at most. This is great general advice, and if you look at Ruby itself, we can find some good examples.

The `Enumerable` module is capable of providing the vast majority of its features if the user implements only an `each` method. If you want to use things like `sort`, you just need the yielded elements to implement a `<=>` method. All features in `Enumerable` can thus be supported by "one or two methods" implemented by the user, which makes it a very good API, considering the great wealth of functionality it provides.

However, `Enumerator` takes things a step further by requiring no interface at all. You can name the methods of the target collection anything you want; you merely tell `Enumerator` which method it should delegate its `each` method to when you initialize an `Enumerator`. See the following example to see how flexible this approach is.

```ruby
class RandomInfiniteList
  def generate
    loop do
      yield rand
    end
  end
end

enum = Enumerator.new(RandomInfiniteList.new, :generate)
p enum.next
p enum.take(10)
```

In this way, `Enumerator` can be made to turn any object with an iterator method into an `Enumerable` object, regardless of what the name of that method is. This feature can be useful when you're working with unidiomatic Ruby objects that provide an iterator but do not mix in `Enumerable`.

We can also look at an example that's a bit less abstract. If we look back at the `Inspector` example that we were using to discuss how to avoid hard inheritance requirements, we can see that it requires only a small interface for plugins to conform to. Although it isn't so bad that each plugin needed an `analyze` method in our previous iteration, we can make some modifications so that it depends on no interface at all, which may bring us a step closer to what Mike was hinting at.

The following example shows what an `Inspector` class that implements an "expects no interface" API style might look like. To keep things interesting, I've left the implementation of this class up to you. If you get stuck, feel free to leave a comment asking for hints on how to build this out.

```ruby
# delegates to the WordLengthPlugin module
Inspector.report("Word Length") do |data|
  WordLengthPlugin.analyze(data)
end

# implements the report as an inline function
Inspector.report("Word Count") do |data|
  word_count = data.split(/ /).length
  puts "Content contained #{word_count} words"
end

Inspector.analyze("This is a test of the watcher plugins")
```

## Data, not action

This guideline seems to boil down to the idea that your API calls should be simple "data in, data out" operations whenever that level of simplicity is easily within reach. After thinking about this concept a bit, I realized that a pair of common Ruby operations serve as perfect examples.

Suppose that you want to open a binary file and read its contents into a `String`. You could write the following code, which will get the job done:

```ruby
file = File.open("foo.jpg", "rb")
image_data = file.read
file.close
```

But this approach is the correct way to do things only if you want to work explicitly with the `File` object and control exactly when it gets opened and closed. If all you want to do is read the contents of a binary file, this is three actions for what should be one "data in, data out" operation. Fortunately, Ruby recognizes this as a common use case and provides a nicer API that you can use instead:

```ruby
image_data = File.binread("foo.jpg")
```

In this example, we pass the filename and get back the contents of that file. Though we have less control over the overall process, we also get to ignore irrelevant details like exactly when the file is opened and closed. This is what I think that Mike probably meant when he said "data, not action," and it can be applied when designing your own APIs. To drive the point home, let's look at another example.

The following code shows the generic form of doing a GET request via the `Net::HTTP` standard library. It is not the most terse way to use `Net::HTTP`, but it is one of the most common.

```ruby
require 'net/http'

url = URI.parse('http://www.google.com')
res = Net::HTTP.start(url.host, url.port) do |http|
  http.get('/')
end

puts res.body
```

There is a whole lot going on here, including explicit `URI` parsing and explicit calls to `get()`. But as you saw with `File.open` versus `File.binread`, Ruby provides a convenient alternative for this very common operation. The open-uri standard library makes it possible to write the following code instead:

```ruby
require "open-uri"

puts open("http://google.com").read
```

Once again, we see a series of complex actions being converted into a simple "data in, data out" operation. In this case, we are converting a string that represents a URI into an IO object via the globally available `open()` method. This approach makes it possible for us to not think about explicitly parsing our string into a `URI` object and lets us ignore the details about starting an HTTP connection and explicitly making a GET request. When all we want is the source of a web page, it's great to be able to ignore these details.

## Always be coding

As I reflect on Mike's guidelines for Unobtrusive Ruby, it all seems to come down to making life easier for your users by limiting the impact your system has on them. Give your users small, flexible APIs that do not demand much of their systems, and they will have better luck using your code. This seems like a noble set of goals to me, and I hope my examples demonstrate the same spirit that Mike wanted to encourage in his post.

However, I always worry about using design guidelines like these as some sort of absolute set of commandments. There were a whole lot of words like "always" and "never" in the original Unobtrusive Ruby post that, if left unchecked, could cause more harm than good. For me, context is king, and these ideas seem much more important for those who are writing code that is intended for widespread reuse than they might be for ordinary application development. That said, the examples I've shown here demonstrate that you can often be on the right side of these guidelines with little to no additional effort. Therefore, if we can keep the ideas behind Unobtrusive Ruby in the back of our minds and apply them when it's an easy fit, we may well end up improving our code.

This article is meant to be a conversation starter, not gospel. Please share your own thoughts on what Unobtrusive Ruby means to you, either via a comment here or on your own blog. I think it's a conversation worth having, even if we haven't quite nailed down all the definitions yet. If we end up getting an interesting discussion going, I'll invite Mike to check out what we've discussed and see what he thinks about it.

So what do you think? Was my code unobtrusive enough for you? If not, why not? ;)


================================================
FILE: articles/v2/008-singleton-pattern.md
================================================
Many design patterns that originated in other object-oriented languages have elegant Ruby translations. However, the [Singleton](http://en.wikipedia.org/wiki/Singleton_pattern) stands out as a construct that seems to have no good implementation in Ruby. In this article, I will walk through the different options and explain why they all have something wrong with them. But first, we need a working definition of the singleton pattern to make sure we're on the same page.

Put briefly, the singleton pattern is a clever way of implementing global objects that you never need to explicitly instantiate. Not to be confused with Ruby's mostly unrelated concept of a "singleton class," the singleton pattern is applied when only a single instance of an object is needed across an entire application. Typical examples include objects that represent configuration data, global logging systems, and other similar structures. But there are also some subtle use cases in Ruby due to the fact that classes and modules are objects. For example, we have the `Math` module on which we can call methods such as `Math.sin()`, and `Math.cos()`. The `Math` module is acting as a singleton object in this context, even if it's not immediately obvious to the user. Keep in mind while reading this article that I've lumped this sort of use case in with the more traditional ones, as it shifts the perspective somewhat.

There are a lot of different ways to implement this general pattern in Ruby, but as I've already mentioned, they all pretty much suck. That said, studying this problem can teach us a thing or two about the subtleties (and warts) of the Ruby object system. As you read along, try figuring out the downsides of each implementation before moving on to read my explanations. This exercise will make the article more interesting and may even uncover some fresh ideas that I haven't considered yet.

### Using the `Singleton` module provided by the standard library

Ruby provides a standard library to assist in implementing the singleton pattern. The following code (originally from Practicing Ruby 1.25) illustrates how you can use this library to build a simple logger object.

```ruby
require "singleton"

class SimpleLogger
  include Singleton

  def initialize
    @output = []
  end

  attr_reader :output

  def error(message)
    output << formatted_message(message, "ERROR")
  end

  def info(message)
    output << formatted_message(message, "INFO")
  end

  def write(filename)
    File.open(filename, "a") { |f| f << output.join }
  end

  private

  def formatted_message(message, message_type)
    "#{Time.now} | #{message_type}: #{message}\n"
  end
end
```

By including the `Singleton` module, we make it so that it is no longer possible to create an instance of the `SimpleLogger` class in an ordinary way.

```
>> logger = SimpleLogger.new
NoMethodError: private method `new' called for SimpleLogger:Class
  from (irb):2
```

This behavior makes sense, because the point of the singleton pattern is to prevent multiple instances of a given object from being created. This example code shows how to get at a `SimpleLogger` instance in a way that guarantees that only one will be created.

```ruby
logger = SimpleLogger.instance

logger.error("Some serious problem")
logger.info("Something you might want to know")
logger.write("log.txt")
```

This interface is a bit cumbersome to work with, but it gets the job done, and on its own isn't too bad. However, disabling `new` and adding an `instance` method isn't all that the `Singleton` module does. It also does all of the following things:

* Overrides `inherited()` on the class to ensure that subclasses also retain `Singleton` behavior
* Overrides `dup()`/`clone()` on the class to ensure that copied classes also retain `Singleton` behavior
* Overrides `_load()` to call `instance()`, modifying `Marshal` loading behavior to return the single instance
* Overrides `_dump()` to strip state information when serializing via `Marshal`
* Overrides `dup()`/`clone()` on the instance to raise a `TypeError`, preventing cloning or duping of the instance

When you think about what a singleton object is actually meant to be, these changes make sense. However, many Rubyists look at this and see a whole lot of complexity without a lot of direct benefits. This impression causes many folks to avoid the use of the `Singleton` module in favor of implementations that are a bit more low ceremony. These implementations tend to ignore some of the edge cases that the `Singleton` module accounts for but are much easier to understand.

### Using a class consisting of only class methods

The following code uses ordinary class methods as an alternative to the previous approach. We explicitly call `undef_method` to make it so that instances of this class cannot be created, but otherwise this code is a vanilla Ruby class definition.

```ruby
class SimpleLogger
  class << self
    undef_method :new

    def output
      @output ||= []
    end

    def error(message)
      output << formatted_message(message, "ERROR")
    end

    def info(message)
      output << formatted_message(message, "INFO")
    end

    def write(filename)
      File.open(filename, "a") { |f| f << output.join }
    end

    private

    def formatted_message(message, message_type)
      "#{Time.now} | #{message_type}: #{message}\n"
    end
  end
end
```

Using this class is very simple, as the entire API consists of class method calls.

```ruby
SimpleLogger.error("Some serious problem")
SimpleLogger.info("Something you might want to know")
SimpleLogger.write("log.txt")
```

This approach isn't too bad, but it has its own set of caveats. The use of `undef_method` to disable the `new` method makes our `Class` object into something that is in some ways class-like, but isn't quite a class anymore. From a purity standpoint, something just feels wrong about a construct that can exist in an inheritance hierarchy but cannot be instantiated. There is also the question of whether it ever really makes sense to create a subclass of a singleton object.

For a number of reasons, these philosophical issues tend to push folks in the direction of Ruby's `Module` construct, which at first glance seems to address some of these problems.

### Using a module consisting of only module methods

Every module is an object that cannot exist in a hierarchy and cannot be instantiated but otherwise has properties similar to those of `Class` objects. Note how similar this code is to our previous example.

```ruby
module SimpleLogger
  class << self
    def output
      @output ||= []
    end

    def error(message)
      output << formatted_message(message, "ERROR")
    end

    def info(message)
      output << formatted_message(message, "INFO")
    end

    def write(filename)
      File.open(filename, "a") { |f| f << output.join }
    end

    private

    def formatted_message(message, message_type)
      "#{Time.now} | #{message_type}: #{message}\n"
    end
  end
end
```

The two approaches are so similar that they look identical from the end user's perspective:

```ruby
SimpleLogger.error("Some serious problem")
SimpleLogger.info("Something you might want to know")
SimpleLogger.write("log.txt")
```

Of course, if we look under the hood, we find that these two implementations are quite different. Although it's true that we've effectively made it impossible to create a subclass of `SimpleLogger` and that we didn't have to explicitly disable the `new` method because `Module` does not provide one, we now are faced with the problem that this module can be mixed into other objects.

Just as a class can have methods at the class level and the instance level, a module can have methods at the module level and the "mixin" level. Our `SimpleLogger` code defines all of its methods at the module level, which means that mixing it into an object via `include` or `extend` will not add any new functionality to the object it gets mixed into. From a purity standpoint, this approach is pretty much identical to the "useless instances" that would be possible for us to create if we allowed calls to the `new` method in our class-based `SimpleLogger`. Modules therefore don't actually give us much of an advantage over classes after all.

To make matters more confusing, Ruby provides us with a couple additional ways to use modules to implement the singleton pattern that bring new kinds of complexity into the mix.

### Using a module with `module_function`

If I were to create a list of Ruby's most confusing features, `module_function` would be near the top. It is a keyword (like `private` and `protected`), which allows you to specify certain methods within a module to be callable at the module level. This feature seems useful at a glance and is even used by Ruby's `Math` module. The interesting thing about module methods is that they serve as public methods on the module itself but get mixed into other objects as private methods. 

This example code demonstrates directly calling methods on the `Math` module, which looks similar to our previous module-based singleton pattern example.

```ruby
class Point
  def initialize(x,y)
    @x = x
    @y = y
  end

  attr_reader :x, :y

  def distance_to(other_point)
    Math.hypot(other_point.x - x, other_point.y - y)
  end
end

point_a = Point.new(0,0)
point_b = Point.new(4,3)

p point_a.distance_to(point_b)
```

If we instead include the `Math` module into the `Point` class, we see that the behavior is different than defining methods directly on the `Math` module because its functionality does get mixed into `Point`.

```ruby
class Point
  include Math

  def initialize(x,y)
    @x = x
    @y = y
  end

  attr_reader :x, :y

  def distance_to(other_point)
    hypot(other_point.x - x, other_point.y - y)
  end
end

point_a = Point.new(0,0)
point_b = Point.new(4,3)

p point_a.distance_to(point_b)
```


This pattern of having a module that doubles as a mixin and a singleton object probably has limited applications, but it seems reasonable for the `Math` module because each method provided by `Math` is purely functional and is also unlikely to clash with other features within a given class. But even if mixing in the `Math` module is convenient and relatively safe, we wouldn't want our `Point` object to expose the features that the `Math` object provides via its public API. This is where we notice that `module_function` anticipates this potential problem and attempts to solve it by making all mixed-in methods private.

```
>> point_a.hypot(4,3)
NoMethodError: private method `hypot' called for #<Point:0x0000010083bd68 @x=0, @y=0>
    from (irb):20
    from /Users/seacreature/.rvm/rubies/ruby-1.9.3-rc1/bin/irb:16:in `<main>'
```

Though this is potentially a useful feature, we now must keep in mind that modules that utilize `module_function` do not have ordinary mixin behavior. However, this is not the main reason why I said that `module_function` is confusing. To generate our proper "WTF?" moment, we can attempt to use `module_function` to implement our `SimpleLogger` object.

```ruby
module SimpleLogger
  module_function
  
  def output
    @output ||= []
  end

  def error(message)
    output << formatted_message(message, "ERROR")
  end

  def info(message)
    output << formatted_message(message, "INFO")
  end

  def write(filename)
    File.open(filename, "a") { |f| f << output.join }
  end

  private

  def formatted_message(message, message_type)
    "#{Time.now} | #{message_type}: #{message}\n"
  end
end
```

That implementation *almost* works, but ends up failing with an error that is quite surprising unless you know exactly how `module_function` works. 

```
>> SimpleLogger.error("This won't actually succeed")
NoMethodError: undefined method `formatted_message' for SimpleLogger:Module
    from (irb):29:in `error'
    from (irb):50
    from /Users/seacreature/.rvm/rubies/ruby-1.9.3-rc1/bin/irb:16:in `<main>'
```

The reason this happens is multifaceted. When using `module_function` with no arguments, as we do, the public methods that are defined after the `module_function` call are treated as module functions and get copied onto the module itself. However, once the private keyword is reached, the methods are no longer treated as module functions and thus don't end up getting copied onto the module. It is therefore effectively impossible to use `module_function` if you want to have your module methods call any private methods. If we accept this limitation and remove the `private` keyword from our `SimpleLogger` definition, things will work as expected using the original runner code from the previous examples.

If you made it through the previous example without becoming incredibly confused, try guessing what this code will do before running it, and then prepare to be surprised.

```ruby
module A
  def x
    10
  end

  module_function :x 

  def x
    12
  end
end

p A.x

class B
  include A
end

p B.new.x
```

If you managed to run this example without thinking that `module_function` is an abomination that should be removed from the Ruby language post-haste, please leave a convincing argument in the comments section. But you may want to first look at `module_function`'s slightly less awkward cousin, `extend self`.

### Using a module with `extend self`

The main problem with `module_function` is that it has so many moving parts. You need to really understand a fairly broad range of Ruby concepts in order to use it effectively. But if we accept the notion that it's sometimes useful for a singleton object to double as a mixin, we can try another approach that behaves similarly to `module_function` without too many special cases to consider.

We can start by exploring a contrived example that demonstrates what happens when you use `extend` to mix a module into itself. If it's not immediately obvious what the `extend self` line does, treat it as a black box for now and focus on how the objects behave as we call methods on them.

```ruby
module A
  extend self

  def x
    y
  end
  
  private

  def y
    "yay!"
  end
end

class B
  include A
end

A.x      #=> "yay!"
A.y      #=> raises NoMethodError: private method `y' called for A:Module
B.new.x  #=> "yay!"
B.new.y  #=> raises NoMethodError: private method `y' called for #<B:...>
```

Speaking purely from the perspective of the externally visible behavior, we see that the key difference between `module_function` and `extend self` is that `extend self` results in identical behavior at both the module level and the mixin level when it comes to access control. Both private methods and public methods get mixed into the target object, and their access control is kept the same as whatever it was in the module definition. This is good because it means that your module can actually define private methods without any consequences. The main technical downside of this approach is that if `Math` were implemented in this way, including the `Math` module in a given object would add all the functions that can be called on the `Math` module to the public API of that object. There are workarounds to get that sort of behavior without `module_function`, but they're cumbersome and not really worth talking about. Assuming that we don't care about this subtle distinction, the following code will implement a working `SimpleLogger` singleton object that's a bit easier to reason about than the `module_function` version:

```ruby
module SimpleLogger
  extend self

  def output
    @output ||= []
  end

  def error(message)
    output << formatted_message(message, "ERROR")
  end

  def info(message)
    output << formatted_message(message, "INFO")
  end

  def write(filename)
    File.open(filename, "a") { |f| f << output.join }
  end

  private

  def formatted_message(message, message_type)
    "#{Time.now} | #{message_type}: #{message}\n"
  end
end
```

Even if the `extend self` approach is more fundamentally simple than `module_function`, it is not necessarily easy to learn or easy to understand. I go into great detail explaining exactly how this technique works in [Practicing Ruby 1.10](http://blog.rubybestpractices.com/posts/gregory/040-issue-10-uses-for-modules.html), but the two pages it takes me to explain it at a very high level serve as a hint that we're probably trying too hard to be clever when we write code this way. 

So far, we've gone down a deep bunny hole because each alternative approach we've attempted was about solving a problem with the previous implementation. But for every improvement we make, we lose something in return. Much of our struggle has to do with the costs involved in trying to implement a singleton object that conforms to the expectations we have about classes and modules. To sidestep this issue, we can think about a solution that works directly with an individual object instead. 

### Using a bare instance of `Object`

The following code shows how to implement the singleton pattern by adding methods to a bare instance of `Object`. This code may look a bit strange at first but is at its core the same as defining methods on any other Ruby object, including instances of the `Module` and `Class` classes.

```ruby
SimpleLogger = Object.new
class << SimpleLogger
  def output
    @output ||= []
  end

  def error(message)
    output << formatted_message(message, "ERROR")
  end

  def info(message)
    output << formatted_message(message, "INFO")
  end

  def write(filename)
    File.open(filename, "a") { |f| f << output.join }
  end

  private

  def formatted_message(message, message_type)
    "#{Time.now} | #{message_type}: #{message}\n"
  end
end
```

Although we didn't have to store the object in the `SimpleLogger` constant, doing so makes the familiar runner code we've been using over and over work exactly as expected.

```ruby
SimpleLogger.error("Some serious problem")
SimpleLogger.info("Something you might want to know")
SimpleLogger.write("log.txt")
```

The code looks and feels the same from the end-user's perspective, but we know that there is something quite different hiding under the hood. The good news is that this approach makes it so that our `SimpleLogger` is not a factory for creating new objects like classes are and cannot be mixed into other objects or part of a hierarchy. The bad news is that the resulting object is very opaque. When we inspect a class or module object, it at least gives us back its name. However, when we inspect this object, what we get is the following:

```
>> SimpleLogger
=> #<Object:0x0000010084bc18>
```

Documenting this object using something like RDoc would be similarly frustrating, as it wouldn't be able to infer much about the object without lots of explicit directives. Although Ruby is designed in terms of ordinary objects, its infrastructure is surely defined in terms of classes and modules.

We can't do anything about the documentation problem without major changes to Ruby, but we might be able to build our own custom construct that takes these basic ideas and adds better debugging support.

### Using a hand-rolled construct

A few days before writing this article, I complained on Twitter about the lack of a good way to implement the singleton pattern in Ruby and suggested that perhaps we needed a new first-order construct for these purposes. Someone was quick to point out that [Scala has such a construct](http://hestia.typepad.com/flatlander/2009/01/scala-for-c-programmers-part-2-singletons.html), which got the wheels turning in my head. As a point of reference, here's what the construct looks like in Scala:

```
object Universe {
  def contains(obj: Any): Boolean = true  
}

val v = Universe.contains(42)
```

Now, there is a big difference between having built-in support for something in a language and building some sort of hand-rolled approximation. However, I couldn't resist implementing a construct in Ruby that works roughly the same way as this Scala code. After some tinkering, I settled on this syntax:

```ruby
object "Universe" do
  def contains?(anything)
    true
  end
end

p Universe.contains?(42) #=> true
p Universe #=> #<Universe:2156157600>  
```

Because the call to `object` hides the actual object creation, I was able to add nice `inspect` output in a way that is transparent to the user. Similar debugging and introspection features could be added just as easily. Under the hood, I used an approach similar to working with bare objects, so I retain all the benefits of that approach while getting rid of some of the downsides. (NOTE: I've decided to leave implementing this construct as a homework exercise, but please let me know if you get stuck and want to see how I did it.)

Looking back on this code, I like the way the experiment went, but I am stuck wondering whether it makes sense to take it any farther. Without first-order support in the Ruby language for this construct, documentation would still be a struggle. Also, the awkward syntax breaks consistency with `Class` and `Module`. What we'd really want to be able to type is something like the following definition:

```ruby
object Universe 
  def contains?(anything)
    true
  end
end
```

Although the previous example is syntactically pleasing to me, I wonder if encouraging us to use more global functionality (and on a related note, more constants) is a good idea. At a minimum, such a change would need to also be mirrored in `Object.new` by adding a block form similar to the way that `Module` and `Class` work. This approach would end up looking something like this:

```ruby
universe = Object.new do
  def contains?(anything)
    true
  end
end
```

The thing we have to ask ourselves is whether these features would really make Ruby a better language to work in. To answer that question, we need to consider the costs and benefits of avoiding the singleton pattern entirely.

### Avoiding the singleton pattern entirely

Pretty much everything discussed so far is about avoiding explicitly instantiating objects, which makes it possible for us to put off potentially expensive setup work and also makes it easier for us to prevent multiple instances of a given object from being instantiated within our applications. Some of our implementations do a good job of communicating these desires to the user by preventing them from creating instances. Others encourage a limited form of code reuse through module mixins, but with a number of caveats attached.

But in the end, we must not forget that the singleton pattern is essentially just a fancy way of managing global state. If we converted our `SimpleLogger` into an ordinary class and then did something like `$logger = SimpleLogger.new`, there would be only a marginal practical difference in the way things worked in our codebase. Things change slightly when we think of function bags like the `Math` module, but not as much as you might think. We must remember that no matter what form our singleton objects take, each one we add to our system is by definition less reusable and less testable due to its singular, global nature.

The question of whether to implement the singleton pattern really depends on the context, but it's safe to say that it's a bad default. However, this is a genuinely hard problem in object-oriented programming, which may explain why we've seen so many different attempts in Ruby without a real consensus on which way is best. We've also been unable to eliminate the pattern entirely, which is a sign that we can't simply write it off as one of those bad Java imports that real Rubyists freely ignore.

### Reflections

The process of writing this article has taught me a few things. First of all, most of the approaches we take to implement the singleton pattern are way too complicated. Although this grail quest for object-oriented purity is entertaining from an academic perspective, it isn't something we should need to think about in our daily coding lives.

That said, it seems like a first-order construct that lets us define individual objects in an elegant way would be an interesting addition to Ruby. If we gravitated toward a more prototype-based design style via these standalone objects while using modules for our code reuse, we might end up with very nice solutions that would make this "singleton object" problem just disappear. But then again, that would be a huge shift in the way we write Ruby code, and I'm not sure the juice is worth the squeeze.

In the end, it amazes me that I was able to write so much on this topic, but not in a good way. As powerful as Ruby-the-language is, it seems that we're still far from being able to balance that power with responsibility. Every approach I criticized in this article is one I've advocated for at some earlier point, and now I'm not so sure I like any of them.

My hope is that you got two things from reading this article: a deeper understanding of the complexity of Ruby's object system and an awareness of the trade-offs of various approaches to this problem, so that you can be better equipped when you encounter this sort of design in the wild. If I've accomplished that, then this article was well worth the effort it took to write up. If not, don't worry: we'll return to more practical content next week! :)


================================================
FILE: articles/v2/009-unix-style-command-line-applications.md
================================================
Ruby is best known as a web development language, but in its early days it was
mainly used on the command line. In this article, we'll get back to those roots by building a partial implementation of the standard Unix command `cat`.

The core purpose of the `cat` utility is to read in a list of input files, concatenate them, and output the resulting text to the command line. You can also use `cat` for a few other useful things, such as adding line numbers and suppressing extraneous whitespace. If we stick to these commonly used features, the core functionality of `cat` is something even a novice programmer would be able to implement without too much effort.

The tricky part of building a `cat` clone is that it involves more than just
some basic text manipulation; you also need to know about some 
stream processing and error handling techniques that are common in Unix
utilities. The [acceptance tests](https://gist.github.com/1293709) 
that I've used to compare the original `cat` utility to my Ruby-based `rcat` 
tool reveal some of the extra details that need to be considered when
building this sort of command line application.

If you are already fairly comfortable with building command line tools, you may
want to try implementing your own version of `rcat` before reading on. But don't
worry if you wouldn't even know where to start: I've provided a 
detailed walkthrough of my solution that will teach you everything 
that you need to know.

> **NOTE:** You'll need to have the source code for [my implementation of rcat](https://github.com/elm-city-craftworks/rcat) easily accessible as you work through the rest of this article. Please either clone the repository now or keep the GitHub file browser open while reading.

### Building an executable script

Our first task is to make it possible to run the `rcat` script without having to type something like `ruby path/to/rcat` each time we run it. This task can be done in three easy steps.

**1) Add a shebang line to your script.**

If you look at `bin/rcat` in my code, you'll see that it starts with the following line:

```
#!/usr/bin/env ruby
```

This line (commonly called a shebang line) tells the shell what interpreter to use to process the rest of the file. Rather than providing a path directly to the Ruby interpreter, I instead use the path to the standard `env` utility. This step allows `env` to figure out which `ruby` executable is present in our current environment and to use that interpreter to process the rest of the file. This approach is preferable because it is [more portable](http://en.wikipedia.org/wiki/Shebang_line#Portability) than hard-coding a path to a particular Ruby install. Although Ruby can be installed in any number of places, the somewhat standardized location of `env` makes it reasonably dependable.

**2) Make your script executable.**

Once the shebang line is set up, it's necessary to update the permissions on the `bin/rcat` file. Running the following command from the project root will make `bin/rcat` executable:

```
$ chmod +x bin/rcat
```

Although the executable has not yet been added to the shell's lookup path, it is now possible to test it by providing an explicit path to the executable.

```
$ ./bin/rcat data/gettysburg.txt
Four score and seven years ago, our fathers brought forth on this continent a
new nation, conceived in Liberty and dedicated to the proposition that all men
are created equal.

... continued ...
```

**3) Add your script to the shell's lookup path.**

The final step is to add the executable to the shell's lookup path so that it can be called as a simple command. In Bash-like shells, the path is updated by modifying the `PATH` environment variable, as shown in the following example:

```
$ export PATH=/Users/seacreature/devel/rcat/bin:$PATH
```

This command prepends the `bin` folder in my rcat project to the existing contents of the `PATH`, which makes it possible for the current shell to call the `rcat` command without specifying a direct path to the executable, similar to how we call ordinary Unix commands:

```
$ rcat data/gettysburg.txt
Four score and seven years ago, our fathers brought forth on this continent a
new nation, conceived in Liberty and dedicated to the proposition that all men
are created equal.

... continued ...
```

To confirm that you've followed these steps correctly and that things are working as expected, you can now run the acceptance tests. If you see anything different than the following output, retrace your steps and see whether you've made a mistake somewhere. If not, please leave a comment and I'll try to help you out.

```
$ ruby tests.rb 
You passed the tests, yay!
```

Assuming that you have a working `rcat` executable, we can now move on to talk about how the actual program is implemented.

### Stream processing techniques

We now can turn our focus to the first few acceptance tests from the _tests.rb_ file. The thing that all these use cases have in common is that they involve very simple processing of input and output streams, and nothing more. 

```ruby
cat_output  = `cat #{gettysburg_file}`
rcat_output = `rcat #{gettysburg_file}`

fail "Failed 'cat == rcat'" unless cat_output == rcat_output

############################################################################

cat_output  = `cat #{gettysburg_file} #{spaced_file}`
rcat_output = `rcat #{gettysburg_file} #{spaced_file}`

fail "Failed 'cat [f1 f2] == rcat [f1 f2]'" unless cat_output == rcat_output

############################################################################

cat_output  = `cat < #{spaced_file}`
rcat_output = `rcat < #{spaced_file}`

fail "Failed 'cat < file == rcat < file" unless cat_output == rcat_output
```

If we needed only to pass these three tests, we'd be in luck. Ruby provides a special stream object called `ARGF` that combines multiple input files into a single stream or falls back to standard input if no files are provided. Our entire script could look something like this:

```ruby
ARGF.each_line { |line| print line }
```

However, the real `cat` utility does a lot more than what `ARGF` provides,
so it was necessary to write some custom code to handle stream processing:

```ruby
module RCat
  class Application
    def initialize(argv)
      @params, @files = parse_options(argv)

      @display        = RCat::Display.new(@params)
    end

    def run
      if @files.empty?
        @display.render(STDIN)
      else
        @files.each do |filename|
          File.open(filename) { |f| @display.render(f) }
        end 
      end
    end

    def parse_options(argv)
      # ignore this for now
    end
  end
end
```

The main difference between this code and the `ARGF`-based approach is that `RCat::Application#run` creates a new stream for each file. This comes in handy later when working on support for empty line suppression and complex line numbering but also complicates the implementation of the `RCat::Display` object. In the following example, I've stripped away the code that is related to these more complicated features to make it a bit easier for you to see the overall flow of things:

```ruby
module RCat
  class Display
    def render(data)
      lines = data.each_line
      loop { render_line(lines) }
    end

    private

    def render_line(lines)
      current_line = lines.next 
      print current_line
    end
  end
end
```

The use of `loop` instead of an ordinary Ruby iterator might feel a bit strange here, but it works fairly well in combination with `Enumerator#next`. The following irb session demonstrates how the two interact with one another:

```
>> lines = "a\nb\nc\n".each_line
=> #<Enumerator: "a\nb\nc\n":each_line>
>> loop { p lines.next }
"a\n"
"b\n"
"c\n"
=> nil

>> lines = "a\nb\nc\n".each_line
=> #<Enumerator: "a\nb\nc\n":each_line>
>> lines.next
=> "a\n"
>> lines.next
=> "b\n"
>> lines.next
=> "c\n"

>> lines.next
StopIteration: iteration reached an end
  from (irb):8:in `next'
  from (irb):8
  from /Users/seacreature/.rvm/rubies/ruby-1.9.3-rc1/bin/irb:16:in `<main>'

>> loop { raise StopIteration }
=> nil
```

Using this pattern makes it possible for `render_line` to actually consume more
than one line from the input stream at once. If you work through the logic that
is necessary to get the following test to pass, you might catch a glimpse of the
benefits of this technique:

```ruby
cat_output  = `cat -s #{spaced_file}`
rcat_output = `rcat -s #{spaced_file}`

fail "Failed 'cat -s == rcat -s'" unless cat_output == rcat_output
```

Tracing the execution path for `rcat -s` will lead you to this line of code in
`render_line`, which is the whole reason I decided to use this
`Enumerator`-based implementation:

```ruby
lines.next while lines.peek.chomp.empty?
```

This code does an arbitrary amount of line-by-line lookahead until either a nonblank line is found or the end of the file is reached. It does so in a purely stateless and memory-efficient manner and is perhaps the most interesting line of code in this entire project. The downside of this approach is that it requires the entire `RCat::Display` object to be designed from the ground up to work with `Enumerator` objects. However, I struggled to come up with an alternative implementation that didn't involve some sort of complicated state machine/buffering mechanism that would be equally cumbersome to work with.

As tempting as it is to continue discussing the pros and cons of the different
ways of solving this particular problem, it's probably best for us to get back on
track and look at some more basic problems that arise when working on
command-line applications. I will now turn to the `parse_options` method that I asked you 
to treat as a black box in our earlier examples.

### Options parsing

Ruby provides two standard libraries for options parsing: `GetoptLong` and `OptionParser`. Though both are fairly complex tools, `OptionParser` looks and feels a lot more like ordinary Ruby code while simultaneously managing to be much more powerful. The implementation of `RCat::Application#parse_options` makes it clear what a good job `OptionParser` does when it comes to making easy things easy:

```ruby
module RCat
  class Application
    # other code omitted

    def parse_options(argv)
      params = {}
      parser = OptionParser.new 

      parser.on("-n") { params[:line_numbering_style] ||= :all_lines         }
      parser.on("-b") { params[:line_numbering_style]   = :significant_lines }
      parser.on("-s") { params[:squeeze_extra_newlines] = true               }
      
      files = parser.parse(argv)

      [params, files]
    end
  end
end
```

The job of `OptionParser#parse` is to take an arguments array and match it against the callbacks defined via the `OptionParser#on` method. Whenever a flag is matched, the associated block for that flag is executed. Finally, any unmatched arguments are returned. In the case of `rcat`, the unmatched arguments consist of the list of files we want to concatenate and display. The following example demonstrates what's going on in `RCat::Application`:

```ruby
require "optparse"

puts "ARGV is #{ARGV.inspect}"

params = {}
parser = OptionParser.new 

parser.on("-n") { params[:line_numbering_style] ||= :all_lines         }
parser.on("-b") { params[:line_numbering_style]   = :significant_lines }
parser.on("-s") { params[:squeeze_extra_newlines] = true               }

files = parser.parse(ARGV)

puts "params are #{params.inspect}"
puts "files are #{files.inspect}"
```

Try running this script with various options and see what you end up with. You should get something similar to the output shown here:

```
$ ruby option_parser_example.rb -ns data/*.txt
ARGV is ["-ns", "data/gettysburg.txt", "data/spaced_out.txt"]
params are {:line_numbering_style=>:all_lines, :squeeze_extra_newlines=>true}
files are ["data/gettysburg.txt", "data/spaced_out.txt"]

$ ruby option_parser_example.rb data/*.txt
ARGV is ["data/gettysburg.txt", "data/spaced_out.txt"]
params are {}
files are ["data/gettysburg.txt", "data/spaced_out.txt"]
```

Although `rcat` requires us to parse only the most basic form of arguments, `OptionParser` is capable of a whole lot more than what I've shown here. Be sure to check out its [API documentation](http://ruby-doc.org/stdlib-1.9.2/libdoc/optparse/rdoc/OptionParser.html#method-i-parse) to see the full extent of what it can do.

Now that I've covered how to get data in and out of our `rcat` application, we can talk a bit about how it does `cat`-style formatting for line numbering.

### Basic text formatting 

Formatting text for the console can be a bit cumbersome, but some things are easier than they seem. For example, the tidy output of `cat -n` shown here is not especially hard to implement:

<pre style="font-size: 0.8em">
$ cat -n data/gettysburg.txt 
   1  Four score and seven years ago, our fathers brought forth on this continent a
   2  new nation, conceived in Liberty and dedicated to the proposition that all men
   3  are created equal.
   4  
   5  Now we are engaged in a great civil war, testing whether that nation, or any
   6  nation so conceived and so dedicated, can long endure. We are met on a great
   7  battle-field of that war. We have come to dedicate a portion of that field as a
   8  final resting place for those who here gave their lives that that nation might
   9  live. It is altogether fitting and proper that we should do this.
  10  
  11  But, in a larger sense, we can not dedicate -- we can not consecrate -- we can
  12  not hallow -- this ground. The brave men, living and dead, who struggled here
  13  have consecrated it far above our poor power to add or detract. The world will
  14  little note nor long remember what we say here, but it can never forget what
  15  they did here. It is for us the living, rather, to be dedicated here to the
  16  unfinished work which they who fought here have thus far so nobly advanced. It
  17  is rather for us to be here dedicated to the great task remaining before us --
  18  that from these honored dead we take increased devotion to that cause for which
  19  they gave the last full measure of devotion -- that we here highly resolve that
  20  these dead shall not have died in vain -- that this nation, under God, shall
  21  have a new birth of freedom -- and that government of the people, by the people,
  22  for the people, shall not perish from the earth.
</pre>

On my system, `cat` seems to assume a fixed-width column with space for up to six digits. This format looks great for any file with fewer than a million lines in it, but eventually breaks down once you cross that boundary.

```
$ ruby -e "1_000_000.times { puts 'blah' }" | cat -n | tail
999991    blah
999992    blah
999993    blah
999994    blah
999995    blah
999996    blah
999997    blah
999998    blah
999999    blah
1000000    blah
```

This design decision makes implementing the formatting code for this feature a whole lot easier. The `RCat::Display#print_labeled_line` method shows that it's possible to implement this kind of formatting with a one-liner:

```ruby
def print_labeled_line(line)
  print "#{line_number.to_s.rjust(6)}\t#{line}" 
end
```

Although the code in this example is sufficient for our needs in `rcat`, it's worth mentioning that `String` also supports the `ljust` and `center` methods. All three of these justification methods can optionally take a second argument, which causes them to use an arbitrary string as padding rather than a space character; this feature is sometimes useful for creating things like ASCII status bars or tables.

I've worked on a lot of different command-line report formats before, and I can tell you that streamable, fixed-width output is the easiest kind of reporting you'll come by. Things get a lot more complicated when you have to support variable-width columns or render elements that span multiple rows and columns. I won't get into the details of how to do those things here, but feel free to leave a comment if you're interested in hearing more on that topic.

### Error handling and exit codes

The techniques we've covered so far are enough to get most of `rcat`'s tests passing, but the following three scenarios require a working knowledge of how Unix commands tend to handle errors. Read through them and do the best you can to make sense of what's going on.

```ruby
`cat #{gettysburg_file}`
cat_success = $?

`rcat #{gettysburg_file}`
rcat_success = $?

unless cat_success.exitstatus == 0 && rcat_success.exitstatus == 0
  fail "Failed 'cat and rcat success exit codes match"
end

############################################################################

cat_out, cat_err, cat_process    = Open3.capture3("cat some_invalid_file")
rcat_out, rcat_err, rcat_process = Open3.capture3("rcat some_invalid_file") 

unless cat_process.exitstatus == 1 && rcat_process.exitstatus == 1
  fail "Failed 'cat and rcat exit codes match on bad file"
end

unless rcat_err == "rcat: No such file or directory - some_invalid_file\n"
  fail "Failed 'cat and rcat error messages match on bad file'"
end

############################################################################


cat_out, cat_err, cat_process   = Open3.capture3("cat -x #{gettysburg_file}")
rcat_out,rcat_err, rcat_process = Open3.capture3("rcat -x #{gettysburg_file}") 

unless cat_process.exitstatus == 1 && rcat_process.exitstatus == 1
  fail "Failed 'cat and rcat exit codes match on bad switch"
end

unless rcat_err == "rcat: invalid option: -x\nusage: rcat [-bns] [file ...]\n"
  fail "Failed 'rcat provides usage instructions when given invalid option"
end
```

The first test verifies exit codes for successful calls to `cat` and `rcat`. In Unix programs, exit codes are a means to pass information back to the shell about whether a command finished successfully. The right way to signal that things worked as expected is to return an exit code of 0, which is exactly what Ruby does whenever a program exits normally without error.

Whenever we run a shell command in Ruby using backticks, a `Process::Status` object is created and is then assigned to the `$?` global variable. This object contains (among other things) the exit status of the command that was run. Although it looks a bit cryptic, we're able to use this feature to verify in our first test that both `cat` and `rcat` finished their jobs successfully without error.

The second and third tests require a bit more heavy lifting because in these scenarios, we want to capture not only the exit status of these commands, but also whatever text they end up writing to the STDERR stream. To do so, we use the `Open3` standard library. The `Open3.capture3` method runs a shell command and then returns whatever was written to STDOUT and STDERR, as well as a `Process::Status` object similar to the one we pulled out of `$?` earlier. 

If you look at _bin/rcat_, you'll find the code that causes these tests to pass:

```ruby
begin
  RCat::Application.new(ARGV).run
rescue Errno::ENOENT => err
  abort "rcat: #{err.message}"
rescue OptionParser::InvalidOption => err
  abort "rcat: #{err.message}\nusage: rcat [-bns] [file ...]"
end
```

The `abort` method provides a means to write some text to STDERR and then exit with a nonzero code. The previous code provides functionality equivalent to the following, more explicit code:

```ruby
begin
  RCat::Application.new(ARGV).run
rescue Errno::ENOENT => err
  $stderr.puts "rcat: #{err.message}"
  exit(1)
rescue OptionParser::InvalidOption => err
  $stderr.puts "rcat: #{err.message}\nusage: rcat [-bns] [file ...]"
  exit(1)
end
```

Looking back on things, the errors I've rescued here are somewhat low level, and
it might have been better to rescue them where they occur and then reraise
custom errors provided by `RCat`. This approach would lead to code similar to
what is shown below:

```ruby
begin
  RCat::Application.new(ARGV).run
rescue RCat::Errors::FileNotFound => err
  # ...
rescue RCat::Errors::InvalidParameter => err
  # ..
end
```

Regardless of how these exceptions are labeled, it's important to note that I intentionally let them bubble all the way up to the outermost layer and only then rescue them and call `Kernel#exit`. Intermingling `exit` calls within control flow or modeling logic makes debugging nearly impossible and also makes automated testing a whole lot harder.

Another thing to note about this code is that I write my error messages to `STDERR` rather than `STDOUT`. Unix-based systems give us these two different streams for a reason: they let us separate debugging output and functional output so that they can be redirected and manipulated independently. Mixing the two together makes it much more difficult for commands to be chained together in a pipeline, going against the [Unix philosophy](http://en.wikipedia.org/wiki/Unix_philosophy).

Error handling is a topic that could easily span several articles. But when it comes to building command-line applications, you'll be in pretty good shape if you remember just two things: use `STDERR` instead of `STDOUT` for debugging output, and make sure to exit with a nonzero status code if your application fails to do what it is supposed to do. Following those two simple rules will make your application play a whole lot nicer with others.

### Reflections

Holy cow, this was a hard article to write! When I originally decided to write a `cat` clone, I worried that the example would be too trivial and boring to be worth writing about. However, once I actually implemented it and sat down to write this article, I realized that building command-line applications that respect Unix philosophy and play nice with others is harder than it seems on the surface.

Rather than treating this article as a definitive reference for how to build good command-line applications, perhaps we can instead use it as a jumping-off point for future topics to cover in a more self-contained fashion. I'd love to hear your thoughts on what topics in particular interested you and what areas you think should have been covered in greater detail.

> NOTE: If you'd like to learn more about this topic, consider doing the Practicing Ruby self-guided course on [Streams, Files, and Sockets](https://practicingruby.com/articles/study-guide-1?u=dc2ab0f9bb). You've already completed one of its reading exercises by working through this article!


================================================
FILE: articles/v2/010-from-requirements-discovery-to-release.md
================================================
Every time we start a greenfield software project, we are faced with the overwhelming responsibility of creating something from nothing. Because the path from the requirements discovery phase to the first release of a product has so many unexpected twists and turns, the whole process can feel a bit unforgiving and magical. This feeling is a big part of what makes programming hard, even for experienced developers.

For the longest time, I relied heavily on my intuition to get myself kick-started on new projects. I didn't have a clear sense of what my creative process was, but I could sense that my fear of the unknown started to melt away as I gained more experience as a programmer. Having a bit of confidence in my own abilities made me more productive, but not knowing where that confidence came from made it impossible for me to cultivate it in others. Treating my creative process as a black box also made it meaningless for me to compare my approach to anyone else's. Eventually, I got fed up with these limitations and decided that I wanted to do something to overcome them.

My angle of approach was fairly simple: I decided to take a greenfield project from the idea phase to an initial open source release while documenting the entire process. I thought this information might provide a useful starting point for identifying patterns in how I work and also a basis of comparison for other folks. As I reviewed my notes from this exercise and compared them to my previous experiences, I was thrilled to see that a clear pattern did emerge. This article summarizes what I learned about my own process; I hope it will also be helpful to you.

### Brainstorming for project ideas

The process of coming up with an idea for a software project (or perhaps any creative work) is highly dynamic. The best ideas tend to evolve quite a bit from whatever the original spark of inspiration was. If you are not constrained to solving a particular problem, it can be quite rewarding to allow yourself to wander a bit and see where you end up. Evolving an idea is like starting with a base recipe for a dish and then tweaking a couple ingredients at a time until you end up with something delicious. The story of how this particular project started should illustrate just how much mutation can happen in the early stages of creating something new.

A few days before writing this article, I was trying to come up with ideas for another Practicing Ruby article I had planned to write. I wanted to do something on event-driven programming and thought that some sort of tower defense game might be a fun example to play with. However, the ideas I had in mind were too complicated, so I gradually simplified my game ideas until they turned into something vaguely resembling a simple board game.

Eventually, I forgot that my main goal was to get an article written and decided to focus on developing my board game ideas instead. With my wife's help, over the course of a weekend I managed to come up with a fairly playable board game that bore no resemblance to a tower defense game and would serve as a terrible event-driven programming exercise. However, I still wanted to implement a software version of the game because it would make the experience much easier for us to analyze and share with others.

My intuition said that the project would take me a day or so to build and that it'd be sufficiently interesting to take notes on for my "documenting the creative process" exercise. This gut feeling was enough to convince me to take the plunge, so I cleared the whiteboards in my office in preparation for an impromptu design session.

### Establishing the 10,000-foot view

Whether you're building a game or modeling a complex business process, you need to define lots of terms before you can go about describing the interactions of your system. When you consider the fact that complex dependencies can make it hard to change names later, it's hard to overstate the importance of this stage of the process. For this reason, it's always a good idea to start a new project by defining some terms for some of the most important components and interactions that you'll be working with. My first whiteboard sketch focused on exactly that:

<div align="center">
<img src="http://farm7.static.flickr.com/6229/6283525185_35bd4c96a8_z.jpg">
</div>

Having a sense of the overall structure of the game in somewhat more formal terms made it possible for me to begin mapping these concepts onto object relationships. The following image shows my first crack at figuring out what classes I'd need and how they would interact with each other:

<div align="center">
<img src="http://farm7.static.flickr.com/6049/6283524127_032ab93d77_z.jpg">
</div>

It's worth noting that in both of these diagrams, I was making no attempt at being exhaustive, nor was I expecting these designs to survive beyond an initial spike. But because moving boxes and arrows around on a whiteboard is easier than rewriting code, I tend to start off any moderately complex project this way.

With just these two whiteboard sketches, I had most of what I needed to start coding. The only important thing left to be done before I could fire up my text editor was coming up with a suitable name for the game. After trying and failing at finding a variant of "All your base" that wasn't an existing gem name, I eventually settled on "Stack Wars." I picked this name because 
a big part of the physical game has to do with building little stacks of army tiles in the territories you control. Despite the fact that the name doesn't mean much in the electronic version, it was an unclaimed name that could easily be _CamelCased_ and _snake_cased_, so I decided to go with it.

As important as naming considerations are, getting bogged down in them can be just as harmful as paying no attention to the problem at all. For this reason, I decided to leave some of the details of the game in my head so that I could postpone some naming decisions until I saw how the code was coming together. That decision allowed me to start coding a bit earlier at the cost of having a bit of an incomplete roadmap.

### Picking some low-hanging fruit

Every time I start a new project, I try to identify a small task that I can finish quickly so that I can get some instant gratification. I find an early success to be important for my morale, and it also serves as a gentle way to test some of my assumptions about the project.

I try to avoid starting with the boring stuff like setting up boilerplate code and building trivial container objects. Instead, I typically attempt to build a small but useful end-to-end feature. For the purposes of this game, an ASCII representation of the battlefield seemed like a good place to start. I started this task by creating a file called _sample_ui.txt_ with the contents you see here:

```
       0      1      2      3      4      5      6      7      8 
    BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
 0  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 1  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 2  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 3  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 4  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 5  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 6  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 7  (B 2)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
      |      |      |      |      |      |      |      |      |
 8  (___)--(W 2)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
    WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW
```

In order to implement this visualization, I needed to make some decisions about how the battlefield data was going to be represented, but I wanted to defer as much of that work as possible. After [asking for some feedback about this problem](https://gist.github.com/1310883), I opted to write the visualization code against a simple array of arrays of Ruby primitives that could trivially be transformed to and from JSON. Within a few minutes, I had a script that was generating similar output to my original sketch:

```ruby
require "json"

data = JSON.parse(File.read(ARGV[0]))

color_to_symbol = { "black" => "B", "white" => "W" }

header    = "       0      1      2      3      4      5      6      7      8\n"
separator = "       |      |      |      |      |      |      |      |      |\n"

border_b  = "     BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n"
border_w  = "     WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW\n"

battlefield_text = data.map.with_index do |row, row_index|
  row_text = row.map do |color, strength|
    if color == "unclaimed"
      "(___)"
    else
      "(#{color_to_symbol[color]}#{strength.to_s.rjust(2)})"
    end
  end.join("--")

  "#{row_index.to_s.rjust(3)}  #{row_text}\n"
end.join(separator)

puts [header, border_b, battlefield_text, border_w].join
```

Although this script is a messy little hack, it got me started on the project in a way that was immediately useful. In the process of creating this visualization tool, I ended up thinking about a lot of tangentially related issues. In particular, I started to brainstorm about the following topics:

* What fixture data I would need for testing various game actions
* What the coordinate system for the `Battlefield` would be
* What data the `Territory` object would need to contain
* What format to use for inputting moves via the command-line interface

The fact that I was thinking about all of these things was a sign that my initial spike was successful. However, it was also a sign that I should spend some time laying out the foundation for a real object-oriented project rather than continuing to hack things together as if I were writing a ball of Perl scripts.

### Laying out some scaffolding

Although you don't necessarily need to worry about writing super-clean code for a first release of a project, it is important to at least lay down the basic groundwork, which makes it possible to replace bad code with good code later. By introducing a `TextDisplay` object, I was able to reduce the _stackwars-viewer_ script to the following code:

```ruby
#!/usr/bin/env ruby

require "json"
require_relative "../lib/stack_wars"

data = JSON.parse(File.read(ARGV[0]))

puts StackWars::TextDisplay.new(data)
```

After the initial extraction of the code from my script, I thought about how much time I wanted to invest in refactoring `TextDisplay`. I ended up deciding that because this game will eventually have a GUI that completely replaces its command-line interface, I shouldn't put too much effort into code that would soon be deleted. However, I couldn't resist making it at least a tiny bit more readable for the time being:

```ruby
module StackWars
  # Renders battlefield data (rows of [color, strength] pairs) as ASCII art.
  class TextDisplay
    COLOR_SYM = { "black" => "B", "white" => "W" }
    HEADER    = (" " * 7) + (0..8).to_a.join(" " * 6)
    SEPARATOR = (" " * 7) + Array.new(9, "|").join(" " * 6)

    BLACK_BORDER  = (" " * 5) + (COLOR_SYM["black"] * 61)
    WHITE_BORDER  = (" " * 5) + (COLOR_SYM["white"] * 61)

    # battlefield - nested array: one array per row, one [color, strength]
    #               pair per cell.
    def initialize(battlefield)
      @battlefield = battlefield
    end

    # Returns the full board: column header, black border, the labelled rows
    # separated by connector lines, and finally the white border.
    def to_s
      labelled_rows = @battlefield.each_with_index.map do |row, row_index|
        "#{row_index.to_s.rjust(3)}  #{row_text(row)}\n"
      end

      board = labelled_rows.join("#{SEPARATOR}\n").chomp

      [HEADER, BLACK_BORDER, board, WHITE_BORDER].join("\n")
    end

    private

    # One row of cells joined by horizontal connectors.
    def row_text(row)
      row.map { |color, strength| cell_text(color, strength) }.join("--")
    end

    # A single cell: blank when unclaimed, otherwise color letter + strength.
    def cell_text(color, strength)
      return "(___)" if color == "unclaimed"

      "(#{COLOR_SYM[color]}#{strength.to_s.rjust(2)})"
    end
  end
end
```

After writing this code, I wondered whether I should tackle the building of a proper `Battlefield` class that would take the raw data for each cell and wrap it in a `Territory` object. I was hesitant to make both of these changes at once, so I ended up compromising by creating a `Battlefield` class that simply wrapped the nested array of primitives for now:

```ruby
module StackWars
  # Thin wrapper around the nested array of raw territory data.
  class Battlefield
    # Builds a Battlefield from the parsed contents of the JSON file at
    # +json_file+.
    def self.from_json(json_file)
      new(JSON.parse(File.read(json_file)))
    end

    # territories - the raw territory data; stored as given.
    def initialize(territories)
      @territories = territories
    end

    # Returns a deep copy of the territory data (Marshal round-trip), so the
    # returned structure shares no objects with the internal one.
    def to_a
      Marshal.load(Marshal.dump(@territories))
    end

    # loses instance variables, but better than hitting to_s() by default
    #
    # Order matters: this alias is created before the to_s override below is
    # defined, so inspect is bound to the inherited to_s rather than to the
    # TextDisplay rendering.
    # NOTE(review): presumably this guards against the default inspect
    # delegating to an overridden to_s on older Rubies -- confirm.
    alias_method :inspect, :to_s

    # Renders a copy of the territory data as text via TextDisplay.
    def to_s
      TextDisplay.new(to_a).to_s
    end
  end
end
```

With this new object in place, I was able to further simplify the _stackwars-viewer_ script, leading to the trivial code shown here:

```ruby
require_relative "../lib/stack_wars"

# Load the battlefield from the JSON file given on the command line and
# print it (puts calls Battlefield#to_s).
battlefield = StackWars::Battlefield.from_json(ARGV[0])
puts battlefield
```

The benefit of doing these minor extractions is that it makes it possible to focus on the relationships between the objects in a system rather than their implementations. You can always refactor implementation code later, but interfaces are hard to untangle once you start wiring things up to them. This is why it is important to start thinking about the ingress and egress points of your objects as early as possible, even if you're still allowing yourself to write quick and dirty implementation code.

The benefits of laying the proper groundwork for your project and keeping things nicely organized are hard to see in the early stages but are extremely clear later when things get more complex.

### Starting to chip away at the hard parts

Unless you are an incredibly good software designer, odds are good that some aspects of your project will be harder to work on than others. There is even a funny quote that hints at this phenomenon: _"The first 90 percent of the code accounts for the first 90 percent of the development time. The remaining 10 percent of the code accounts for the other 90 percent of the development time."_

To avoid this sort of situation, it is important to maintain a balance between easy tasks and more difficult tasks. Starting a project with an easy task is a great way to get the ball rolling, but if you don't tackle some challenging aspects of your project early on, you may find yourself having to rewrite a ton of code later. The hard parts of your project are what test your overall design as well as your understanding of the problem domain.

With this in mind, I knew it was time to take a closer look at some of the game actions in Stack Wars. Because the FORTIFY action must be implemented before any of the other game actions become meaningful, I decided to start there. The following code was my initial stab at figuring out what I needed to build in order to get this feature working:

```ruby
# First sketch of the FORTIFY action: add an army of the active player's
# color to the given position, then decrement that player's reserves.
# NOTE(review): assumes `position` responds to add_army and that
# `active_player` exposes color and reserves/reserves= -- none of these are
# defined at this point in the project.
def fortify(position)
  position.add_army(active_player.color)
  active_player.reserves -= 1
end
```

Until this point in the project, I had been avoiding writing formal tests because I had a mixture of trivial code and throwaway code. But now that I was about to work on some Serious Business, I decided to try test-driving things. After a fair amount of struggling, I decided to add _mocha_ into the mix and begin test-driving a `Game` class through the use of mock objects:

```ruby
require_relative "../test_helper"

# Mock-driven specs for Game, written before Territory existed.
describe "StackWars::Game" do

  # Stand-ins for collaborators; both are mocha mocks.
  let(:territory)   { mock("territory") }
  let(:battlefield) { mock("battlefield") }

  subject { StackWars::Game.new(battlefield) }

  # The first active player is black, and each new turn switches sides.
  it "must be able to alternate players" do
    subject.active_player.color.must_equal :black

    subject.start_new_turn
    subject.active_player.color.must_equal :white

    subject.start_new_turn
    subject.active_player.color.must_equal :black
  end

  # Game#fortify should look up the territory at the given position and
  # delegate to Territory#fortify with the active player.
  it "must be able to fortify positions" do
    # Stub the position lookup so no real battlefield is needed.
    subject.expects(:territory_at).with([0,1]).returns(territory)
    territory.expects(:fortify).with(subject.active_player)

    subject.fortify([0,1])
  end
end
```

Taking this approach made it possible for me to test whether the `Game` class was able to delegate `fortify` calls to territories, even though I had not yet implemented the `Territory` class. It gave me a pretty nice way to look at the problem from the outside in and resulted in a clean-looking `Game` class:

```ruby
module StackWars
  # Tracks whose turn it is and routes game actions to territories.
  class Game
    attr_reader :active_player

    # battlefield - object that answers [] with the territory at a position.
    #
    # Players alternate endlessly (black first); the first turn starts
    # immediately.
    def initialize(battlefield)
      @battlefield = battlefield
      @players     = %w[black white].map { |color| Player.new(color) }.cycle

      start_new_turn
    end

    # Fortifies the territory at +position+ on behalf of the active player.
    def fortify(position)
      territory_at(position).fortify(active_player)
    end

    # Advances to the next player in the cycle and returns that player.
    def start_new_turn
      @active_player = @players.next
    end

    private

    # +position+ is splatted into the battlefield lookup.
    def territory_at(position)
      @battlefield[*position]
    end
  end
end
```

However, the problem remained that this code hinged on a number of features that were not implemented yet. This frustration caused me to begin working on getting the basic functionality in place for a `Territory` class without writing tests for its behaviors up front. I used a combination of the _stackwars-viewer_ tool and irb to verify that the `Territory` objects that I had shoehorned into the system were working as expected.

After making it so that the `Battlefield` object contained a nested array of `Territory` objects, I went back and wrote some unit tests for `Territory`. The tests ended up being fairly long and tedious, but the implementation code for `Territory#fortify` ended up being quite simple and worked as expected:

```ruby
module StackWars
  class Territory
    # other methods omitted

    # Adds one of +player+'s armies to this territory.
    #
    # Raises Errors::IllegalMove, before anything is changed, unless the
    # territory is controlled by +player+. Otherwise the player deploys an
    # army, the army strength grows by one, and the territory is marked with
    # the player's color if it had no occupant yet.
    def fortify(player)
      raise Errors::IllegalMove unless controlled_by?(player)

      player.deploy_army

      @army_strength += 1
      @occupant ||= player.color
    end
  end
end
```

Getting the `Territory` tests to go green felt good, but I wasn't satisfied. Now that I had implemented a game action, I wanted to see it in real use. This itch led me to write a simple script that simulated players fortifying their positions, which resulted in the following output:

```
       0      1      2      3      4      5      6      7      8
     WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW
  0  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  1  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  2  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  3  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  4  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  5  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  6  (B 2)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  7  (___)--(W 2)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  8  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
     BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

Fortifying black position at (0,6)

       0      1      2      3      4      5      6      7      8
     WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW
  0  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  1  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  2  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  3  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  4  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  5  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  6  (B 3)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  7  (___)--(W 2)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  8  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
     BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

Fortifying white baseline position at (2,0)

       0      1      2      3      4      5      6      7      8
     WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW
  0  (___)--(___)--(W 1)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  1  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  2  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  3  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  4  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  5  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  6  (B 3)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  7  (___)--(W 2)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
       |      |      |      |      |      |      |      |      |
  8  (___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)--(___)
     BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
```

Seeing this example run gave me a great feeling because it was the first sign that something resembling a game was under development. However, the path to get to this point was long and arduous, even though this was by far the easiest action to implement. 

### Slamming into the wall

There comes a time in every reasonably complicated project at which you end up biting off more than you can chew. The exact reason for this will vary from project to project: perhaps you overlooked something in your design, or misunderstood a key requirement, or maybe you just let your code get too messy and it reached a point where it could no longer be extended without heavy refactoring. This sort of thing is normal and to be expected, but is also a critical turning point in your project. 

If you aren't careful, the setbacks caused by slamming into the wall can really shake your spirits and negatively affect your productivity. Having unrealistic expectations that a certain task will be easy to complete is a surefire way to trigger this effect. That's what happened to me on this project; I hope the following story will serve as a cautionary tale.

After implementing the FORTIFY action, I thought I would be able to repeat the process for MOVE, ATTACK, and INVADE. Anticipating this approach, I roughed out a replacement for `Game#fortify()` called `Game#play()`, which took into account all game actions and selected the right one based on the circumstances:

```ruby
# Plays one turn and then hands control to the next player.
#
# With a single position the active player fortifies that territory. With two
# positions the territories must be adjacent (otherwise Errors::IllegalMove is
# raised); the action is an attack when the destination is occupied by the
# opponent and a plain army move otherwise.
def play(pos1, pos2=nil)
  if pos2.nil?
    territory_at(pos1).fortify(active_player)
  else
    from = territory_at(pos1)
    to   = territory_at(pos2)

    raise Errors::IllegalMove unless battlefield.adjacent?(from, to)

    to.occupied_by?(opponent) ? attack(from, to) : move_army(from, to)
  end

  start_new_turn
end
```

However, as soon as I looked at this method definition, I knew for sure that testing this code with mocks would be at best brittle and at worst downright misleading. On top of that, the code introduced several new concepts that would need to trickle down into the rest of the system. I tried to think through how to simplify things so that this could be more easily tested, but quickly grew frustrated and ended up abandoning the idea of test-driving this functionality.

Instead, I decided that the problem was that I didn't have a running game console that displayed the battlefield and accepted inputs. I thought that even a buggy system that allowed me to interact with the game in a tangible way would be better than writing a ton of tedious tests against what might end up being the wrong interface. This decision caused me to begin to modify the system in any number of haphazard ways until I got a functioning game console.

I did eventually get something working, but it was so fragile that I ended up enlisting my wife's help in QA testing it until it sort of looked and felt like a working game. Unfortunately, the hot fixes I was applying while she found new ways to break things caused more bugs to surface. Eventually, I gave up on the project for the evening and decided to come back to it with fresh eyes in the morning.

### Searching for a pragmatic middle path

Conventional wisdom says that if a particular bit of code is especially hard to test, a structural flaw in your design might be to blame. Because testability and extensibility are linked together, it is a good idea to listen to your tests when they make life hard for you. But there certainly are times when we need to temporarily sacrifice purity in the name of practicality.

The fact that I had a working Stack Wars implementation but no confidence that it wasn't super buggy left me in a sticky position: I wanted to make sure that the code would stabilize, but I didn't want to rework the design from scratch. The base design of the code was more than good enough for a first release; I just wanted to iron the wrinkles out and find a way to refactor with a bit of confidence that each change I made wasn't going to break everything all over again.

To accomplish this goal, I started by making my manual testing process more efficient. I made it so that my game console would fire up a game in which each player had only 2 armies in reserve rather than 27. This change made it possible to play an entire game in a fraction of the time that a real game would take, but still allowed me to test all the game actions. I used this faster manual feedback loop to quickly eliminate the bugs that I had introduced the night before, and I also tried to be more careful about the fixes I applied.

Once I got things to a reasonable level of stability, I realized that I could then build a fairly good integration test by replaying moves from a real, complete game. After a few more tweaks, my wife and I managed to make it through a game without a crash. I then set up a demo script that would replay these moves one by one until it reached the end of the game. Once I got that stage working, I extracted it into an integration test that reads the moves from a JSON file, calls `Game#play` for each one, and then does some validations to make sure the game ended as expected:

```ruby
# Integration test: replays the recorded moves of a complete game through
# Game#play and checks the final state.
describe "A full game of stack wars" do
  # Recorded moves, loaded from the JSON fixture next to the tests.
  let(:moves) do
    moves_file = "#{File.dirname(__FILE__)}/../fixtures/moves.json"
    JSON.load(File.read(moves_file))
  end

  let(:battlefield) { StackWars::Battlefield.new }

  let(:game) { StackWars::Game.new(battlefield) }

  it "must end as expected" do
    # The block's value is whatever gets thrown to :game_over; if nothing is
    # thrown it is the moves list itself, which fails the check below.
    message = catch(:game_over) do
      moves.each { |move| game.play(*move) }
    end

    message.must_equal "white won!"

    # NOTE(review): assumes white is the active player when the game ends.
    white = game.active_player
    black = game.opponent

    battlefield.deployed_armies(white).must_equal(4)
    battlefield.deployed_armies(black).must_equal(0)

    white.reserves.must_equal(0)
    black.reserves.must_equal(0)

    white.successful_invasions.must_equal(6)
    black.successful_invasions.must_equal(4)
  end
end
```

Having this integration test in place will make it possible for me to come back later and refactor the codebase to make it more testable without the fear of breaking things. Although unit tests offer more in the way of documenting how the codebase is meant to work and provide more precisely located feedback upon failure, this single test is good enough to ensure that I don't introduce new critical bugs into the application without noticing them.

In retrospect, it seems like integration testing is more important than exhaustive unit testing in the very early phases of a hard-to-test project. It is less of a time investment to create some black box testing and such testing is less likely to be thrown out as subsystems change rapidly during the initial phases of development.

### Shipping the 0.1.0 release

It is important to remember that a 0.1.0 release of an open source project is basically just a way to communicate an idea to your fellow programmers. If you label a release 0.1.0, no one is going to expect feature completeness, stability, or even a particularly high level of code quality. What they will expect is for you to have attempted to make your project comprehensible to them and ideally to have done a good job of making it easy to get involved in your project. I tried to keep these priorities in mind while preparing Stack Wars for release. 

Adding the full game test was an important first step for making the codebase release-ready. People who try out the game are going to want to be able to submit bug reports and possibly add new bits of functionality. Without tests to verify that their changes don't break stuff, it would be much harder to safely accept their contributions.

Some additional code cleanup was also necessary: I removed a bunch of tests and examples that were no longer relevant and shifted around some of the code within each class to make it easier to read. In general, it is a good idea to remove anything that is not actively in use, as well as anything that isn't quite working correctly whenever you release code. This step lessens the chances of confusion and frustration when someone tries to read your code. 

I did not bother with API documentation just yet because so much is still subject to change, but I did write up a basic [README](https://github.com/sandal/stack_wars/blob/master/README.md) with instructions for those who want to play-test the game as well as those who might want to hack on its code. I also wrote a detailed writeup of the [game rules](https://github.com/sandal/stack_wars/blob/master/RULES.txt) because folks will need to learn the game before they can understand how this program works.

In addition to documentation and cleanup, I did what I could to make it very easy to try out the game. Running `stack_wars rules` will display the game rules so that you don't need to go back to the source code or your web browser to know how the game is played. Additionally, I made it possible to run through the sample game that Jia and I played just by typing `stack_wars demo`. The sole reason these features exist is to make the software more accessible to new users, which I hope will increase the chance that those users become contributors at some point in the future. But even if most people download my software without ever contributing to it in some way, I still care a lot about the experience they have while using something I created.

You can try things out for yourself by following the instructions in the [README](https://github.com/sandal/stack_wars/blob/master/README.md); this video will give you a sense of what the first release of this project ended up looking like:

<div align="center">
<iframe width="640" height="480" src="//www.youtube.com/embed/oom5n4Azq2I?rel=0" frameborder="0" allowfullscreen></iframe>
</div>

Although in the grand scheme of things it may not look like much, I am pretty happy with it for a 0.1.0 release!

### Reflections

The more I think about it, the more I realize that the cycle I've outlined in this article is pretty much the one I go through whenever I'm starting a new project. There are some things about my process that I like, and some things that I don't. However, knowing that there is a pattern I tend to follow makes me think that I can now work towards optimizing it over time.

The thing I found fascinating about this exercise is that it really drove home the point that software development is about a lot more than just writing code. There is a whole range of skills involved in bringing a software project from the idea phase to even the most humble first release, and it seems like it'd be good for us to spend time optimizing the whole process rather than just our purely code-oriented skills.

Because I've never attempted anything quite like this exercise before, I'm really curious to hear your thoughts on this article. Please leave a comment, even if you're the type that typically lurks, with whatever your initial gut reaction may be. If this is a topic that interests you, please also share your thoughts on how we might be able to dig even deeper in future experiments.


================================================
FILE: articles/v2/011-domain-specific-api-construction.md
================================================
Many people are attracted to Ruby because of its flexible syntax. Through various tricks and techniques, it is possible for Ruby APIs to blend seamlessly into a wide range of domains. 

In this article, we will investigate how domain-specific APIs are constructed by implementing simplified versions of popular patterns seen in the wild. Hopefully, this exploration will give you a better sense of the tools you have at your disposal as well as a chance to see various Ruby metaprogramming concepts being used in context.

### Implementing `attr_accessor`

One of the first things every beginner to Ruby learns is how to use `attr_accessor`. It has the appearance of a macro or keyword, but is actually just an ordinary method defined at the module level. To illustrate that this is the case, we can easily implement it ourselves.

```ruby
class Module
  # Minimal attr_accessor clone: for every name given, defines a reader and
  # a writer backed by the instance variable of the same name.
  def attribute(*attribs)
    attribs.each do |attr_name|
      ivar = "@#{attr_name}"

      define_method(attr_name)       { instance_variable_get(ivar) }
      define_method("#{attr_name}=") { |value| instance_variable_set(ivar, value) }
    end
  end
end

# Person gets name/name= and email/email= from the attribute macro.
class Person
  attribute :name, :email
end

person = Person.new
person.name = "Gregory Brown"

p person.name #=> "Gregory Brown"
```

In order to understand what is going on here, you need to think a little bit about what a class object in Ruby actually is. A class object is an instance of the class called `Class`, which is in turn a subclass of the `Module` class. When an instance method is added to the `Module` class definition, that method becomes available as a class method on all classes.

At the class/module level, it is possible to call `define_method`, which will in turn add new instance methods to the class/module that it gets called on. So when the `attribute()` method gets called on `Person`, a pair of methods get defined on `Person` for each attribute. The end result is that functionally equivalent code to what is shown below gets dynamically generated:

```ruby
# Hand-written equivalent of what `attribute :name, :email` generates:
# one reader and one writer per attribute.
class Person
  def name
    @name
  end

  def name=(val)
    @name = val
  end

  def email
    @email
  end

  def email=(val)
    @email = val
  end
end
```

This is powerful stuff. As soon as you recognize that things like `attr_accessor` are not some special keywords or macros that only the Ruby interpreter can define, a ton of possibilities open up.

### Implementing a Rails-style `before_filter` construct

Rails uses class methods all over the place to make it look and feel like its own dialect of Ruby. As a single example, it is possible to register callbacks to be run before a given controller action is executed using the `before_filter` feature. The simplified example below is a rough approximation of what this functionality looks like in Rails.

```ruby
# Base controller: registers a filter with no :only restriction, so it
# applies to every action.
class ApplicationController < BasicController
  before_filter :authenticate

  def authenticate
    puts "authenticating current user"
  end
end

# Adds a second filter that is restricted to the show, edit and update
# actions; create is deliberately left out of the :only list.
class PeopleController < ApplicationController 
  before_filter :locate_person, :only => [:show, :edit, :update]

  def show
    puts "showing a person's data"
  end

  def edit
    puts "displaying a person edit form"
  end

  def update
    puts "committing updated person data to the database"
  end

  def create
    puts "creating a new person"
  end

  # Callback registered above via before_filter.
  def locate_person
    puts "locating a person"
  end
end
```

Suppose that `BasicController` provides us with the `before_filter` method as well as an `execute` method which will execute a given action, but first trigger any `before_filter` callbacks. Then we'd expect the `execute` method to have the following behavior. 

```ruby
# Run one action that is in locate_person's :only list (show) and one that
# is not (create).
controller = PeopleController.new

puts "EXECUTING SHOW"
controller.execute(:show)

puts

puts "EXECUTING CREATE"
controller.execute(:create)

=begin -- expected output --

EXECUTING SHOW
authenticating current user
locating a person
showing a person's data

EXECUTING CREATE
authenticating current user
creating a new person 

=end 
```

Implementing this sort of behavior isn't as trivial as implementing a clone of `attr_accessor`, because in this scenario we need to manipulate some class level data. Things are further complicated by the fact that we want filters to propagate down through the inheritance chain, allowing a given class to apply both its own filters as well as the filters of its ancestors. Fortunately, Ruby provides facilities to deal with both of these concerns, resulting in the following implementation of our `BasicController` class:

```ruby
class BasicController
  # Per-class list of filter hashes, created on first use. Each hash holds
  # the :callback method name plus any options passed to before_filter.
  def self.before_filters
    @before_filters ||= []
  end

  # Registers +callback+ (a method name) to run before actions.
  #
  # params - optional hash; :only may be a single action name or an array of
  #          action names to restrict the filter to. Without :only the
  #          filter applies to every action.
  def self.before_filter(callback, params={})
    before_filters << params.merge(:callback => callback)
  end

  # When a subclass is created, copy this class's filters to the front of
  # the subclass's list (preserving their order) so that ancestor filters run
  # first. Filters added to a parent after the subclass already exists are
  # not propagated.
  def self.inherited(child_class)
    super # keep any other inherited hooks in the ancestor chain working
    before_filters.reverse_each { |f| child_class.before_filters.unshift(f) }
  end

  # Runs every filter that applies to +action+, in registration order, and
  # then the action itself. Returns the action's return value.
  def execute(action)
    matched_filters = self.class.before_filters.select do |f|
      # Array() lets :only be either a lone symbol or a list of symbols.
      f[:only].nil? || Array(f[:only]).include?(action)
    end

    matched_filters.each { |f| send f[:callback] }
    send(action)
  end
end
```

In this code, we store our filters as an array of hashes on each class, and use the `before_filters` method as a way of lazily initializing that array. Whenever a subclass gets created, the parent class copies its filters to the front of the list of filters that the child class will continue to build up. This allows downward propagation of filters through the inheritance chain. If that sounds confusing, exploring in irb a bit might help clear up what ends up happening as a result of this `inherited` hook.

```
>> BasicController.before_filters.map { |e| e[:callback] }
=> []
>> ApplicationController.before_filters.map { |e| e[:callback] }
=> [:authenticate]
>> PeopleController.before_filters.map { |e| e[:callback] }
=> [:authenticate, :locate_person]
```

From here, it should be pretty easy to see how the `execute` method works. It simply looks up this list of filters and selects the ones relevant to the provided action. It then uses `send` to call each callback that was matched, and finally calls the target action. 

While we've only gone through two examples of class level macros so far, the techniques used between the two of them cover most of what you'll need to know to understand virtually all uses of this pattern in other scenarios. If we really wanted to dig in deeper, we could go over some other tricks such as using class methods to mix modules into classes on-demand (a common pattern in Rails plugins), but instead I'll leave those concepts for you to explore on your own and move on to some other interesting patterns.

### Implementing a cheap counterfeit of Mail's API

Historically, sending email in Ruby has always been an ugly and cumbersome process. However, the Mail gem changed all of that not too long ago. Using Mail, sending a message can be as simple as the code shown below.

```ruby
# Block-style API of the Mail gem: each line inside the block sets one field
# of the message being delivered.
Mail.deliver do
  from    "gregory.t.brown@gmail.com"
  to      "test@test.com"
  subject "Hello world"
  body    "Hi there! This isn't spam, I swear"
end
```

The nice thing about Mail is that in addition to this convenient syntax, it is still possible to work with a more ordinary looking API as well.

```ruby
# The same message built with ordinary attribute writers instead of a block.
mail         = Mail::Message.new

mail.from    = "gregory.t.brown@gmail.com"
mail.to      = "test@test.com"
mail.body    = "Hi there! This isn't spam, I swear"
mail.subject = "Hello world"

mail.deliver
```

If we ignore the actual sending of email and focus on the interface to the object, implementing a dual purpose API like this is surprisingly easy. The code below defines a class that provides a matching API to the examples shown above.

```ruby
class FakeMail
  # Builds a Message by evaluating the block against it, then delivers it.
  def self.deliver(&block)
    Message.new(&block).deliver
  end

  class Message
    attr_writer :from, :to, :subject, :body

    # The optional block is evaluated with the new message as self, so bare
    # calls such as `from "..."` inside it hit the accessors below.
    def initialize(&block)
      instance_eval(&block) if block
    end

    # Each accessor below is dual purpose: with an argument it writes the
    # field, without one it reads the field.

    def from(text=nil)
      text ? (self.from = text) : @from
    end

    def to(text=nil)
      text ? (self.to = text) : @to
    end

    def subject(text=nil)
      text ? (self.subject = text) : @subject
    end

    def body(text=nil)
      text ? (self.body = text) : @body
    end

    # this is just a placeholder for a real delivery method
    def deliver
      puts "Delivering a message from #{from} to #{to} " \
           "with the subject '#{subject}' and " \
           "the body '#{body}'"
    end
  end
end
```

There are only two things that make this class definition different from that of the ordinary class definitions we see in elementary Ruby textbooks. The first is that the constructor for `FakeMail::Message` accepts an optional block to run through `instance_eval`, and the second is that the class provides accessor methods which can act as both a reader and a writer depending on whether an argument is given or not. These two special features go hand in hand, as the following example demonstrates:

```ruby
  # Both snippets show the awkward side of each calling style.
  FakeMail.deliver do 
    # this looks ugly, but would be necessary if using ordinary attr_accessors
    # (without the explicit receiver, `from = ...` would assign a local variable)
    self.from = "gregory.t.brown@gmail.com"

  end

  mail = FakeMail::Message.new

  # when you strip away the syntactic sugar, this looks ugly too.
  mail.from "gregory.t.brown@gmail.com"
```

This approach to implementing this pattern is fairly common and shows up in a lot of different Ruby projects, including my own libraries. By accepting a bit more complexity in our accessor code, we end up with a more palatable API in both scenarios, and it feels like a good trade. However, the dual purpose accessors always felt like a bit of a hack to me, and I recently found a different approach that I think is a bit more solid. The code below shows how I would attack this problem in new projects:

```ruby
class FakeMail

  # Builds a message from the DSL calls made inside the block,
  # then delivers it immediately.
  def self.deliver(&block)
    MessageBuilder.new(&block).message.deliver
  end

  # Facade that exposes the sugary, block-friendly interface. The block is
  # evaluated against the builder, never against the Message itself.
  class MessageBuilder
    attr_reader :message

    def initialize(&block)
      @message = Message.new
      instance_eval(&block) if block
    end

    # Each DSL call simply forwards to the plain writer on the message.
    def from(text);    message.from    = text; end
    def to(text);      message.to      = text; end
    def subject(text); message.subject = text; end
    def body(text);    message.body    = text; end
  end

  # Plain domain object: ordinary accessors and nothing else.
  class Message
    attr_accessor :from, :to, :subject, :body

    # this is just a placeholder for a real delivery method
    def deliver
      puts "Delivering a message from #{from} to #{to} " \
           "with the subject '#{subject}' and " \
           "the body '#{body}'"
    end
  end
end
```

This code is a drop-in replacement for what I wrote before, but is quite different under the hood. Rather than putting the syntactic sugar directly onto the `Message` object, I create a `MessageBuilder` object for this purpose. When `FakeMail.deliver` is called, the `MessageBuilder` object ends up being the target context for the block to be evaluated in rather than the `Message` object. This effectively splits the code that implements the sugary interface from the code that implements the domain model, eliminating the need for dual purpose accessors.

There is another benefit that comes along with taking this approach, but it is more subtle. Whenever we use `instance_eval`, it evaluates the block as if you were executing your statements within the object it was called on. This means it is possible to bypass private methods and otherwise mess around with objects in ways that are typically reserved for internal use. By switching the context to a simple facade object whose only purpose is to provide some domain specific API calls for the user, it's less likely that someone will accidentally call internal methods or otherwise stomp on the internals of the system's domain objects.

It's worth mentioning that even this improved approach to implementing an `instance_eval` based interface comes with its own limitations. For example, whenever you use `instance_eval`, it makes it so that `self` within the block points to the object the block is being evaluated against rather than the object in the surrounding scope. The closure property of Ruby code blocks makes it possible to access local variables, but if you reference instance variables, they will refer to the object your block is being evaluated against. This can confuse beginners and even some more experienced Ruby developers. 

If you want to use this style of API, your best bet is to reserve it for things that are relatively simple and configuration-like in nature. As things get more complex the limitations of this approach become more and more painful to deal with. That having been said, valid use cases for this pattern occur often enough that you should be comfortable implementing it whenever it makes sense to do so.

The next pattern is one that you probably WON'T use all that often, but is perhaps the best example of how far you can stretch Ruby's syntax and behaviors to fit your own domain.

### Implementing a shoddy version of XML Builder

One of the first libraries that impressed me as a Ruby newbie was Jim Weirich's XML Builder. The fact that you could create a single Ruby object that magically knew how to convert arbitrary method calls into an XML structure seemed like pure voodoo to me at the time. 

```ruby
require "builder"

builder = Builder::XmlMarkup.new

xml = builder.class do |roster|
  roster.student { |s| s.name("Jim");    s.phone("555-1234") }
  roster.student { |s| s.name("Jordan"); s.phone("123-1234") }
  roster.student { |s| s.name("Greg");   s.phone("567-1234") }
end

puts xml

=begin -- expected output --

<class><student><name>Jim</name><phone>555-1234</phone></student>
<student><name>Jordan</name><phone>123-1234</phone></student>
<student><name>Greg</name><phone>567-1234</phone></student></class>  

=end
```

Some time much later in my career, I was impressed again by how easy it is to implement this sort of behavior if you cut a few corners. While it's mostly smoke and mirrors, the snippet below is sufficient for replicating the behavior of the previous example.

```ruby
module FakeBuilder
  # Inherits from BasicObject so that names such as `class` are not already
  # defined and therefore fall through to method_missing.
  class XmlMarkup < BasicObject
    def initialize
      @output = ""
    end

    # Turns any unknown method call into an XML element named after the
    # method. With a block, the builder itself is yielded so nested calls
    # append their tags inside this element; without one, the first
    # argument is appended as the element's text.
    #
    # Returns the accumulated output string.
    def method_missing(id, *args, &block)
      @output << "<#{id}>"

      if block
        yield self
      else
        @output << args.first
      end

      @output << "</#{id}>"

      @output
    end
  end
end
```

Despite how compact this code is, it gives us a lot to talk about. The heart of the implementation relies on the use of a `method_missing` hook to convert unknown method calls into XML tags. There are a few special things to note about this code, even if you are already familiar with `method_missing`.

Typically it is expected that if you implement a `method_missing` hook, you should be as conservative as possible about what you handle in your hook and then use `super` to delegate everything else upstream. For example, if you were to write dynamic finders similar to the ones that ActiveRecord provides (i.e. something like `find_by_some_attribute`), you would make it so that your `method_missing` hook only handled method calls which matched the pattern `/^find_by_(.*)/`. However, in the case of Builder all method calls captured by `method_missing` are potentially valid XML tags, and so `super` is not needed in its `method_missing` hook.

On a somewhat similar note, certain methods that are provided by `Object` are actually valid XML tag names that wouldn't be too rare to come across. In my example, I intentionally used XML data representing a class of students to illustrate this point, because it forces us to call `builder.class`. By inheriting from `BasicObject` instead of `Object`, we end up with far fewer reserved words on our object, which decreases the likelihood that we will accidentally call a method that does exist. While we don't think about it often, all `method_missing` based APIs hinge on the idea that your hook will only be triggered by calls to undefined methods. In many cases we don't need to think about this, but in the case of Builder (and perhaps when building proxy objects), we need to work with a blank slate object.

The final thing worth pointing out about this code is that it uses blocks in a slightly surprising way. Because the `method_missing` call yields the builder object itself whenever the block is given, it does not serve a functional purpose. To illustrate this point, it's worth noting that the code below is functionally equivalent to our original example:

```ruby
xml = builder.class do 
  builder.student { builder.name("Jim");    builder.phone("555-1234") }
  builder.student { builder.name("Jordan"); builder.phone("123-1234") }
  builder.student { builder.name("Greg");   builder.phone("567-1234") }
end

puts xml
```

However, Builder cleverly exploits block local variable assignment to allow contextual abbreviations so that the syntax more closely mirrors the underlying structure. These days we occasionally see `Object#tap` being used for similar purposes, but at the time that Builder did this it was quite novel.

While it's tempting to write Builder off as just a weird little bit of Ruby magic, it has some surprisingly practical benefits to its design. Unlike my crappy prototype, the real Builder library will automatically escape your strings so that they're XML safe. Also, because Builder essentially uses Ruby to build up an abstract syntax tree (AST) internally, it could possibly be used to render to multiple different output formats. While I've not actually tried it out myself, it looks like someone has already made a [JSON builder](https://github.com/nov/jsonbuilder) which matches the same API but emits JSON hashes instead of XML tags.

With those benefits in mind, this is a good pattern to use for problems that involve outputting documents that nicely map to Ruby syntax as an intermediate format. But as I mentioned before, those circumstances are rare in most day to day programming work, and so you shouldn't be too eager to use this technique as often as possible. That having been said, you could have some fantastic fun adding this sort of freewheeling code to various classes that don't actually need it in your production applications and then telling your coworkers I told you to do it. I'll leave it up to you to decide whether that's a good idea or not :)

With four tough examples down and only two more to go, we're on the home stretch. Take a quick break if you're feeling tired, and then let's move on to the next pattern. 

### Implementing Contest on top of MiniTest

When I used to write code for Ruby 1.8, I liked using the Test::Unit standard library for testing, but I wanted context support and full text test cases similar to what was found in RSpec. I eventually settled on using the [contest](https://github.com/citrusbyte/contest) library, because it gave me exactly what I needed in a very simple and elegant way.

When I moved to Ruby 1.9 and MiniTest, I didn't immediately invest the time in learning `MiniTest::Spec`, which provides similar functionality to contest as well as a few other RSpec-style goodies. Instead, I looked into porting contest to MiniTest. After finding a [gist](https://gist.github.com/25455) from Chris Wanstrath and customizing it heavily, I ended up with a simple little test helper that made it possible for me to write tests in minitest which looked like this.

```ruby
context "A Portal" do
  setup do
    @portal = Portal.new
  end

  test "must not be open by default" do
    refute @portal.open?
  end

  test "must not be open when just the orange endpoint is set" do
    @portal.orange = [3,3,3]
    refute @portal.open?
  end

  test "must not be open when just the blue endpoint is set" do
    @portal.blue = [5,5,5]
    refute @portal.open?
  end

  test "must be open when both endpoints are set" do
    @portal.orange = [3,3,3]
    @portal.blue = [5,5,5]

    assert @portal.open?
  end

  # a pending test
  test "must require endpoints to be a 3 element array of numbers"
end
```

Without having to install any third party libraries, I was able to support this kind of test syntax via a single function in my test helpers file.

```ruby
# Defines a group of tests: builds an anonymous MiniTest::Unit::TestCase
# subclass and evaluates the given block as that class's body.
#
# args  - the first argument is used as the context's name
# block - the body of the context (calls to test / setup / teardown)
#
# Falls through to super when either the name or the block is missing.
def context(*args, &block)
  return super unless (name = args.first) && block

  context_class = Class.new(MiniTest::Unit::TestCase) do
    class << self
      # Defines a test method from a description and a block. When no block
      # is given, the generated test just calls skip with the description.
      def test(name, &block)
        block ||= lambda { skip(name) }

        # The method name contains spaces, so it cannot be written with
        # `def`; define_method accepts it as-is.
        define_method("test: #{name} ", &block)
      end

      # Uses the block as the body of the instance-level setup method.
      def setup(&block)
        define_method(:setup, &block)
      end

      # Uses the block as the body of the instance-level teardown method.
      def teardown(&block)
        define_method(:teardown, &block)
      end

       # Makes the class print as its context name.
       def to_s
         name
       end
    end
  end

   # Defined with a block (not `def`) so that it closes over the local
   # `name` variable of this call and returns the context's description.
   context_class.singleton_class.instance_eval do
     define_method(:name) { name }
   end

  # Run the user's block as if it were the body of the generated class.
  context_class.class_eval(&block)
end
```

If you look past some of the dynamic class generation noise, you'll see that a good chunk of this is quite similar to how I implemented a clone of `attr_accessor` earlier. The `test`, `setup`, and `teardown` methods are nothing more than class methods which delegate to `define_method`. The only slightly interesting detail worth noting here is that in the `test` method I define methods which are not callable using ordinary Ruby method calling syntax. The use of `define_method` allows us to bypass the ordinary syntactic limits of using `def`, and because these methods are only ever invoked dynamically, this works without any issues. The reason I don't bother to normalize the strings is because you end up getting more humanized output from the test runner this way.

If you turn your focus back onto the dynamic class generation, you can see that this code creates an anonymous subclass of `MiniTest::Unit::TestCase` and then eventually uses `class_eval` to evaluate the provided block in the context of this class. This code is what enables us to write `context "foo" do ... end` and get something that works similar to the way an ordinary class definition works.

If you're focusing on really subtle details, you'll notice that this code goes through a bunch of hoops to define meaningful `name` and `to_s` methods on the class it dynamically generates. This is in part a bunch of massaging to get better output from MiniTest's runner, and in part to make it so that our anonymous classes don't look completely anonymous during debugging. The irb session below might make some sense of what's going on here, but if it doesn't you can feel free to chalk this up as an edge case you probably don't need to worry about.

```
>> Class.new
=> #<Class:0x00000101069260>
>> name = "A sample class"
=> "A sample class"
>> Class.new { singleton_class.instance_eval { define_method(:to_s) { name } } }
=> A sample class
```

Getting away from these ugly little details and returning to the overall pattern, it is relatively common to see domain-specific APIs which dynamically create modules or classes and then wrap certain kinds of method definitions in block based APIs as well. It's a handy pattern when used correctly, and could be useful in your own projects. But even if you never end up using it yourself, it's good to know how this all works as it'll make code reading easier for you.

While this example was perfect for having a discussion about the pattern of dynamic class creation in general, I'd strongly recommend against using my helper in your MiniTest code at this point. You'll find everything you need in `MiniTest::Spec`, and that is a much more standard solution than using some crazy hack I cooked up simply because I could.

With that disclaimer out of the way, we can move on to our final topic.

### Implement your own Gherkin parser, or criticize mine!

So far, we've talked about various tools which enable the use of domain specific language (DSL) within your Ruby applications. However, there is a whole other category of DSL techniques which involve parsing external languages and then converting them into meaningful structures within the host language. This is a topic that deserves an entire article of its own.

But because it'll be a while before I get around to writing that article, we can wrap up this article with a little teaser of things to come. To do so, I am challenging you to implement a basic story runner that parses the Gherkin language which is used by [Cucumber](http://cukes.info/) and other similar tools.

Your mission, if you choose to accept it, is to take the following feature file and process it with Cucumber-style step definitions. You can feel free to simplify your prototype as much as you'd like, as long as you capture the core idea of processing the steps in the feature file and executing arbitrary code for each of those steps.

```
Feature: Addition
  Scenario: Add two numbers
    Given I have entered 70 into the calculator
    And I have entered 50 into the calculator
    When I press add
    Then the result should be 120
```

If that sounds like too much work for you, you can take on a slightly easier task instead. In preparation for this article, I built two different implementations that capture the essence of the way that the Cucumber story runner works. [One implementation uses global functions](https://github.com/elm-city-craftworks/dsl_construction/blob/master/cucumber/global-dsl/fake_cuke.rb), and the [other implementation uses eval() with a binding](https://github.com/elm-city-craftworks/dsl_construction/blob/master/cucumber/binding-dsl/fake_cuke.rb). I'd like you to examine these two approaches and share your thoughts on what the functional differences between them are.

While I know not everyone will have the time to try out this exercise, if a few of you work on this and share your results, it will lead to a good discussion which could help me gauge interest in a second article about external DSLs. So if you have a few spare moments, please participate! 

### Reflections

We've now reached the end of a whirlwind tour of several powerful tools Ruby provides for bending its syntax and semantics to meet domain-specific needs. While I tried to pick examples which illustrated natural uses of domain specific API construction patterns, I am left feeling that these are advanced techniques which even experienced developers have a hard time evaluating the tradeoffs of.

There are two metrics to apply before trying out anything you've seen in this article in your own projects. The first thing to remember is that any deviation from ordinary method definitions and ordinary method calls should offer a benefit that is at least proportional to how exotic your approach is. Cleverness for the sake of cleverness can be a real killer if you're not careful. The second thing to remember is that whenever you provide nice domain-specific APIs for convenience or aesthetic reasons, you should always make sure to build them as a facade over a boring and vanilla API. This will help make sure your objects are easier to test and easier to work with in scenarios that your domain specific interface did not anticipate.

If you follow these two bits of advice, you can have fun using Ruby's sharp knives without getting cut too often. But if you do slip up from time to time, don't be afraid to abandon fancy interfaces in favor of having something a bit dull but easier to maintain and understand. It can be tempting to layer dynamic features on top of one another to "simplify" things, but that only hides the underlying problem which is that perhaps you were trying too hard. This is something that used to happen to me all the time, so don't feel bad when it happens to you. Just do what you can to learn from your mistakes as you try out new designs.

_NOTE: If you want to experiment with the examples in this article a bit more, you can find all of them in [this git repository](https://github.com/elm-city-craftworks/dsl_construction). If you fork the code and submit pull requests with improvements, I will review your changes and eventually make a note of them here if we stumble across some good ideas that I didn't cover._


================================================
FILE: articles/v2/012-working-with-binary-file-formats.md
================================================
Even if we rarely give them much thought, binary file formats are everywhere.
Ranging from images to audio files to nearly every other sort of media you can
imagine, binary files are used because they are an efficient way of
storing information in a ready-to-process format.

Despite their usefulness, binary files are cryptic and appear to be 
difficult to understand on the surface. Unlike a
text-based data format, simply looking at a binary file won't give you any 
hints about what its contents are. To even begin to understand a binary
encoded file, you need to read its format specification. These specifications 
tend to include lots of details about obscure edge cases, and that makes for
challenging reading unless you already have spent a fair amount of time 
working in the realm of bits and bytes. For these reasons, it's probably better
to learn by example rather than taking a more formal approach.

In this article, I will show you how to encode and decode the bitmap image
format. Bitmap images have a simple structure, and the format is well documented. 
Despite the fact that you'll probably never need to work with bitmap images 
at all in your day-to-day work, the concepts involved in both reading and 
writing a BMP file are pretty much the same as any other file format you'll encounter.

### The anatomy of a bitmap

A bitmap file consists of several sections of metadata followed by a pixel array that represents the color and position of every pixel in the image. 
The example below demonstrates that even if you break the sequence up into its different parts, it would still be a real 
challenge to understand without any documentation handy:

```ruby
# coding: binary

hex_data = %w[
  42 4D 
  46 00 00 00 
  00 00 
  00 00 
  36 00 00 00

  28 00 00 00 
  02 00 00 00 
  02 00 00 00 
  01 00 
  18 00 
  00 00 00 00 
  10 00 00 00 
  13 0B 00 00 
  13 0B 00 00
  00 00 00 00 
  00 00 00 00

  00 00 FF
  FF FF FF 
  00 00 
  FF 00 00 
  00 FF 00 
  00 00
]

out = hex_data.each_with_object("") { |e,s| s << Integer("0x#{e}") }

File.binwrite("example1.bmp", out)
```

Once you learn what each section represents, you can start
to interpret the data. For example, if you know that this is a
24-bit per pixel image that is two pixels wide, and two pixels high, you might
be able to make sense of the pixel array data shown below:

```
00 00 FF
FF FF FF 
00 00 
FF 00 00 
00 FF 00 
00 00
```

If you run this example script and open the image file it produces, you'll see
something similar to what is shown below once you zoom in close enough to see
its pixels:

![Pixels](http://i.imgur.com/XhKW1.png)


By experimenting with changing some of the values in the pixel array by hand, you will fairly quickly discover the overall structure of the array and the way pixels are represented. After figuring this out, you might also be able to look back on the rest of the file and determine what a few of the fields in the headers are without looking at the documentation.

After exploring a bit on your own, you should check out the [field-by-field walkthrough of a 2x2 bitmap file](http://en.wikipedia.org/wiki/BMP_file_format#Example_1) that this example was based on. The information in that table is pretty much all you'll need to know in order to make sense of the bitmap reader and writer implementations I've built for this article.

### Encoding a bitmap image

Now that you've seen what a bitmap looks like in its raw form, I can demonstrate
how to build a simple encoder object that allows you to generate bitmap images
in a much more convenient way. In particular, I'm going to show what I did to
get the following code to output the same image that we rendered via a raw
sequence of bytes earlier:

```ruby
bmp = BMP::Writer.new(2,2)

# NOTE: Bitmap encodes pixels in BGR format, not RGB!
bmp[0,0] = "ff0000"
bmp[1,0] = "00ff00"
bmp[0,1] = "0000ff"
bmp[1,1] = "ffffff"

bmp.save_as("example_generated.bmp")
```

Like most binary formats, the bitmap format has a tremendous amount of options
that make building a complete implementation a whole lot more complicated than
just building a tool which is suitable for generating a single type of image. I
realized shortly after skimming the format description that you can skip out on
a lot of the boilerplate information if you stick to 24bit-per-pixel images, so
I decided to do exactly that.

Looking at the implementation from the outside-in, you can see the general
structure of the `BMP::Writer` class. Pixels are stored in a two-dimensional
array, and all the interesting things happen at the time you write the image out
to file:

```ruby
class BMP
  class Writer
    # Creates a width x height canvas in which every pixel starts out
    # as the hex string "000000".
    def initialize(width, height)
      @width  = width
      @height = height

      # One inner array per row; each cell gets its own string object.
      @pixels = Array.new(@height) { Array.new(@width) { "000000" } }
    end

    # Stores a six-digit hex color string at column x of row y.
    def []=(x,y,value)
      row = @pixels[y]
      row[x] = value
    end

    # Writes the image to filename in binary mode: the file header first,
    # then the DIB header, then the pixel array.
    def save_as(filename)
      File.open(filename, "wb") do |file|
        write_bmp_file_header(file)
        write_dib_header(file)
        write_pixel_array(file)
      end
    end

    # ... rest of implementation details omitted for now ...
  end
end
```

All bitmap files start out with the bitmap file header, which consists of the
following things:

* A two character signature to indicate the file is a bitmap file (typically "BM").
* A 32bit unsigned little-endian integer representing the size of the file itself.
* A pair of 16bit unsigned little-endian integers reserved for application specific uses.
* A 32bit unsigned little-endian integer representing the offset to where the pixel array starts in the file.

The following code shows how `BMP::Writer` builds up this header and writes it
to file:

```ruby
class BMP
  class Writer
    PIXEL_ARRAY_OFFSET = 54
    BITS_PER_PIXEL     = 24

    # ... rest of code as before ...

    # Appends the bitmap file header to file: the "BM" signature, the total
    # file size, two reserved 16-bit fields (both zero), and the offset at
    # which the pixel array starts.
    def write_bmp_file_header(file)
      fields = ["BM", file_size, 0, 0, PIXEL_ARRAY_OFFSET]
      file << fields.pack("A2Vv2V")
    end

    # Total size of the file in bytes: everything before the pixel array
    # plus the pixel array itself.
    def file_size
      PIXEL_ARRAY_OFFSET + pixel_array_size
    end

    # Size of the pixel array in bytes. Each row is rounded up to a whole
    # number of 4-byte words before being multiplied by the row count.
    def pixel_array_size
      bytes_per_row = ((BITS_PER_PIXEL*@width)/32.0).ceil * 4
      bytes_per_row * @height
    end
  end
end
```

Out of the five fields in this header, only the file size ended up being
dynamic. I was able to treat the pixel array offset as a constant because the
headers for 24 bit color images take up a fixed amount of space. The file size
computations[^1] will make sense later once we examine the way that the pixel 
array gets encoded.

The tool that makes it possible for us to convert these various field values
into binary sequences is `Array#pack`. If you note that the file size of our
2x2 reference bitmap is 70 bytes, it becomes clear what `pack`
is actually doing for us when we examine the byte by byte values 
in the following example:

```ruby
header = ["BM", 70, 0, 0, 54].pack("A2Vv2V") 
p header.bytes.map { |e| "%.2x" % e }

=begin expected output (NOTE: reformatted below for easier reading)
  ["42", "4d", 
   "46", "00", "00", "00", 
   "00", "00", 
   "00", "00", 
   "36", "00", "00", "00"]
=end
```
The byte sequence for the file header exactly matches that of our reference image, 
which indicates that the proper bitmap file header is being generated. 
Below I've listed out how each field in the header is encoded:

```
  "A2" -> arbitrary binary string of width 2 (packs "BM" as: 42 4d)
  "V"  -> a 32bit unsigned little endian int (packs 70 as: 46 00 00 00)
  "v2" -> two 16bit unsigned little endian ints (packs 0, 0 as: 00 00 00 00)
  "V"  -> a 32bit unsigned little endian int (packs 54 as: 36 00 00 00)
```

While I went to the effort of expanding out the byte sequences to make it easier
to see what is going on, you don't typically need to do this at all while
working with `Array#pack` as long as you craft your template strings carefully.
But like anything else in Ruby, it's nice to be able to write little scripts or
hack around a bit in `irb` whenever you're trying to figure out how your
code is actually working.

After figuring out how to encode the file header, the next step was to work on
the DIB header, which includes some metadata about the image and how it should
be displayed on the screen:

```ruby
class BMP 
  class Writer
    DIB_HEADER_SIZE    = 40
    PIXELS_PER_METER   = 2835 # 2835 pixels per meter is basically 72dpi

    # ... other code as before ...

    # Appends the 40-byte DIB header to file. Every field is little-endian;
    # width, height and the two resolution values are packed as signed
    # 32-bit integers ("l<"), the rest as unsigned 16-bit ("v") or
    # 32-bit ("V") integers.
    def write_dib_header(file)
      file << [DIB_HEADER_SIZE,   # size of this header in bytes
               @width, @height,   # image dimensions in pixels
               1,                 # number of color planes
               BITS_PER_PIXEL,    # color depth
               0,                 # compression method (0 = none)
               pixel_array_size,  # size of the raw pixel data, padding included
               PIXELS_PER_METER,  # horizontal resolution
               PIXELS_PER_METER,  # vertical resolution
               0,                 # colors in the palette
               0                  # important colors
              ].pack("Vl<2v2V2l<2V2")
    end
  end
end
```

Because we are only working on a very limited subset of BMP features, it's
possible to construct the DIB header mostly from preset constants combined with
a few values that we already computed for the BMP file header.

The `pack` statement in the above code works in a very similar fashion as the
code that writes out the BMP file header, with one exception: it needs to handle
signed 32-bit little endian integers. This data type does not have a pattern of its own, 
but instead is a composite pattern made up of two
characters: `l<`. The first character (`l`) instructs Ruby to treat the value as a 32-bit
signed integer, and the second character (`<`) tells it to use
little-endian byte order.

It isn't clear to me at all why a bitmap image could contain negative values for
its width, height, and pixel density -- this is just how the format is
specified. Because our goal is to learn about binary file processing and not
image format esoterica, it's fine to treat that design decision as a black
box for now and move on to looking at how the pixel array is processed.

```ruby
class BMP
  class Writer
    # .. other code as before ...

    # Appends the pixel data to file, last row first. Each row is followed
    # by its padding bytes.
    def write_pixel_array(file)
      @pixels.reverse.each do |row|
        row.each { |color| file << pixel_binstring(color) }

        file << row_padding
      end
    end

    def pixel_binstring(rgb_string)
      raise ArgumentError unless rgb_string =~ /\A\h{6}\z/
      [rgb_string].pack("H6")
    end

    # Null bytes appended to each row so that its length in bytes is a
    # multiple of four (for 3-byte pixels that works out to width % 4).
    def row_padding
      padding_length = @width % 4
      "\x0" * padding_length
    end
  end
end
```

The most interesting thing to note about this code is that each row of pixels ends up getting padded with some null characters. This is to ensure that each row of pixels is aligned on WORD boundaries (4 byte sequences). This is a semi-arbitrary limitation that has to do with file storage constraints, but things like this are common in binary files. 

The calculations below show how much padding is needed to bring rows of various widths up to a multiple of 4, and explains how I derived the computation for the `row_padding` method:

```
Width 2 : 2 * 3 Bytes per pixel = 6 bytes  + 2 padding  = 8
Width 3 : 3 * 3 Bytes per pixel = 9 bytes  + 3 padding  = 12
Width 4 : 4 * 3 Bytes per pixel = 12 bytes + 0 padding  = 12
Width 5 : 5 * 3 Bytes per pixel = 15 bytes + 1 padding  = 16
Width 6 : 6 * 3 Bytes per pixel = 18 bytes + 2 padding  = 20
Width 7 : 7 * 3 Bytes per pixel = 21 bytes + 3 padding  = 24
...
```

Sometimes calculations like this are provided for you in format specifications,
other times you need to derive them yourself. Choosing to work
with only 24bit per pixel images allowed me to skirt the question of how to
generalize this computation to an arbitrary amount of bits per pixel.

While the padding code is definitely the most interesting aspect of the pixel array, there are a couple other details about this implementation worth discussing. In particular, we should take a closer look at the `pixel_binstring` method:

```ruby
# Converts a six-digit hex color string into a three-byte binary string.
# Raises ArgumentError when the input is not exactly six hex digits.
def pixel_binstring(rgb_string)
  raise ArgumentError unless rgb_string =~ /\A\h{6}\z/
  [rgb_string].pack("H6")
end
```

This is the method that converts the values we set in the pixel array via lines like `bmp[0,0] = "ff0000"` into actual binary sequences. It starts by matching the string with a regex to ensure that the input string is a valid sequence of 6 hexadecimal digits. If the validation succeeds, it then packs those values into a binary sequence, creating a string with three bytes in it. The example below should make it clear what is going on here:

```
>> ["ffa0ff"].pack("H6").bytes.to_a
=> [255, 160, 255]
```

This pattern makes it possible for us to specify color values directly in hexadecimal strings and then convert them to their numeric value just before they get written to the file.

With this last detail explained, you should now understand how to build a
functional bitmap encoder for writing 24bit color images. If seeing things
broken out step by step caused you to lose a sense of the big picture, you can
check out the [source code for BMP::Writer](https://gist.github.com/1351737). Feel free to play around with it a bit before moving on to the next section: the best way to learn is to actually run these code samples and try to extend them and/or break them in various ways.

### Decoding a bitmap image

As you might expect, there is a nice symmetry between encoding and decoding binary files. To show just to what extent this is the case, I will walk you through the code which makes the following example run:

```ruby
bmp = BMP::Reader.new("example1.bmp")
p bmp.width  #=> 2
p bmp.height #=> 2

p bmp[0,0] #=> "ff0000"   
p bmp[1,0] #=> "00ff00" 
p bmp[0,1] #=> "0000ff" 
p bmp[1,1] #=> "ffffff" 
```

The general structure of `BMP::Reader` ended up being quite similar to what I did for `BMP::Writer`. The code below shows the methods which define the public interface:

```ruby
class BMP
  # Reads a bitmap file once, at construction time, and then answers
  # questions about its dimensions and pixel colors from memory.
  class Reader
    # Opens bmp_filename in binary mode and processes it in three steps,
    # in file order: BMP header, DIB header, pixel array. The block form of
    # File.open closes the file when the block exits.
    def initialize(bmp_filename) 
      File.open(bmp_filename, "rb") do |file|
        read_bmp_header(file) # does some validations
        read_dib_header(file) # sets @width, @height
        read_pixels(file)     # populates the @pixels array
      end
    end

    # Image dimensions, as recorded by read_dib_header.
    attr_reader :width, :height

    # Returns the value stored for the pixel at column x of row y.
    # @pixels is indexed row-first, hence the swapped order.
    def [](x,y)
      @pixels[y][x]
    end
  end
end
```

This time, we still are working with an ordinary array of arrays to store the
pixel data, and most of the work gets done as soon as the file is read in the
constructor. Because I decided to support only a single image type, most of the
work of reading the headers is just for validation purposes. In fact, the
`read_bmp_header` method does nothing more than some basic sanity checking, as
shown below:

```ruby
class BMP
  class Reader
    # Byte offset at which the pixel array must begin for a file to be
    # accepted; compared against the offset recorded in the BMP header.
    PIXEL_ARRAY_OFFSET = 54

    # ...other code as before ...

    # Reads the 14-byte BMP file header from +file+ and sanity-checks it.
    #
    # file - an open IO-like object positioned at the start of the bitmap.
    #        It must respond to #read and #size.
    #
    # Returns nothing of interest.
    # Raises RuntimeError when the magic number is not "BM", when the size
    # recorded in the header differs from the real file size, or when the
    # pixel array does not start at PIXEL_ARRAY_OFFSET.
    def read_bmp_header(file)
      # IO#read(n) returns nil when the stream is already at end-of-file.
      # Coercing to a string lets an empty file fall through to the
      # "Not a bitmap file!" check instead of crashing on nil.unpack.
      header = file.read(14).to_s

      # A2 = magic number, V = file size, v2 = two reserved fields,
      # V = offset of the pixel array
      magic_number, file_size, reserved1,
      reserved2, array_location = header.unpack("A2Vv2V")
      
      fail "Not a bitmap file!" unless magic_number == "BM"

      unless file.size == file_size
        fail "Corrupted bitmap: File size is not as expected" 
      end

      unless array_location == PIXEL_ARRAY_OFFSET
        fail "Unsupported bitmap: pixel array does not start where expected"
      end
    end
  end
end
```

The key thing to notice about this code is that it reads from the file just the bytes it needs in order to parse the header. This makes it possible to validate a very large file without loading much data into memory. Reading entire files into memory is rarely a good idea, and this is especially true when it comes to binary data because doing so will actually make your job harder rather than easier. 

Once the header data is loaded into a string, the `String#unpack` method is used to extract some values from it. Notice here how `String#unpack` uses the same template syntax as `Array#pack` and simply provides the inverse operation. While the `pack` operation converts an array of values into a string of binary data, the `unpack` operation converts a binary string into an array of processed values. This allows us to recover the information packed into the bitmap file header as Ruby strings and fixnums.

Once these values have been converted into Ruby objects, it's easy to do some
ordinary comparisons to check to see if they're what we'd expect them to be.
Because they help detect corrupted files, clearly defined validations are an
important part of writing any decoder for binary file formats. If you do not do
this sort of sanity checking, you will inevitably run into 
subtle processing errors later on that will be much harder to debug.

As you might expect, the implementation of `read_dib_header` involves more of
the same sort of extractions and validations. It also sets the `@width` and
`@height` variables, which we use later to determine how to traverse the encoded
pixel array.

```ruby
class BMP
  class Reader
    # ... other code as before ...

    # The only color depth this reader accepts.
    BITS_PER_PIXEL     = 24
    # Size in bytes of the DIB header this reader accepts.
    DIB_HEADER_SIZE    = 40

    # Reads the DIB header from +file+, validates the fields this reader
    # depends on, and records the image dimensions.
    #
    # file - an open IO-like object positioned just after the BMP header.
    #        It must respond to #read and #size.
    #
    # Sets @width and @height.
    # Raises RuntimeError when the header is truncated, when any validated
    # field has an unexpected value, or when a dimension is negative.
    def read_dib_header(file)
      header = file.read(DIB_HEADER_SIZE)

      # IO#read returns nil at end-of-file and a short string when fewer
      # bytes remain; either way the fields below would unpack to nil.
      unless header && header.bytesize == DIB_HEADER_SIZE
        fail "Corrupted bitmap: DIB header is truncated"
      end

      # V = header size, l<2 = width/height (signed), v2 = planes/bpp,
      # V2 = compression/image size, l<2 = resolutions, V2 = color counts
      header_size, width, height, planes, bits_per_pixel, 
      compression_method, image_size, hres, 
      vres, n_colors, i_colors = header.unpack("Vl<2v2V2l<2V2") 

      unless header_size == DIB_HEADER_SIZE
        fail "Corrupted bitmap: DIB header does not match expected size"
      end

      unless planes == 1
        fail "Corrupted bitmap: Expected 1 plane, got #{planes}"
      end

      unless bits_per_pixel == BITS_PER_PIXEL
        fail "#{bits_per_pixel} bits per pixel bitmaps are not supported"
      end

      unless compression_method == 0
        fail "Bitmap compression not supported"
      end

      unless image_size + PIXEL_ARRAY_OFFSET == file.size
        fail "Corrupted bitmap: pixel array size isn't as expected"
      end

      # Width and height are unpacked as signed integers. In the BMP format
      # a negative height marks a top-down image, which read_pixels (which
      # walks rows bottom-up) does not handle; without this check the
      # failure would surface later as "negative array size".
      if width < 0 || height < 0
        fail "Unsupported bitmap: negative width or height"
      end

      @width, @height = width, height
    end
  end
end
```

Beyond what has already been said about this example and the DIB header itself, there isn't much more to discuss about this particular method. That means we can finally take a look at how `BMP::Reader` converts the encoded pixel array into a nested Ruby array structure.

```ruby
class BMP 
  class Reader
    # Fills @pixels, an array of @height rows each holding @width entries,
    # from the pixel data in +file+. Expects @width and @height to be set
    # and +file+ to be positioned at the first byte of the pixel array.
    def read_pixels(file)
      @pixels = Array.new(@height) { Array.new(@width) }

      # The first row read from the file is stored at the highest row index,
      # and the last row read ends up at index 0.
      (@height-1).downto(0) do |y|
        0.upto(@width - 1) do |x|
          # each pixel is 3 bytes, unpacked into a 6-digit hex string
          @pixels[y][x] = file.read(3).unpack("H6").first
        end
        advance_to_next_row(file)
      end
    end

    # Skips the padding bytes at the end of a row by moving the file
    # position forward instead of reading the bytes.
    #
    # Why @width % 4: a row holds 3 * @width bytes of pixel data. Assuming
    # rows are padded up to a multiple of 4 bytes (as the article describes
    # for the encoder), the padding needed is (-3 * @width) % 4, which is
    # equal to @width % 4 because -3 and 1 are congruent modulo 4.
    def advance_to_next_row(file)
      padding_bytes = @width % 4
      return if padding_bytes == 0

      file.pos += padding_bytes
    end
  end
end
```

One interesting aspect of this code is that it uses explicit numerical iterators. These are relatively rare in idiomatic Ruby, but I did not see a better way to approach this particular problem. Rows are listed in the pixel array from the bottom up, while the image itself still gets indexed from the top down (with 0 at the top). This makes it necessary to iterate over the row numbers in reverse order, and the use of `downto` is the best way I could find to do that.

The other thing worth noticing about this code is that in the `advance_to_next_row` method, we actually move the pointer ahead in the file rather than reading the padding bytes between each row. This makes little difference when you're dealing with a maximum of three bytes of padding per row (two in this case), but is a good practice for writing more efficient code that consumes less memory.

When you take all these code examples and glue them together into a single class
definition, you'll end up with a `BMP::Reader` object that is capable of giving you
the width and height of a 24bit BMP image as well as the color of each and every
pixel in the image. For those who'd like to experiment further, the [source code
for BMP::Reader](https://gist.github.com/1352294) is available.

### Reflections

The thing that makes me appreciate binary file formats is that if you just learn
a few basic computing concepts, there are few things that could be more
fundamentally simple to work with. But simple does not necessarily mean easy, and in the process of writing this article I realized that some aspects of binary file processing are not quite as trivial or intuitive as I originally thought they were.

What I can say is that this kind of work gets a whole lot easier with practice.
Due to my work on [Prawn](http://prawnpdf.org) I have written
implementations for various different binary formats including PDF, PNG, JPG,
and TTF. These formats each have their differences, but my experience tells me 
that if you fully understand the examples in this article, then you are already 
well on your way to tackling pretty much any binary file format.

[^1]: To determine the storage space needed for the pixel array in BMP images, I used the computations described in the [Wikipedia article on bitmap images](http://en.wikipedia.org/wiki/BMP_file_format#Pixel_storage).

> NOTE: If you'd like to learn more about this topic, consider doing the Practicing Ruby self-guided course on [Streams, Files, and Sockets](https://practicingruby.com/articles/study-guide-1?u=dc2ab0f9bb). You've already completed one of its reading exercises by working through this article!


================================================
FILE: articles/v2/014-arguments-and-results-1.md
================================================
Back in 1997, James Noble published a paper called [Arguments and Results](http://www.laputan.org/pub/patterns/noble/noble.pdf) which outlined several useful patterns for designing better object protocols. Despite the fact that this paper was written nearly 15 years ago, it addresses design problems that programmers still struggle with today. In this two part article, I will show how the patterns James came up with can be applied to modern Ruby programs.

<u>Arguments and Results</u> is written in such a way that it is natural to split the patterns it describes into two separate groups: patterns about method arguments and patterns about the results returned by methods. I've decided to split this Practicing Ruby article in the same manner in order to make it easier for me to write and easier for you to read. 

In this first installment, we will explore the patterns James lays out for working with method arguments, and in Issue 2.15 we'll look into results objects. If you read this part, be sure to read the second part once it comes out, because the two concepts complement each other nicely.

### Establishing a context 

It is very difficult to study design patterns without applying them within a particular context. When I am trying to learn new patterns, I tend to start by looking for a realistic scenario that the pattern might be applicable to. I then examine the benefits and drawbacks of the design changes within that context. James uses a lot of graphics programming examples in his paper and this is for good reason: it's an area where designing good interfaces for your objects can quickly become challenging.

I've decided to follow in James's footsteps here and use a trivial [SVG](http://www.w3.org/TR/SVG/) generator as the common theme for the examples in this article. The following code illustrates the interface that I started with before applying any special patterns:

```ruby
# image dimensions are provided to `Drawing` in cm, 
# all other measurements are done in units of 0.01 cm

drawing = Drawing.new(4,4)

drawing.line(:x1 => 100, :y1 => 100, :x2 => 200, :y2 => 250,
             :stroke_color => "blue", :stroke_width => 2)

drawing.line(:x1 => 300, :y1 => 100, :x2 => 200, :y2 => 250,
             :stroke_color => "blue", :stroke_width => 2)

File.write("sample.svg", drawing.to_svg)
```

The implementation details are not important here, but if you would like to see how this code works, you can check out the [source code for the Drawing class](https://github.com/elm-city-craftworks/pr-arguments-and-results/blob/7656768680b6a940a5ccf569fc0e0dce48a5dbfe/drawing.rb). The interface for `Drawing#line` uses keyword-style arguments in a similar fashion to most other Ruby libraries. Because keyword arguments are easier to remember and more flexible than ordinal arguments, this style of interface has become very popular among Ruby programmers. However, the more arguments a method takes, the more unwieldy this sort of API becomes. That tipping point is where design patterns about arguments come into play.

### Arguments object

As the number of arguments to a method increases, the amount of code within the method to handle those arguments tends to increase as well. This is because complex protocols typically require arguments to be validated and transformed before they can be operated on. By introducing new objects to wrap related sets of arguments, it is possible to keep your argument processing logic somewhat separated from your business logic. The following code demonstrates how to use this concept to simplify the interface of the `Drawing#line` method:

```ruby
# The coordinates and the styling now travel in their own objects
# instead of one long options hash.
drawing = Drawing.new(4,4)

line1 = Drawing::Shape.new([100, 100], [200, 250])
line2 = Drawing::Shape.new([300, 100], [200, 250])

# The stroke width is given as an integer, matching every other example
# in this article; the style object is shared by both lines.
line_style = Drawing::Style.new(:stroke_color => "blue", :stroke_width => 2)

drawing.line(line1, line_style)

drawing.line(line2, line_style)

File.write("sample.svg", drawing.to_svg)
```

This approach takes a single complex method call on a single object and replaces it with several less complex method calls distributed across several objects. In the early stages of development, applying this pattern feels ugly because it involves writing a lot more code for both the library developer and application developer. However, as the complexity of the argument processing increases, the benefits of this approach begin to shine. The following example demonstrates how the newly introduced arguments objects raise the `Drawing#line` code up to a higher level of abstraction.

```ruby
# Records a line segment to be drawn.
#
# data  - a shape-like object responding to #bounded_by? and #[], whose
#         first two points are the endpoints of the segment.
# style - an object responding to #to_css.
#
# Returns @lines after the new entry has been appended.
# Raises ArgumentError when the shape does not fit in the view box.
def line(data, style)
  fits = data.bounded_by?(@viewbox_width, @viewbox_height)
  raise ArgumentError, "shape is not within view box" unless fits

  from, to = data[0], data[1]

  # every coordinate is stored as a string
  @lines << { :x1    => from.x.to_s,
              :y1    => from.y.to_s,
              :x2    => to.x.to_s,
              :y2    => to.y.to_s,
              :style => style.to_css }
end
```

The cost of making `Drawing#line` so concise is a big chunk of boilerplate code that on the surface feels a bit overkill at this stage in the game. However, it does not take a very wild imagination to see how these new objects set the stage for future extensions:

```ruby
# A single coordinate pair.
class Point
  attr_reader :x, :y

  # x, y - the horizontal and vertical coordinates, stored as given.
  def initialize(x, y)
    @x = x
    @y = y
  end
end

# An ordered list of points that can be asked whether it fits in a view box.
class Shape
  # point_data - any number of [x, y] pairs; each is turned into a Point.
  def initialize(*point_data)
    @points = point_data.map { |e| Point.new(*e) }
  end

  # Returns the Point at the given position, or nil when out of range.
  def [](index)
    @points[index]
  end

  # Returns true when every point lies inside the rectangle spanning
  # (0, 0) to (x_max, y_max), edges included.
  #
  # The lower bound matters: a point with a negative coordinate is just
  # as far outside the view box as one beyond x_max or y_max.
  # NOTE(review): assumes the view box origin is (0, 0) -- confirm against
  # the viewBox that Drawing emits.
  def bounded_by?(x_max, y_max)
    @points.all? { |p| p.x.between?(0, x_max) && p.y.between?(0, y_max) }
  end
end

# Stroke settings shared by every kind of shape, with defaults applied
# once here rather than in each drawing method.
class Style
  attr_reader :stroke_width, :stroke_color

  # params - a Hash that may contain :stroke_width (default 5) and
  #          :stroke_color (default "black").
  def initialize(params)
    @stroke_color = params.fetch(:stroke_color, "black")
    @stroke_width = params.fetch(:stroke_width, 5)
  end

  # Returns the settings as a CSS declaration string.
  def to_css
    "stroke: #{@stroke_color}; stroke-width: #{@stroke_width}"
  end
end
```

The interesting thing about these objects is that they actually represent domain models even though their original purpose was simply to wrap up some arguments to a single method defined on the `Drawing` object. James mentions in his paper that this phenomenon is common, and he calls these "Found objects", i.e. objects that are part of the domain model that were found through refactoring rather than accounted for in the original design.

You might have noticed that in the previous example, I set some default values for some of the variables on the `Style` object. If you compare this to setting defaults directly within the `Drawing#line` method itself, it becomes obvious that there is a benefit here. Properties like
the color and thickness of the lines drawn to form a shape are universal properties, not things specific to straight lines only. Centralizing the defaults makes it so that they do not need to be repeated for each type of shape that the `Drawing` object supports.

### Selector object

Sometimes we end up with objects that have many methods that take similar arguments. While these methods may actually do different things, the only difference in the object protocol is the name of the message being sent. After adding a method for rendering polygons to my `Drawing` object, I ended up in exactly this situation. The following example shows just how similar the `Drawing#line` interface is to the newly created `Drawing#polygon` method:

```ruby
drawing = Drawing.new(4,4)

line1 = Drawing::Shape.new([100, 100], [200, 250])
line2 = Drawing::Shape.new([300, 100], [200, 250])

triangle = Drawing::Shape.new([350, 150], [250, 300], [150,150])

style = Drawing::Style.new(:stroke_color => "blue", :stroke_width => 2)

drawing.line(line1, style)

drawing.line(line2, style)

drawing.polygon(triangle, style)

File.write("sample.svg", drawing.to_svg)
```

Taking a look at the implementation of both methods, it is easy to see that there are deep similarities in structure between the two:

```ruby
class Drawing
  # NOTE: other code omitted, not important...

  # Appends a [:line, attributes] pair to @elements.
  #
  # data  - a shape whose first two points are the endpoints.
  # style - an object responding to #to_css.
  #
  # Raises ArgumentError when the shape does not fit in the view box.
  def line(data, style)
    unless data.bounded_by?(@viewbox_width, @viewbox_height)
      raise ArgumentError, "shape is not within view box"
    end

    # coordinates are stored as strings
    @elements << [:line, { :x1    => data[0].x.to_s, 
                           :y1    => data[0].y.to_s, 
                           :x2    => data[1].x.to_s, 
                           :y2    => data[1].y.to_s,
                           :style => style.to_css }] 
  end

  # Appends a [:polygon, attributes] pair to @elements.
  #
  # data  - a shape whose points are the polygon's vertices; it must
  #         respond to #each.
  # style - an object responding to #to_css.
  #
  # Raises ArgumentError when the shape does not fit in the view box.
  def polygon(data, style)
    # same bounds check as in #line
    unless data.bounded_by?(@viewbox_width, @viewbox_height)
       raise ArgumentError, "shape is not within view box"     
    end

    # :points becomes one string of "x,y" pairs separated by spaces
    @elements << [:polygon, { 
      :points => data.each.map { |point| "#{point.x},#{point.y}" }.join(" "),
      :style  => style.to_css
    }]
  end
end
```

To make this code more DRY, James recommends converting our arguments object into what he calls a selector object. A selector object is an object which uses similar arguments to do different things depending on the type of message it is meant to represent. James recommends using double dispatch or multi-methods to implement this pattern, but that approach is not appropriate for Ruby because the language does not provide built-in semantics for function overloading. The good news is that he also mentions that inheritance can be used as an alternative, and in this case it was a perfect fit.

To simplify and clean up the previous example, I introduced `Line` and `Polygon` which inherit from `Shape`. I then combined the `Drawing#line` method and `Drawing#polygon` method into a single method called `Drawing#draw`. The following example demonstrates what the API ended up looking like as a result of this change:

```ruby
drawing = Drawing.new(4,4)

line1 = Drawing::Line.new([100, 100], [200, 250])
line2 = Drawing::Line.new([300, 100], [200, 250])

triangle = Drawing::Polygon.new([350, 150], [250, 300], [150,150])

style = Drawing::Style.new(:stroke_color => "blue", :stroke_width => 2)

drawing.draw(line1, style)
drawing.draw(line2, style)
drawing.draw(triangle, style)

File.write("sample.svg", drawing.to_svg)
```

The changes to the API are small but make the code a lot easier to read. This rearrangement introduces even more objects into the system, but simplifies the protocol between those objects. In large systems, this leads to greater maintainability and learnability at the cost of having a few more moving parts.

In order to implement this new interface, some non-trivial changes needed to be made under the hood. You can check out the [exact commit](https://github.com/elm-city-craftworks/pr-arguments-and-results/commit/47924901552d0509f97a3083737709980139feba) to see the details about what changed implementation-wise between this example and the last one, but most of the changes were just boring housekeeping. The general idea is that the `Drawing#draw` method now simply asks each shape object to represent itself as a hash which ultimately ends up getting converted into an XML tag within the SVG document. As an example, here is what the definition for the `Line` object looks like:

```ruby
class Drawing
  # A shape whose first two points are the endpoints of a straight segment.
  class Line < Shape
    # Describes this line as a Hash with a :tag_name and the :params
    # (attributes) that go with it.
    #
    # style - an object responding to #to_css.
    def to_hash(style)
      from, to = self[0], self[1]

      # coordinates are stored as strings
      attributes = { :x1    => from.x.to_s,
                     :y1    => from.y.to_s,
                     :x2    => to.x.to_s,
                     :y2    => to.y.to_s,
                     :style => style.to_css }

      { :tag_name => :line, :params => attributes }
    end
  end
end
```

As you can imagine, the `Polygon` object uses a similar approach and this general pattern would be applicable for new types of shapes as well.


### Curried object

While method arguments exist to allow us to vary the objects we pass in, it's not uncommon for the same method to be called many times with some of its arguments being held constant. In fact, all of the examples in this article have shown the same `Style` object being passed to the same method again and again, with only the shape varying. This has resulted in some repetitive code that looks ugly, and could be improved.

James recommends creating a curried object to deal with this sort of problem. The curried object acts as a lightweight proxy over the original object, but keeps the constant data stored in variables so that you do not need to keep repeating it. The following code applies this concept to clean up our previous example:

```ruby
line1 = Drawing::Line.new([100, 100], [200, 250])
line2 = Drawing::Line.new([300, 100], [200, 250])

triangle = Drawing::Polygon.new([350, 150], [250, 300], [150,150])

drawing = Drawing.new(4,4)
style   = Drawing::Style.new(:stroke_color => "blue", :stroke_width => 2)
pen     = Drawing::Pen.new(drawing, style)

pen.draw(line1)
pen.draw(line2)
pen.draw(triangle)

File.write("sample.svg", drawing.to_svg)
```

While introducing the new `Pen` object requires a change in the calling code so that `Pen#draw` gets called instead of `Drawing#draw`, no change to the implementation of `Drawing` was needed to introduce this new object. The following class definition will do the trick:

```ruby
class Drawing
  # Remembers a drawing and a style so that callers only have to supply
  # the shape on each call.
  class Pen
    # drawing - the object that will receive #draw(shape, style).
    # style   - the style passed along with every shape.
    def initialize(drawing, style)
      @drawing = drawing
      @style   = style
    end

    # Forwards the shape to the drawing together with the stored style.
    # Returns whatever the drawing's #draw returns.
    def draw(shape)
      drawing.draw(shape, style)
    end

    private

    # kept private: callers interact with the pen only through #draw
    attr_reader :drawing, :style
  end
end
```

In this particular case, `Pen` is easy to write because the interface on `Drawing` is so small. In more complicated cases, it would make sense to use some of Ruby's metaprogramming features to implement a dynamic proxy of some sort. However, if you find yourself simultaneously facing a broad interface that has arguments that often remain constant in many of its functions, you may want to evaluate whether you have a flawed design before going down that road.

An interesting thing to note is that curried objects are not necessarily limited to arguments that remain constant. This pattern can also be applied in situations where method calls made in sequence have a clear pattern in the way that one or more arguments are varied. The example James gives in his paper describes some logic for a text editor in which lines of text are rendered to the screen with all the same style attributes from line to line, but with the line number incremented as each new line is rendered. Taking inspiration from that example, I decided to build a simple turtle graphics system to demonstrate how curried objects can be used for predictably varying arguments as well as constant arguments. The code below generates an image of an X when run:

```ruby
drawing = Drawing.new(4,4)
style   = Drawing::Style.new(:stroke_color => "blue", :stroke_width => 2)
turtle  = Drawing::Turtle.new(drawing, style)

turtle.move_to([0, 400])

turtle.pen_down
turtle.move_to([400, 0])

turtle.pen_up
turtle.move_to([0,0])

turtle.pen_down
turtle.move_to([400,400])

File.write("sample.svg", drawing.to_svg)
```

The implementation code to make the previous example work was very easy to write and required no changes to the rest of the system:

```ruby
class Drawing
  # A cursor that moves from point to point and, while its pen is down,
  # draws a line on the drawing for every move.
  class Turtle
    # drawing - the object that will receive #draw(shape, style).
    # style   - the style used for every line.
    #
    # The turtle starts at [0,0] with the pen up.
    def initialize(drawing, style)
      @drawing, @style = drawing, style
      @position        = [0,0]
      @inked           = false
    end

    # Moves to next_position, drawing a line from the current position
    # first if the pen is down. Returns next_position.
    def move_to(next_position)
      drawing.draw(Line.new(position, next_position), style) if inked

      self.position = next_position
    end

    # Stops drawing on subsequent moves.
    def pen_up
      self.inked = false
    end

    # Starts drawing on subsequent moves.
    def pen_down
      self.inked = true
    end

    private

    attr_reader   :drawing, :style
    attr_accessor :position, :inked
  end
end
```

After taking a look at the finished `Turtle` object, I did wonder a little bit about whether the idea of a curried object in Ruby is nothing more than an ordinary object making use of object composition. However, because the name of the pattern is helpful for describing the intent of this sort of object in a succinct way, it may be a good label for us to use when discussing the merits of different design options.

### Reflections

Applying these various argument patterns to a realistic example made it much easier for me to see the power behind these ideas. I have gradually picked up bits and pieces of the various techniques shown here before reading this paper largely due to my trial and error work on the Prawn PDF generator. 

In lots of places in Prawn, we let hash arguments grow to an insanely large size and it created a lot of problems for us. We also ignored using curried objects in a lot of places by instead placing instance variables directly on the target objects and then mutating the state within them over time to vary things. This led to complicated transactional code and made it easy for things to end up in an inconsistent state. The solutions to these problems tended to be refactorings that are quite similar to what you've seen in this article, even if we didn't call them by a special name at the time.

Still, I do have some concern that these patterns might be overkill for any interfaces that you are reasonably sure won't get too complex over time. If you apply these patterns overzealously, you might end up needing to go through level after level of indirection just to accomplish anything useful, and that will make Ruby start to feel like Java. However, it seems like using some sort of formalized arguments object is obviously beneficial for highly complex interactions, and likely to be at least somewhat useful for medium complexity protocols as well.

No matter what the complexity of the problem I was working on, it's unlikely that I would make it so that the application developer needed to jump through so many hoops just to use my library. Instead, I would probably build a simple facade or DSL that made their life easier, even if a rich object structure was lurking under the hood. If I were really building an SVG generator, I might end up building a DSL for it that looked something like this:

```ruby
drawing do
  style :stroke_color => "blue", :stroke_width => 2 do
    line    [100, 100], [200, 250]
    line    [300, 100], [200, 250]
    polygon [350, 150], [250, 300], [150,150]
  end

  save_as "sample.svg"
end
```

If I implemented this as a thin veneer on top of code similar to what we ended up with in this article, I think that would be a pretty well designed library. The end user gets convenience for the normal case, but the underlying system would be easier to maintain, test, and learn. It would also give the user flexibility to interact with the system in ways I didn't anticipate.

Be sure to tune in next week for the second part of this article, where I'll focus on the results side of the method interface. Until then, I'd love to hear any questions or thoughts you have about this topic.


================================================
FILE: articles/v2/015-arguments-and-results-2.md
================================================
Back in 1997, James Noble published a paper called [Arguments and Results](http://www.laputan.org/pub/patterns/noble/noble.pdf) which outlined several useful patterns for designing better object protocols. Despite the fact that this paper was written nearly 15 years ago, it addresses design problems that programmers still struggle with today. In this two part article, I show how the patterns James came up with can be applied to modern Ruby programs.

<u>Arguments and Results</u> is written in such a way that it is natural to split the patterns it describes into two separate groups: patterns about method arguments and patterns about the results returned by methods. I've decided to split this Practicing Ruby article in the same manner in order to make it easier for me to write and easier for you to read. 

In [Issue 2.14](http://practicingruby.com/articles/14) I outlined various kinds of arguments objects that can be used to simplify the messages being sent within a system. In this issue, I will show how results objects can provide similar flexibility on the response side of things.

### Results objects

Results objects are similar to argument objects in that they simplify the interface of one object at the cost of introducing new objects into the system. The `Report` class I built for [Issue 2.13](http://practicingruby.com/articles/13) is a good example of this sort of object. If we start with its class definition and work our way backwards to what the code would have looked like without it, we will be able to see the reason why this object was introduced in the first place.

```ruby
module SubscriptionCounter
  # Aggregates a series of entries into per-field lists plus an average,
  # so that presentation code does not need to know the series itself.
  class Report
    # NOTE: @summary is never assigned in this class as shown, so #summary
    # returns nil unless it is set elsewhere.
    attr_reader :issue_numbers, :weekly_counts, :weekly_deltas,
                :average_delta, :summary, :series

    # series - a collection responding to #map whose entries respond to
    #          #number, #count, and #delta.
    def initialize(series)
      @series        = series
      @issue_numbers = series.map { |entry| entry.number }
      @weekly_counts = series.map { |entry| entry.count }
      @weekly_deltas = series.map { |entry| entry.delta }
      @average_delta = Statistics.adjusted_mean(@weekly_deltas)
    end

    # Returns one row per entry in the series, each row holding the values
    # of the requested fields in the order they were asked for.
    def table(*fields)
      series.map do |entry|
        fields.map { |field| entry.send(field) }
      end
    end
  end
end
```

The following code outlines how `Report` is actually used by client code. It essentially serves as a bridge between `DataSeries` and `Report::PDF`.

```ruby
campaigns = SubscriptionCounter::Campaign.all
series    = SubscriptionCounter::DataSeries.new(campaigns, 10)
report    = SubscriptionCounter::Report.new(series)

SubscriptionCounter::Report::PDF.new(report).save_as("pr-subscribers.pdf")
```

If we pretend that `Report` never existed and that its methods were implemented directly on `DataSeries`, we would end up with something similar to the following client code:

```ruby
campaigns = SubscriptionCounter::Campaign.all
series    = SubscriptionCounter::DataSeries.new(campaigns, 10)

SubscriptionCounter::Report::PDF.new(series).save_as("pr-subscribers.pdf")
```

This example actually looks a bit cleaner than the previous one, but results in six new methods getting added to `DataSeries` and introduces tighter coupling between the `DataSeries` and `PDF` objects. Due to the increased coupling, a change in the interface of the `DataSeries` object will directly impact the presentation code in the `PDF` object, whereas before the `Report` object provided a buffer zone between the two classes.

While it is still possible to substitute a `DataSeries` object with some other object that provides an equivalent interface, we have lost the flexibility of reusing the code that actually does the aggregation work. Before removing the `Report` object, it was possible to use it to wrap pretty much any `Enumerable` collection of objects which provided `number`, `count`, and `delta` methods. Now we must either do the aggregation ourselves or use a `DataSeries` object directly.

These downsides are why I introduced the `Report` object in the first place, and they make the case for using an object that exists simply to aggregate some results based on the data contained in another object. If I wanted to make the integration of this results object a bit tighter and simplify the client code, I could have introduced a `DataSeries#report` method such as the one shown below:

```ruby
module SubscriptionCounter
  class DataSeries
    # Convenience constructor for the results object: returns a new Report
    # that wraps this series, so callers do not have to build one themselves.
    def report
      SubscriptionCounter::Report.new(self)
    end
  end
end
```

With this method added, I could either have the `Report::PDF` accept an object that responds to `report`, or call the method explicitly in my client code. If I went with the former approach I could use the same client code as shown in the previous example, making the `Report` object completely transparent to the end user. However, the latter approach still looks a bit cleaner than what I had originally without introducing too much coupling into the system:

```ruby
campaigns = SubscriptionCounter::Campaign.all
series    = SubscriptionCounter::DataSeries.new(campaigns, 10)

# series.report builds the Report, so this code never has to construct one.
SubscriptionCounter::Report::PDF.new(series.report).save_as("pr-subscribers.pdf")
```

While this pattern certainly has its benefits, it may feel a bit unexciting to Ruby developers. When results objects are introduced simply to reduce coupling between two different subsystems in your project or to provide a bit of encapsulation for some cached values, they feel like ordinary objects that don't require a special name. However, explicitly thinking of results objects as an abstraction opens the door for more interesting techniques as well.

### Lazy objects

One interesting aspect of introducing results objects into a system is that it helps facilitate lazy evaluation. Ruby's own `Enumerator` object provides an excellent example of how powerful this combination can be. Laziness allows `Enumerator` objects to efficiently chain different transformations together. This makes it possible to do things like `map.with_index` without having to iterate over the collection multiple times or store an intermediate representation of the indexed data:

```ruby
# map without a block returns an Enumerator; with_index then hands the block
# each element together with its index.
>> [1,2,3,4].map.with_index { |e,i| "#{i}. #{e}" }
=> ["0. 1", "1. 2", "2. 3", "3. 4"]
```

Lazy objects can also represent infinite or repeating sequences in a very elegant way. The examples below show some bits of functionality baked into `Enumerator` that make modeling these kinds of sequences a whole lot easier.

```ruby
# cycle without a block returns an Enumerator that repeats the array forever.
>>  players = [:red, :black].cycle
=> #<Enumerator: [:red, :black]:cycle>
>> players.next
=> :red
>> players.next
=> :black
>> players.next
=> :red
>> players.next
=> :black
# An Enumerator built from a block can describe an endless sequence; a value
# is only produced when something asks for it.
>> odds = Enumerator.new { |y| k = 0; loop { y << 2*k + 1; k += 1 } }
=> #<Enumerator: #<Enumerator::Generator:0x00000100972b50>:each>
>> odds.next
=> 1
>> odds.next
=> 3
>> odds.next
=> 5
# take starts from the beginning of the sequence, no matter how far the
# earlier calls to next have advanced.
>> odds.take(10)
=> [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
```

While infinite sequences may seem like a very academic topic, they show up in some practical applications as well. For example, some video games use procedural generation to produce seemingly infinite randomly generated maps. The video below demonstrates that technique being used in a very crude manner, but the same general approach could be used to build rich three dimensional environments as well, such as the ones found in [MineCraft](http://www.minecraft.net/). (_NOTE: I accidentally uploaded this video with ambient sounds rather than muted, and won't be able to fix this until I return from vacation after December 15th. If you don't like the sound of keyboard motions, heavy breathing, and some weird beeping noise: mute your audio before playing this video. Sorry!_)

<div align="center">
<iframe width="640" height="480" src="//www.youtube.com/embed/fg-dYZfd6Y4?rel=0" frameborder="0" allowfullscreen></iframe>
</div>

To implement the map generation code, I put together a simple `Location` object which is essentially an infinite two dimensional doubly linked list. Notice how the class definition below makes extensive use of the common `||=` idiom to handle the lazy evaluation and caching.

```ruby
# One tile on an unbounded two dimensional map. Tiles are built the first
# time their coordinates are requested and cached from then on, so asking
# for the same coordinates twice returns the same object.
class Location
  attr_reader :x, :y, :color

  # Returns the Location at (x, y), creating and caching it on first access.
  def self.[](x, y)
    @locations ||= {}
    @locations[[x, y]] ||= new(x, y)
  end

  # Each new tile is given a random color, with green twice as likely as blue.
  def initialize(x, y)
    @x, @y = x, y
    @color = [:green, :green, :blue].sample
  end

  # Locations are equal when their coordinates match.
  def ==(other)
    [x, y] == [other.x, other.y]
  end

  # The four neighbor readers below all follow the same pattern: the `||=`
  # idiom looks the adjacent tile up on the first call and remembers it.

  def north
    @north ||= Location[x, y - 1]
  end

  def south
    @south ||= Location[x, y + 1]
  end

  def east
    @east ||= Location[x + 1, y]
  end

  def west
    @west ||= Location[x - 1, y]
  end

  # Adjacent tiles in north, south, east, west order.
  def neighbors
    [north, south, east, west]
  end
end
```

While this technique works fine and is the traditional way to achieve lazy evaluation in Ruby, it feels a bit primitive. Ruby does not provide a general purpose construct for lazy evaluation, but if it did, it would allow us to write code similar to what you see below:

```ruby
# Map tile whose neighbors are held as lazy objects rather than being looked
# up through cached reader methods.
class Location
  attr_reader :x, :y, :color, :north, :south, :east, :west

  # Returns the Location at (x, y), creating and caching it on first access.
  def self.[](x, y)
    @locations ||= {}
    @locations[[x, y]] ||= new(x, y)
  end

  def initialize(x, y)
    @x, @y = x, y
    @color = [:green, :green, :blue].sample

    # Nothing is looked up here: each block only runs once a message is
    # actually sent to the corresponding neighbor.
    @north = LazyObject.new { Location[x, y - 1] }
    @south = LazyObject.new { Location[x, y + 1] }
    @east  = LazyObject.new { Location[x + 1, y] }
    @west  = LazyObject.new { Location[x - 1, y] }
  end

  # Locations are equal when their coordinates match.
  def ==(other)
    [x, y] == [other.x, other.y]
  end

  # Adjacent tiles in north, south, east, west order.
  def neighbors
    [north, south, east, west]
  end
end
```

Such an object can be implemented as a simple proxy which delays the execution of a callback until the results are actually needed. The following code illustrates one way to do that:

```ruby
# A transparent proxy that puts off running a block until its result is
# first needed, then reuses that result for every later message.
#
# Inheriting from BasicObject leaves the proxy with almost no methods of its
# own, so nearly every message lands in method_missing and is forwarded.
class LazyObject < BasicObject
  # callback - the block that produces the underlying object on first use.
  def initialize(&callback)
    @callback  = callback
    @evaluated = false
  end

  # Returns the underlying object, running the callback at most once.
  #
  # An explicit flag is used instead of `||=` so that a callback which
  # returns nil or false is still treated as evaluated rather than being
  # re-run on every access. If the callback raises, nothing is cached and
  # the next access tries again.
  def __result__
    unless @evaluated
      @__result__ = @callback.call
      @evaluated  = true
    end

    @__result__
  end

  # Forwards any message the proxy does not define to the underlying object.
  # __send__ is used because, unlike send, every object responds to it.
  def method_missing(*a, &b)
    __result__.__send__(*a, &b)
  end
end
```

Another option would be to use [lazy.rb](http://moonbase.rydia.net/software/lazy.rb/), which provides similar functionality via `Lazy::Promise` objects that get instantiated via the `Lazy.promise` method:

```ruby
require "lazy"

# Map tile whose neighbors are held as lazy.rb promises.
class Location
  attr_reader :x, :y, :color, :north, :south, :east, :west

  # Returns the Location at (x, y), creating and caching it on first access.
  def self.[](x, y)
    @locations ||= {}
    @locations[[x, y]] ||= new(x, y)
  end

  def initialize(x, y)
    @x, @y = x, y
    @color = [:green, :green, :blue].sample

    # Each promise wraps the lookup of one adjacent tile in a block instead
    # of performing that lookup here.
    @north = Lazy.promise { Location[x, y - 1] }
    @south = Lazy.promise { Location[x, y + 1] }
    @east  = Lazy.promise { Location[x + 1, y] }
    @west  = Lazy.promise { Location[x - 1, y] }
  end

  # Locations are equal when their coordinates match.
  def ==(other)
    [x, y] == [other.x, other.y]
  end

  # Adjacent tiles in north, south, east, west order.
  def neighbors
    [north, south, east, west]
  end
end
```

This approach provides a thread safe solution and prevents us from having to reinvent the wheel. The only downside is that _lazy.rb_ is a bit dated and generates some warnings on Ruby 1.9 due to the way it implements its core proxy object. But whether you use _lazy.rb_ or roll your own lazy object, it is important to understand that the difference between this pattern and the common Ruby idiom of delaying execution via cached method calls is more than just a matter of aesthetics. To illustrate why that is the case, we can consider the difference in behavior between the two approaches when `Location#neighbors` is called.

In the original example that explicitly defines the `north`, `south`, `east`, and `west` methods, the first time the `neighbors` method is called, four `Location` objects are created. This means that the following line of code will generate all four neighboring `Location` objects even if not all of them are needed to answer the question it asks:

```ruby
# Picks the first neighboring location whose color is green (nil if none is).
green_neighbor = location.neighbors.find { |loc| loc.color == :green }
```

By contrast, a `Location` object that uses some form of lazy object would behave differently here. Because `Enumerable#find` returns as soon as it finds a single object which matches its conditions, the `Location#color` method will not necessarily get called on each of the neighboring locations. This means that in the best case scenario, only one new `Location` object would end up getting created. While this particular example is a bit contrived, it's not hard to see why this is a desirable characteristic of lazy objects that cannot be easily emulated via the standard Ruby idiom for delayed execution.

### Future objects

Lazy objects provide certain performance benefits in the sense that they make it possible to avoid unnecessary computation, but they don't do anything to improve the perceived waiting time for any computations that actually need to be run. This is where future objects come in handy.

A future object is essentially an object which immediately begins doing some processing in the background but only blocks if the results are demanded before the thread has finished executing. The example below demonstrates how this sort of object can come in handy for building a simple non-blocking download manager:

```ruby
require "open-uri"
require "lazy"

# Hands each download to a future and lets the caller wait for all of them
# at once.
class DownloadManager
  def initialize
    @downloads = []
  end

  # Wraps the transfer of url into filename in a future and records that
  # future so it can be waited on later.
  #
  # The block form of open is used so the handle that open-uri returns is
  # closed as soon as its contents have been read, instead of lingering until
  # it is garbage collected.
  #
  # NOTE: open-uri stopped patching Kernel#open in Ruby 3.0; on newer Rubies
  # call URI.open with the same arguments instead.
  def save(url, filename)
    downloads << Lazy.future {
      File.binwrite(filename, open(url) { |remote| remote.read })
    }
  end

  # Demands the result of every recorded future, in the order the downloads
  # were added.
  def finish_all_downloads
    downloads.each { |d| Lazy.demand(d) }
  end

  private

  attr_reader :downloads
end

downloader = DownloadManager.new 

# Each save only wraps the transfer in a future, so the message after it does
# not have to wait for the file to arrive.
downloader.save("http://prawn.majesticseacreature.com/manual.pdf", "manual.pdf")
puts "Starting Prawn manual download"

downloader.save("http://sandal.github.com/rbp-book/pdfs/rbp_1-0.pdf", "rbp_1-0.pdf")
puts "Starting download of Ruby Best Practices book"

# Demand every future before the script ends; Ruby does not wait for
# background threads once the main thread finishes.
puts "Waiting for downloads to finish..."
downloader.finish_all_downloads
```

In this particular example the callback doesn't return a meaningful value, and so the `DownloadManager#finish_all_downloads` method makes use of `Lazy.demand` to force each future to wrap up its computations. However, the following example demonstrates that the future objects that _lazy.rb_ provides can also be used as transparent proxy objects:

```ruby
require "open-uri"
require "lazy"

# A single file transfer: the contents are read when the object is created
# and written to disk when save is called.
class Download
  # Reads everything at url into memory and remembers where to write it.
  #
  # The block form of open is used so the handle that open-uri returns is
  # closed as soon as its contents have been read, instead of lingering until
  # it is garbage collected.
  #
  # NOTE: open-uri stopped patching Kernel#open in Ruby 3.0; on newer Rubies
  # call URI.open with the same arguments instead.
  def initialize(url, filename)
    @filename = filename
    @contents = open(url) { |remote| remote.read }
  end

  # Writes the contents to the target file in binary mode.
  def save
    File.binwrite(@filename, @contents)
  end
end

# Collects futures that each build a Download, then saves them all on request.
class DownloadManager
  def initialize
    @downloads = []
  end

  # Wraps the construction of a Download in a future and records that future.
  def save(url, filename)
    pending = Lazy.future { Download.new(url, filename) }
    downloads.push(pending)
  end

  # Calls save on every recorded future, in the order they were added. The
  # futures are used as if they were the Download objects themselves.
  def finish_all_downloads
    downloads.each(&:save)
  end

  private

  attr_reader :downloads
end

downloader = DownloadManager.new 

# Each save only wraps the Download in a future, so the message after it does
# not have to wait for the file to arrive.
downloader.save("http://prawn.majesticseacreature.com/manual.pdf", 
                "manual.pdf")
puts "Starting Prawn manual download"

downloader.save("http://sandal.github.com/rbp-book/pdfs/rbp_1-0.pdf", 
                "rbp_1-0.pdf")
puts "Starting download of Ruby Best Practices book"

# Calling save on each future forces it; Ruby does not wait for background
# threads once the main thread finishes.
puts "Waiting for downloads to finish..."
downloader.finish_all_downloads
```

In both examples, the future object will block as long as necessary to allow the computations to complete, but only when it is forced to do so. Until this occurs, other operations can continue in parallel and/or block the execution of these future objects. This means in the best case scenario, a computation will end up being completed before it is actually needed, and the results will be returned from the future object's cache at that time.

While implementing a generic future object from scratch would not be difficult for anyone who has experience working with threads, concurrency is a weak point for me and I would rather not confuse folks by giving potentially bad advice through a naive implementation of my own. Those who are really itching to see how such an object is implemented should look at the [lazy.rb source code](https://github.com/mental/lazy/blob/master/lib/lazy.rb#L138-146), but if you treat future objects as black boxes you just need to know a few basic things about Ruby's thread model to make use of this construct effectively.

The most important thing to keep in mind is that thread scheduling in standard Ruby is affected by a global interpreter lock (GIL) which makes it so that most computations end up blocking the execution of other threads. Alternative implementations such as JRuby and Rubinius remove this lock, but in standard Ruby this basically means that threads are mostly useful for backgrounding operations such as file and network I/O. This is because unlike most computations, I/O operations will give other threads a chance to run while waiting on their data to become available. Because lazy.rb's implementation is thread based, future objects inherit the same set of restrictions. The other thing to be aware of is that Ruby does not explicitly join all of its threads once the main execution thread completes. This means that if I did not explicitly call `downloader.finish_all_downloads` in the previous example, the threads spun up by my future objects would be terminated if the main thread finished up before the downloads were completed. This may be obvious to anyone with a background in concurrency, but I scratched my head for a bit because of this issue.

Other than those issues, future objects pretty much allow you to solve some basic concurrency problems without knowing a whole lot about how to work with low level concurrency primitives. While the example I've shown here is a bit dull, I can imagine this technique might come in handy for things like sending emails or doing some time intensive computations that are part of an interactive reporting system. Both of these are problems I've had to solve before using tools like [Resque](https://github.com/defunkt/resque), but simple future objects might prove to be a lightweight 
alternative. I'd be curious to hear from our readers who have some concurrency experience whether that seems like a good idea or not, and also whether you have ideas for other potential applications of future objects.

## Reflections

The general concept of wrapping data in results objects isn't that exciting, but the notion of lazy objects and future objects shows that results objects can be imbued with rich behaviors that can make our code more flexible and easier to understand.

While Rubyists are no strangers to the benefits of lazy evaluation, the process of writing this article has led me to believe that we can probably benefit from having some higher level constructs to work with. However, explaining my thoughts on that would take a whole other article.

Similarly, it seems that Ruby provides all the basic tooling necessary for concurrency, even when you take into account the limitations of standard Ruby due to its GIL. It would be nice if we could establish some good patterns and constructs for making this kind of programming more accessible to the amateur. Such constructs may end up hiding some of the details that experienced developers care about, but would likely lead to more performant code without sacrificing maintainability or learnability.

On a closing note, the fact that there are two interesting subcategories of results objects hints that there may be more left to discover. This is similarly true for patterns about arguments. I can feel in my gut that there are other patterns out there just waiting to be discovered, but cannot think of any off the top of my head at the moment. Have you seen anything in the wild that hints at how we can expand on these ideas? If so, please leave a comment! 


================================================
FILE: articles/v2/README.md
================================================
These articles are from Practicing Ruby's second volume, which ran from 
2011-08-23 to 2011-11-30. The manuscripts in this folder correspond to the
following articles on practicingruby.com:

* [Issue 2.1: Ways to load code](http://practicingruby.com/articles/shared/tmxmprhfrpwq) (2011.08.23)
* [Issue 2.2: How to attack sticky problems](http://practicingruby.com/articles/shared/bhftubljbomqpmifbibmzmptlxhoin) (2011.08.30)
* [Issue 2.3: A closure is a double edged sword](http://practicingruby.com/articles/shared/mvzhovpjbghr) (2011.09.06)
* [Issue 2.4: Implementing Enumerable and Enumerator in Ruby](http://practicingruby.com/articles/shared/ggcwduoyfqmz) (2011.09.13)
* [Issue 2.5: Thoughts on regression testing](http://practicingruby.com/articles/shared/ggcwduoyfqmz) (2011.09.20)
* [Issue 2.6: Learning new things, step by step](http://practicingruby.com/articles/shared/vbmlgkdtahzd) (2011.09.27)
* [Issue 2.7: "Unobtrusive Ruby" in practice](http://practicingruby.com/articles/shared/ozkzbsdmagcm) (2011.10.04)
* [Issue 2.8: Ruby and the singleton pattern don't get along](http://practicingruby.com/articles/shared/jleygxejeopq) (2011.10.11)
* [Issue 2.9: Building Unix-style command line applications](http://practicingruby.com/articles/shared/qyxvmrgmhuln) (2011.10.18)
* [Issue 2.10: From requirements discovery to release](http://practicingruby.com/articles/shared/nlhxgszkgenq) (2011.10.27)
* [Issue 2.11: Domain specific API construction](http://practicingruby.com/articles/shared/iptocucwujtj) (2011.11.02)
* [Issue 2.12: Working with binary file formats](http://practicingruby.com/articles/shared/iptocucwujtj) (2011.11.09)
* [Issue 2.13: Designing business reporting applications](http://practicingruby.com/articles/shared/gthgvfebjvyn) (2011.11.17)
* [Issue 2.14: Thoughts on "Arguments and Results", Part 1](http://practicingruby.com/articles/shared/vpxpovppchww) (2011.11.23)
* [Issue 2.15: Thoughts on "Arguments and Results", Part 2](http://practicingruby.com/articles/shared/wdykkrmdfjvf) (2011.11.30)


================================================
FILE: articles/v3/001-quality-software.md
================================================
I ended the second volume of Practicing Ruby by launching an exploration into
the uncomfortable question of what it means to write good code. To investigate
the topic, I began to compile a wiki full of small case studies for each of the
different properties outlined by [ISO/IEC
9126](http://en.wikipedia.org/wiki/ISO/IEC_9126) -- an international standard
for evaluating software quality. While I made good headway on this project
before taking a break for the holidays, I left some of it unfinished and
promised to kick off this new volume by presenting my completed work.

While it is possible to read through the wiki by [starting at the overview on
the homepage](https://github.com/elm-city-craftworks/code_quality/wiki) and then
clicking through it page by page, there is a tremendous amount of content there
on lots of disjoint topics. To help find your way through these materials, I've
summarized their contents below so that you know what to expect.

### Functionality concerns

While we all know that getting our software to work correctly is important, the
functional qualities of our software are often not emphasized as much as they
should be. Issues to consider in this area include:

* The [suitability](https://github.com/elm-city-craftworks/code_quality/wiki/Suitability) 
of our software for serving its intended purpose. As an example, I note the
differences between the _open-uri_ vs. _net/http_ standard libraries and suggest
that while they have some overlap in functionality, they are aimed at very
different use cases.

* The [accuracy](https://github.com/elm-city-craftworks/code_quality/wiki/Accuracy) 
of our software in meeting its requirements. As an example, I discuss how a
small bug in Gruff made the entire library unusable for running a particular
report, even though it otherwise was well suited for the problem.

* The [interoperability](https://github.com/elm-city-craftworks/code_quality/wiki/Interoperability) 
of our software and how it affects our ability to fit seamlessly into the user's
environment. As an example, I discuss at a high level the benefits of using the
Rack webserver interface as compared to writing adapters that directly connect
web frameworks with web servers.

* The [security](https://github.com/elm-city-craftworks/code_quality/wiki/Security) 
of our software and how it affects the safety of our users and the systems our
software runs on. As an example, I discuss a small twitter bot I wrote for
demonstration purposes that had an operating system command injection
vulnerability, and also show how I fixed the problem.

### Reliability concerns

Even if our software does what it is supposed to do, if it does not do so
reliably, it will not do a good job at making users happy. Issues to consider in
this area include:

* The [maturity](https://github.com/elm-city-craftworks/code_quality/wiki/Maturity) 
of our software, i.e. the gradual reduction of unexpected defects over time. As
an example, I discuss some regression tests we've written for the Practicing
Ruby web application.

* The [fault tolerance](https://github.com/elm-city-craftworks/code_quality/wiki/Fault-Tolerance) 
of our software, i.e. how easy it is for us to mitigate the impact of failures
in our code. As an example, I discuss at a high level how ActiveRecord
implements error handling around failed validations, and show a pure Ruby
approximation for how to build something similar.

* The [recoverability](https://github.com/elm-city-craftworks/code_quality/wiki/Recoverability) 
of our software when dealing with certain kinds of failures. As an example, I
discuss various features that resque-retry provides for trying to recover from
background job failures.

### Usability concerns

Once we have code that does its job correctly and does it well, we still need
to think about how pleasant an experience we create for our users. Issues to
consider in this area include:

* The [understandability](https://github.com/elm-city-craftworks/code_quality/wiki/Understandability) 
of our software, in particular how well its functionality is organized and how
well documented it is. As an example, I extract some guidelines for writing a
good README file using Sinatra's README as a reference.

* The [learnability](https://github.com/elm-city-craftworks/code_quality/wiki/Learnability)
of our software, i.e. how easy it is to discover new ways of using the software
based on what the user already knows. As an example, I discuss a change we made
to the Prawn graphics API to make things more consistent and easier to learn.

* The [operability](https://github.com/elm-city-craftworks/code_quality/wiki/Operability) 
of our software, particularly whether we give our users the control and
flexibility they need to get their job done. As an example, I discuss how most
Markdown processors in Ruby function as black boxes, and how RedCarpet 2 takes a
different approach that makes it much easier to customize.

* The [attractiveness](https://github.com/elm-city-craftworks/code_quality/wiki/Attractiveness) 
of our software. As an example, I show the difference between low level and high
level interfaces for interacting with the Cairo graphics library, and illustrate
how the use of syntactic sugar can influence user behavior.

### Efficiency concerns

Ruby has had a reputation for being a slow, resource intensive programming
language. As a result, we need to rely on some special tricks to make sure that
our code is fast enough to meet the needs of our users. Issues to consider in
this area include:

* The [performance](https://github.com/elm-city-craftworks/code_quality/wiki/Performance)
of our software. As an example, I talk at a very high level about the
computationally expensive nature of PNG alpha channel splitting, and how C
extensions can be used to solve that problem.

* The [resource utilization](https://github.com/elm-city-craftworks/code_quality/wiki/Resource-Utilization)
characteristics of our software. While this most frequently means memory and
disk space usage, there are lots of different resources our programs use. As an
example, I talk about the fairly elegant use of file locking in the PStore
standard library.

### Maintainability concerns

No matter how good our software is, it will ultimately be judged by how well it
can change and grow over time. This is the area we tend to spend most of our
time studying, because difficult to maintain projects make us miserable as
programmers. Issues to consider in this area include:

* The [analyzability](https://github.com/elm-city-craftworks/code_quality/wiki/Analyzability) 
of our software, i.e. how easy it is for us to reason about our code. As an
example, I discuss at a high level how the Flog utility assigns scores to
methods based on their complexity, and how that can be used to identify areas of
your code that need refactoring.

* The [changeability](https://github.com/elm-city-craftworks/code_quality/wiki/Changeability)
of our software, which is commonly considered the holy grail of software design.
As an example, I point out connascence as a mental model for reasoning about the
relationships between software components and how easy or hard they are to
change.

* The [stability](https://github.com/elm-city-craftworks/code_quality/wiki/Stability)
of our software, in particular how much impact changes have on users. As an
example, I talk about the merits of designing unobtrusive APIs for reducing the
amount of moving parts in our code.

* The [testability](https://github.com/elm-city-craftworks/code_quality/wiki/Testability)
of our software. As an example, I discuss how useful the SOLID principles are in
making our code easier to test.

### Portability concerns

One thing we don't think about often in Ruby, perhaps not often enough, is how
easy it is for folks to get our software up and running in environments other
than our own. While writing code in a high level language does get us away from
some of the problems that system programmers need to consider, there are still
platform and environment issues that deserve our attention. Issues to consider
in this area include:

* The [adaptability](https://github.com/elm-city-craftworks/code_quality/wiki/Adaptability) 
of our software to the user's environment. As an example, I discuss at a high
level the approach HighLine takes to shield the user from having to write low
level console interaction code.

* The [installability](https://github.com/elm-city-craftworks/code_quality/wiki/Installability) 
of our software. As an example, I discuss some general thoughts about the 
state of installing Ruby software, and look into an interesting approach to 
setting up a Rails application in Jordan Byron's Mission of Mercy clinic 
management project.

* The [co-existence](https://github.com/elm-city-craftworks/code_quality/wiki/Co-existence) 
of our software with other software in the user's environment. As an example, 
I discuss how conflicting monkey patches led me on a wild goose chase in 
one of my Rails applications.

* The [replaceability](https://github.com/elm-city-craftworks/code_quality/wiki/Replaceability) 
of our software as well as the ability for our software to act as a drop in 
replacement for other tools. Because I feel this concept is one baked into the UNIX and open 
source culture, I don't provide a specific case study but instead point out several applications 
of this idea in the wild.

### Reflections

Spending several weeks studying this topic just so I can *start* a discussion
with our readers has been a painful, but enlightening experience for me. As you
can see from the giant laundry list of concerns listed above, the concept of
software quality is much deeper than something like [The Four Simple Rules of
Design](http://www.c2.com/cgi/wiki?XpSimplicityRules) might imply. 

It is no surprise that we yearn for something more simple than what I've
outlined here, but I cannot in good conscience remove any of the focus areas
outlined by ISO/IEC 9126 as being unimportant when it comes to software quality.
While we cannot expect that all of our software will be shining examples of all
of these properties all of the time, we do have a responsibility for knowing how
to spot the tensions between these various concerns and we must do our best to
resolve them in a smart way.

While our intuition and experience may allow us to address most of the issues
I've outlined here at a subconscious level, I feel that more work needs to be
done for us to seriously consider ourselves good engineers. The real challenge
for me personally is to figure out how to continue to study these topics without
stifling my creativity, willingness to experiment, and ability to make decisions
without becoming overwhelmed.

I look forward to hearing your own thoughts on this topic, because it is one
that we probably need to work through together if we want to make any real
progress.


================================================
FILE: articles/v3/002-building-excellent-examples.md
================================================
Good code examples are the secret sauce that makes Practicing Ruby a high-quality learning resource. That said, the art of building excellent examples is one that I think all programmers should practice, not just those folks out there trying to teach for a living. The ability to express ideas clearly through well-focused snippets of code is key to writing good tests, documentation, bug reports, code reviews, demonstrations, and a whole lot of other stuff, too.

In this article, I've identified five patterns I use for expressing ideas through code examples. These techniques run the gamut from building contrived "Hello World" programs to crafting full-scale sample applications using a literate programming style. Each technique has its own strengths and weaknesses, and I've done what I could to outline them where possible.

Although this isn't necessarily a comprehensive list, it can help you start to improve the way you write your examples and also serves as a good jumping-off point for further discussion on the topic.

### Contrived examples

For any given programming language or software library, the odds are pretty good that the first example you'll run is a contrived "Hello World" program. The following example is taken from the Sinatra web framework but is similar in spirit to pretty much every other "Hello World" application out there:

```ruby
require 'sinatra'

# Answer GET requests for /hi; the block's return value is the response body.
get '/hi' do
  "Hello World!"
end
```

This kind of example seems quite useless on the surface and is neither interesting nor educational. As it turns out, these characteristics are precisely what make this "Hello World" program perfect! The contrived nature of the example allows it to serve as a simple sanity check for someone who is trying out Sinatra for the first time.

If you try to run this example and find that it doesn't work correctly, there are only a few possible points of failure. In most cases, failing to get even a "Hello World" program to run correctly can be blamed on one of three things: out-of-date documentation, issues with your environment, or user error. The fact that there are very few moving parts makes it much easier for you to determine the source of your problem than it would be if the example were significantly more complex. This ease of debugging is precisely why most introductory tutorials start off with a "Hello World" program rather than something more exciting.

Although the most common use case for contrived examples is to construct "Hello World" applications, there are other use cases for this technique as well. In particular, contrived examples are a good fit for discussions about syntactic or structural differences between two pieces of code. As an example, consider a short tutorial that explains why a user might want to use Ruby's `attr_reader` functionality. It could start by showing a `Person` class that implements accessors explicitly:

```ruby
# A person record whose readers are written out by hand.
class Person
  def initialize(name, email)
    @name  = name
    @email = email
  end

  # Hand-written reader: returns the value stored in @name.
  def name
    @name
  end

  # Hand-written reader: returns the value stored in @email.
  def email 
    @email
  end
end
```

A followup example could then be provided to show how to simplify the code via `attr_reader`:

```ruby
class Person
  def initialize(name, email)
    @name  = name
    @email = email
  end

  attr_reader :name, :email
end
```

This scenario is very simplistic when compared to the class definitions we write in real projects, but the absence of complicated functionality makes it easier for the reader to focus on the syntactic differences between the two examples. It also allows the novice Ruby programmer to think of the difference between explicitly defining accessors and using `attr_reader` as a simple structural transformation rather than something with complex semantic differences. Although this mental model is not 100 percent accurate, it emphasizes the big picture, which is what actually matters for a novice programmer. The simplicity of these examples makes the general pattern much easier to remember, which justifies hiding a few things behind the curtain to be revealed later.

Unfortunately, the ability of contrived examples to hide the semantics of our programming constructs is just as often a drawback as it is an asset. The more complex a concept is, the more dangerous it is to present simplistic examples rather than working through more realistic scenarios. For example, it is common for object-oriented programming tutorials to use real-world objects and hierarchies to explain how class inheritance works, but the disconnect between these models and the kinds that real software projects implement is so great that this approach completely obfuscates the real power and purpose of object-oriented programming. By choosing a scenario that may feel natural to the reader but does not fit naturally with the underlying programming constructs, this sort of tutorial fails to emphasize the right details and leaves the door open for a wide range of misconceptions. These incorrect assumptions end up getting in the way of learning real object-oriented programming techniques rather than helping develop an understanding of them.

I could easily rant on this topic, but someone else did it for me by writing a great mailing list post entitled [Goodbye, shitty Car extends Vehicle object-orientation tutorial](http://lists.canonical.org/pipermail/kragen-tol/2011-August/000937.html). Despite the somewhat inflammatory title, it is a very insightful post, and I strongly recommend reading it if you want to see a strong argument for the limitations of contrived examples as teaching tools.

Figuring out where to draw the line between when it is appropriate to use a contrived example and when to use one that is based on a practical application is tricky. In general, I try to keep in mind that the purpose of a contrived example is specifically to remove context from the picture. Outside of "Hello World" programs and simple syntactic transformations, a lack of context hurts more than it helps, and so I try to avoid contrived examples as much as I can for pretty much every other use case. 

### Cheap counterfeits

One of my favorite techniques for teaching programming concepts is to construct cheap counterfeits that emulate the surface-level behavior of a more complicated structure. These "poor man's implementations" are similar to contrived examples in that they can hide as much complexity as they'd like from the reader but, because they are grounded by some realistic scenario, do not suffer from being totally disconnected from practical applications.

I have used this technique extensively throughout Practicing Ruby and my other written works, and it almost always works out well. In fact, the issue on [Implementing Enumerable and Enumerator in Ruby](http://practicingruby.com/articles/4) was entirely based on this strategy and turned out to be one of the most popular articles I've written for this journal. Although you are probably already very familiar with this pattern as a Practicing Ruby reader, I can still provide a bit of extra insight by decomposing it for you.

The purpose of building a cheap counterfeit is not to gain a deep understanding of how a certain construct actually works. Instead, the purpose of a counterfeit is to teach people how to steal ideas from other interesting bits of code for their own needs. For example, take the previous `attr_reader` example:

```ruby
class Person
  def initialize(name, email)
    @name  = name
    @email = email
  end

  attr_reader :name, :email
end
```

This is a great feature, because it replaces tedious boilerplate methods with a concise declarative statement. But without some sort of explanation as to how it works, `attr_reader` feels pretty magical and might be perceived as a special case that the Ruby internals are responsible for handling. This misconception can easily be cleared up by showing how to implement a cheap counterfeit version of `attr_reader` in application code:

```ruby
class Module
  def my_attr_reader(*args)
    args.each do |a|
      define_method(a) { instance_variable_get("@#{a}") }
    end
  end
end

class Person
  def initialize(name, email)
    @name  = name
    @email = email
  end

  my_attr_reader :name, :email
end
```

If teaching programmers how to use `attr_reader` is like treating them to a nice fish dinner, teaching them how to implement it is like giving them a fishing pole and showing them how to catch their own meals. Seeing a practical use of `define_method` opens the doors for a huge range of other applications, all of which hinge on the simple concept of dynamic method definition. For example, a similar technique could be used to convert hideous method names like `test_a_user_must_be_able_to_log_in` into the elegant syntax shown here:

```ruby
test "A user must be able to log in" do
  # your test code here
end
```

There are countless other applications of dynamic method definition, many of which I expect Practicing Ruby readers are already familiar with. The point here is that a single example that demystifies a certain technique can make a huge difference in what possibilities someone sees in a given system. This payoff is what makes cheap counterfeits such a tremendously good teaching tool.

An important thing to keep in mind, however, is that this technique is useful mostly for teaching concepts, as opposed to showing someone how a feature is really implemented. If you actually look into the implementation of `attr_reader`, you'll find a number of edge cases that this cheap counterfeit example does not take into consideration. Although these subtleties are not especially relevant if you're just trying to give a contextualized example of how `define_method` can be used, they would be important to point out if you were trying to write a specification for how `attr_reader` is meant to work, which is why cheap counterfeits are not a substitute for case studies of real code but instead serve a different purpose entirely.

### Simplified examples

Digging directly into the source code of a project is the most direct way to understand how its features are implemented, but it can be a somewhat disorienting process. Production code in all but the most trivial projects tends to accumulate edge cases, error-checking code, and other bits of cruft that make it harder to see what the core ideas are. When giving a talk about how something is implemented or writing documentation for potential contributors, it is sometimes helpful to provide simplified examples that demonstrate the key functionality while minimizing distractions.

Suppose I want to do a lightning talk about how [MiniTest](https://github.com/seattlerb/minitest) is implemented, with the goal of attracting new contributors to the project. In a talk like that, I'd definitely need to discuss a bit about how assertions work. A logical place to start might be the `Assertions#assert` method:

```ruby
def assert test, msg = nil
  msg ||= "Failed assertion, no message given."
  self._assertions += 1
  unless test then
    msg = msg.call if Proc === msg
    raise MiniTest::Assertion, msg
  end
  true
end
```

The implementation of `assert` is simple enough that I could probably show it as-is without losing my audience. But if I keep in mind that this code is going to be shown on a slide for just a few seconds, I might show the following simplified example instead:

```ruby
def assert(test, msg=nil)
  msg ||= "Failed assertion, no message given."

  raise(MiniTest::Assertion, msg) unless test
  true
end
```

This code omits some implementation details, but it preserves the main idea, which is that MiniTest's assertions work by raising an exception when a test fails. The fact that `assert` is where the number of assertions is counted is fairly obvious and only adds noise when you want to get a rough idea of how the code works at a glance. Likewise, the fact that a message can be passed in as a `Proc` object rather than a string is an interesting but obscure edge case that does not need to be emphasized. By removing these two statements from the method definition, the core behavior is easier to notice.

The process of creating a simplified example starts with looking at the original source code and then determining which details are essential to expressing the idea you want to express and which details can be considered background noise. The next step is to construct an example that serves as a functional subset of the original implementation when used within a certain context. You don't want to deviate too much from the original idea, but you can clean up the syntax a bit where appropriate to make the example easier to understand. From there, you can treat the example as a substitute for the real implementation for the purposes of demonstration; you just need to make sure to point out that you have simplified things a bit.

With this MiniTest example, the simplified version of the code is only slightly less complicated than the original, so the benefits of using this technique are a bit subdued. In practice, you're much more likely to run into situations in which there are dozens of lines of implementation code but only a handful of them are central to the idea that you are trying to express. In those situations, this pattern is especially effective at cutting through the cruft to get at the real meat of the problem you want to focus on. However, it's worth keeping in mind that even relatively small and easy-to-understand chunks of code can be simplified if they happen to include statements that are not directly relevant to the point you are trying to make.

### Reduced examples

A reduced example is one that reproduces a certain behavior in the simplest possible way. This technique is most commonly used for putting together bug reports and is one of the most important skills you can have as a software developer.

In the [Ruby Best Practices](http://rubybestpractices.com/) book, I told a story about a bug that was spotted in Prawn and how we reduced the original report to something much more simple in order to discover the root cause of the problem. Because this is still the best example I've found of this process in action, I'll summarize that story here rather than telling a new one.

A developer sent us the following bug report to demonstrate that our code for generating fixed-width columns of text had a problem that was causing page breaks to be inserted unnecessarily:

```ruby
Prawn::Document.generate("span.pdf") do
  span(350, :position => :center) do 
    text "Here's some centered text in a 350 point column. " * 100
  end
  
  text "Here's my sentence."

  bounding_box([50,300], :width => 400) do 
    text "Here's some default bounding box text. " * 10 

    pos = bounds.absolute_left - margin_box.absolute_left
    span(bounds.width, :position => pos) do
      text "The rain in Spain falls mainly on the plains. " * 300 
    end
  end

  text "Here's my second sentence."
end
```

In this example, he expected all of his text to be rendered on one page and was trying to show that each time he used the `span` construct, an unnecessary page break was created. He showed that this was the case both within the default page boundaries and within a manually specified bounding box. As far as user reports go, this example was pretty good, because it was specifically designed to show the problem he was having and was clearly not just some broken production code that he wanted help with.

That having been said, an understanding of how Prawn works under the hood made it possible to simplify this example quite a bit, even before investigating further. Because the default page boundaries in Prawn are implemented in terms of bounding boxes and the `bounding_box` method just temporarily swaps those dimensions with new ones, the second part of this report was superfluous. Removing it got the reproducible sample down to the example shown here:

```ruby
Prawn::Document.generate("span.pdf") do
  span(350) do 
    text "Here's some text in a 350pt wide column. " * 20
  end

  text "This text should appear on the same page as the spanning text"
end
```

In making this reduction, I also did some other minor cleanup chores such as reworking the text to be self-documenting and removing the `:position => :center` option for `span`, because it didn't affect the outcome. At this point, even someone without experience in how Prawn works would be able to more easily spot the problem in the example.

Although `span` is not a trivial construct, it had only two possible points of failure: the `bounding_box` method and the `canvas` method. Because `bounding_box` is fundamental to pretty much everything Prawn does and we had plenty of evidence that it was working as expected, we turned our attention to `canvas`.

The purpose of `canvas` is to execute the contents of a block while ignoring all document margins and bounding boxes in place, essentially converting everything to absolute coordinates on the page. After the block is executed, it is supposed to keep the text pointer wherever it left off, which means that it should not trigger a page break unless the text flows beyond the bottom of the page. To test this behavior, we coded up the following example:

```ruby
Prawn::Document.generate("canvas_sets_y_to_0.pdf") do 
  canvas { text "Some text at the absolute top left of the page" }
  text "This text should not be after a pagebreak" 
end
```

After running this example, we noticed that it exhibited the same defect that we saw in the user's bug report. Because this method is almost as deep down the Prawn call chain as you can go, it became clear that at this point we had our reduced example. The benefit of drilling down like this became apparent when we converted our sample code into a regression test:

```ruby
class CanvasTest < Test::Unit::TestCase
  def setup 
    @pdf = Prawn::Document.new
  end

  def test_canvas_should_not_reset_y_to_zero 
    after_text_position = nil

    @pdf.canvas do 
      @pdf.text "Hello World" 
      after_text_position = @pdf.y
    end

    assert_equal after_text_position, @pdf.y 
  end
end
```

After seeing this test fail and applying a quick patch that got it to go green, we went back and ran the original bug report the user provided us with. As predicted, the bad behavior went away and things were once again working as expected.

The benefits of reducing the example before writing a regression test were tremendous. Not only was the test easier to write, but it also ended up capturing the problem at a much lower level than it would have if we had immediately set about codifying the bug report as a unit test. In addition to these benefits, the reduction process itself greatly simplified the debugging process, as it allowed us to proceed methodically to find the root of the problem.

I've mostly used this reduction technique while debugging, but it can also be a useful way to find your way around a complex codebase. By starting with a practical example that exercises the system from the outermost layer, you can drill down into the code and trace your way through the call chains to find how some particular aspect of the software works. Although this is a less direct approach than just reading the documentation, it will give you a better fundamental understanding of how the system hangs together, and it's a fun way to practice code reading.

Whether you are exploring a new codebase or tracking down a bug, the reduction process limits the scope of the things you need to think about, allowing you to dedicate your attention in a more focused way. This effect is somewhat similar to what we find when we make use of simplified examples but is more of a drilling down process than it is a pruning process. Both have their merits, and they can even be used in combination at times.

### Sample applications 

Although all of the techniques I've discussed so far can be quite useful for studying, investigating, and teaching about specific issues, none of them are particularly suitable for demonstrating big-picture topics. When you want to emphasize how things come together, as opposed to how each individual part works, nothing beats the combination of a sample application with a good walkthrough tutorial. Several issues from Practicing Ruby Volume 2 made use of this format and were very well received by the readers here.

In [Learning new things step-by-step](http://practicingruby.com/articles/6), I built a small game for the purpose of demonstrating how to develop software in tiny bite-sized chunks. The nice thing about this approach is that it allows the reader to follow along at home (either mentally or by literally running the code themselves), while proceeding at their own pace. I hope it also encourages folks to experiment and draw their own conclusions rather than just rigidly following a predefined script.

In [Building Unix-style command line applications](http://practicingruby.com/articles/9), I tackled the creation of a Ruby clone of the Unix `cat` utility by focusing on distinct areas of functionality, one at a time. This is a slightly less linear format than the step-by-step approach of the game development article, but it allows readers to look at the complete application from several different angles, depending on the topics that interested them most.

Finally, in [Designing business reporting applications](http://practicingruby.com/articles/13), I totally broke away from linearity by presenting the source code of a full application in literate programming style. This approach allows readers to study the real implementation code and my commentary side by side and to bounce around as they see fit. This lack of explicit structure encourages readers to explore in a free-form fashion rather than focusing on some predefined areas of interest.

Although these articles were some of the most successful ones that I've published here at Practicing Ruby, they were also among the most challenging to write. I had to apply a much higher standard of writing clear and concise code than I would if I were simply trying to make a project easy enough for me to maintain on my own. The prose was tricky to organize, because it's hard to decide which areas to emphasize and which to gloss over in a complete application. For these reasons, sample applications can be a cumbersome and time-consuming learning resource to produce. However, the investment seems to be well worth it in the end.

### Reflections

Writing good examples can be seriously hard work. This is why all too often we see people overusing contrived examples or simply attempting to pass off unrefined snippets of production code as learning materials. However, code examples in all of their myriad forms lay the foundation for how we communicate our ideas as software developers.

An important thing to remember when writing code examples is that the process is in many ways similar to writing prose. If we simply spit out a brain dump without thinking about how it will be interpreted and understood by others, we will end up with crappy results. But if we remember that the main goal of writing our examples is to communicate an idea to our fellow programmers, we naturally begin to ask the questions that lead us to improve our work.

I hope that by sharing these few patterns with you, I've given you some useful ideas for how to improve your code communication skills. Following the patterns I've outlined here will lead you to writing better examples for your documentation, bug reports, unit tests, tutorials, and quite a few other things as well. 

Though the techniques I've shown here are ones that work well in a wide range of contexts, I am sure there are other approaches worth learning about. If you've seen a novel use of code examples in the wild, please let me know! I'd also be happy to hear any other thoughts you have on this topic, and I wouldn't mind helping a few folks come up with good examples for the projects they're working on. If you've got something you want me to take a look at, just leave a comment and I'll be sure to get back to you.


================================================
FILE: articles/v3/003-turing-tarpit.md
================================================
> NOTE: This article describes an interactive challenge that was done in
> realtime at the time it was published. You can still make use of it
> by starting from the initial version of the code, but if you'd
> rather skip to the end results, be sure to read [Issue 3.4](https://practicingruby.com/articles/spiral-staircase-of-refactoring).

A programming language that is endlessly flexible but difficult to use because
of its lack of support for common operations is known as a [Turing
tarpit](http://en.wikipedia.org/wiki/Turing_tarpit). While a range of esoteric
programming languages fall under this category, the [Brainfuck
language](http://en.wikipedia.org/wiki/Brainfuck) is one that stands out for its
extreme minimalism.

Brainfuck manages to be Turing complete in only 8 operations, which means that
despite being functionally equivalent to Ruby in a theoretical sense, it offers
virtually none of the conveniences that a modern programmer has come to expect.
You don't need to go any farther than its "Hello World" program to see that it
isn't a language that any sane person would want to work in:

```brainfuck
++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.
<<+++++++++++++++.>.+++.------.--------.>+.>.
```

However, the simplicity of the language is not without at least some merit. A
consequence of having a somewhat trivial to parse syntax and very basic
semantics is that while Brainfuck may be one of the hardest languages to *use*,
it simultaneously happens to be one of the easiest languages to *implement*.
While it's not quite as easy a task as working on a code kata, it takes roughly
the same order of magnitude of effort to produce a fully functioning Brainfuck
interpreter. The key difference is that you end up with some software that is
much deeper than your average bowling score calculator after you've completed
the exercise.

A functioning Brainfuck interpreter is complex enough that you can begin to ask
serious questions about code quality and overall design. It is also something
that you can build new functionality on top of in a ton of interesting ways. So
in this exercise, the real payoff comes after you've run your first "Hello
World" example and have a working interpreter to play with. The downside is that
it might take you a day or two of work to get to that point, and after all that
effort, you might need to think about doing something that's just a bit more
meaningful with your life. But like most things that involve this sort of
drudgery, being a Practicing Ruby subscriber can help you skip the boring stuff
and get right to the juicy parts.

Rather than a typical article, this issue is instead an interactive challenge
for our readers to try at home. I've posted a [simple and functional Brainfuck
interpreter](https://github.com/elm-city-craftworks/turing_tarpit/tree/starting_point) on GitHub,
and I'm inviting all of you to look for ways of improving it. The code itself is
good in places and not so good in others, and I've intentionally left in some
things that can be improved. Your challenge, if you choose to accept it, is as
follows:

* If you spot things in the code that can be improved, let me know!
* If you spot a bug, file a bug report via GitHub Issues and optionally send a pull request that includes a failing test.
* If you have time to make a refactoring or improvement yourself, fork the project and submit pull requests.
* If you want to add documentation patches, those are welcome too!
* Feel free to work on this with friends and colleagues, even if they aren't Practicing Ruby subscribers.

In [Issue 3.4](https://practicingruby.com/articles/spiral-staircase-of-refactoring), I will
go over the improvements we made to this project as a group, and discuss why
they're worthwhile. Because I know there are several things that need
improvement in this code that are pretty general in nature, I'm reasonably sure
that article will be a generally interesting discussion on software design and
Ruby idioms, as opposed to a collection of esoterica. But since I don't know
what to expect from your contributions, the exact contents will be a surprise
even to me.


================================================
FILE: articles/v3/004-climbing-the-spiral-staircase-of-refactoring.md
================================================
In [Issue 3.3](http://practicingruby.com/articles/shared/bwgflabwncjv), I presented a proof-of-concept Ruby implementation of the [Brainfuck programming language](http://en.wikipedia.org/wiki/Brainfuck) and challenged Practicing Ruby readers to improve upon it. After receiving several patches that helped move things along, I sat down once again to clean up the code even further. What I came to realize as I worked on my revisions is that the refactoring process is very similar to climbing a spiral staircase. Each structural change to the code simultaneously left the project back where it started along one vector while moving it forward along another.

Because we often look at the merits of a given refactoring technique within the context of a single transition from worse code to better code, it's easy to mistakenly assume that the refactoring process is much more linear than it actually is. In this article, I've tried to capture a much wider angle view of how refactoring really works in the wild. The end result is a story which I hope will spark some good discussions about how we can improve our code quality over time.

### Prologue. Everything has to start somewhere

I decided to name my interpreter [Turing Tarpit](http://en.wikipedia.org/wiki/Turing_tarpit), because that term is perfectly apt for describing languages like Brainfuck. In a nutshell, the term refers to any language which is infinitely flexible, yet nearly impossible to use for anything practical. It turns out that building this sort of mind trap for programmers is quite easy to do.

My first iteration was easy enough to build, and consisted of three objects: a `Tape`, an `Interpreter`, and a `Scanner`. The rough breakdown of responsibilities was something like this:

* The [Tape object](https://github.com/elm-city-craftworks/turing_tarpit/blob/starting_point/lib/turing_tarpit.rb#L103-149) implemented something similar to the storage mechanism in a [Turing machine](http://en.wikipedia.org/wiki/Turing_machine#Informal_description). It provided mechanisms for accessing and modifying numeric values in cells, as well as a way to increment and decrement the pointer that determined which cell to operate on.

* The [Interpreter object](https://github.com/elm-city-craftworks/turing_tarpit/blob/starting_point/lib/turing_tarpit.rb#L7-34) served as a mapping between Brainfuck's symbolic operators and the operations provided by the `Tape` object. It also implemented the I/O functionality required by Brainfuck.

* The [Scanner object](https://github.com/elm-city-craftworks/turing_tarpit/blob/starting_point/lib/turing_tarpit.rb#L36-69) was responsible for taking a Brainfuck source file as input and transforming it into a stream of operations that could be handled by the `Interpreter` object. For the most part this simply meant reading the source file one character at a time, but this object also needed to account for Brainfuck's forward and backward jump operations.

While my initial implementation was reasonably clean for a proof-of-concept, it definitely had room for improvement. I decided to ask for feedback early in the hopes that folks would find and fix the things I knew were problematic while simultaneously checking my blindspots for issues that I hadn't noticed myself.

### Act I. Getting a fresh perspective on the problem

Some of the issues brought up by contributors were fairly obvious housekeeping chores, but nonetheless made the project nicer to work with:

* Steve Klabnik [requested a way to run the whole test suite at once](https://github.com/elm-city-craftworks/turing_tarpit/pull/3) instead of file by file. He had provided a patch with a Rakefile, but since the project didn't have any immediate need for other rake tasks, we ended up deciding that a simple _test/suite.rb_ file would be sufficient. Notes were added to the README on how to run the tests.

* Renato Riccieri [broke the classes out into individual files](https://github.com/elm-city-craftworks/turing_tarpit/pull/6). The original implementation had everything in _lib/turing_tarpit.rb_, simply for convenience reasons while spiking. Breaking the classes into individual files brought the project more in line with [standard Ruby packaging conventions](http://chneukirchen.github.com/rps/).

* Benoit Daloze [refactored some ugly output code](https://github.com/elm-city-craftworks/turing_tarpit/pull/2) to use `putc(char)` instead of `print("" << char)`. Since the latter was obviously a hack due to my lack of awareness of the `putc` method, this was a welcome contribution.

After this initial round of cleanup, we ended up thinking through a pair of more substantial problems: the inconsistent use of private accessors, and a proposed refactoring to break up the `Scanner` object into two separate objects, a `Tokenizer` and a `Scanner`.

**The story behind my recent private accessor experiments**

Ryan LeCompte was the one to bring up [the question about private accessors](https://github.com/elm-city-craftworks/turing_tarpit/issues/1), and was curious about why I had used them in some places but referenced instance variables directly in others. The main reason for this was simply that the use of private accessors is a new experiment for me, and so in my haste to get a first version out the door, I remembered to use them in some places but not in others.

This project in particular posed certain challenges for using private accessors conveniently. A specific example of where I ran into some weird edge cases can easily be seen in the `Tape` object:

```ruby
module TuringTarpit
  class Tape
    def initialize
      self.pointer_position = 0
      # ...
    end

    def increment_pointer
      self.pointer_position = pointer_position + 1
    end

    # ...

    private

    attr_writer :pointer_position
  end
end
```

If you just glance quickly at this class definition, it is very tempting to try to refactor `increment_pointer` so that it uses convenient `+=` syntax, resulting in something like the code below:

```ruby
def increment_pointer
  self.pointer_position += 1
end
```

In most cases, this refactoring would be a good one because it makes the code slightly less verbose without sacrificing readability. However, it turns out that Ruby does not extend the same private method special casing to `self.foo += something` as it does to `self.foo = something`. This means that if you attempt to refactor this code to use `+=` it ends up raising a `NoMethodError`. Because this is definitely a downside of using private accessors, it's reasonable to ask why you'd bother to use them in the first place rather than using public accessors or simply referring to instance variables directly.

The best reason I can find for making use of accessors in general vs. instance variables is simply that the former are much more flexible. New behavior such as validations or transformations can be added later by changing what used to be vanilla accessors into ordinary method definitions. Additionally, if you accidentally introduce a typo into your code, you will get a `NoMethodError` right away rather than having to track down why your attribute is `nil` when you didn't expect it to be in some completely different place in your code.

The problem with making accessors public is that it hints to the consumer that it is meant to be touched and used, which is often not the case at all, especially for writers. While Ruby makes it trivial to circumvent privacy protections, a private method communicates to the user that it is meant to be treated as an implementation detail and should not be depended on. So the reason for using a private accessor is the same as the reason for using a private method: to mark the accessor as part of the internals of the object.

The interesting thing I stumbled across in this particular project is that if you take this technique to the extreme, it is possible to build entire applications without ever explicitly referencing an instance variable. It comes at the cost of the occasional weird edge case when calling private methods internally, but makes it possible to treat instance variables as a whole as a _language implementation detail_, rather than an _application implementation detail_. Faced with the opportunity to at least experiment with that idea, I decided to make the entire Turing Tarpit codebase completely free of instance variables, which ended up taking very little effort.

The jury is still out on whether or not this is a good idea, but I plan to keep trying the idea out in my projects and see whether I run into any more issues. If I don't experience problems, I'd say this technique is well worth it because it emphasizes message-passing rather than state manipulation in our objects. 

**Splitting up the Scanner object**

After helping out with a few of the general housekeeping chores, Steve Klabnik then turned his attention to one of the weakest spots in the code, the `Scanner` object. He pointed out that having an object with dependencies on a whole lot of private methods is a bit of a code smell, and focused specifically on the `Scanner#next` method. The original implementation looked like this:

```ruby
module TuringTarpit
  class Scanner
    # ...

    def next(cell_value)
      validate_index

      element = @chars[@index]
      
      case element
      when "["
        jump_forward if cell_value.zero?

        consume
        element = @chars[@index]
      when "]"
        if cell_value.zero?
          while element == "]"
            consume
            element = @chars[@index]
            validate_index
          end
        else
          jump_back
          consume
          element = @chars[@index]
        end
      end
      
      consume
      element
    end
  end
end
```

Steve pointed out that the `Scanner#next` method was really doing more of a tokenizing operation, and that most of the scanning work was actually being done by the various private methods that were being used to traverse the underlying string. He prepared a patch which made this relationship explicit by introducing a `Tokenizer` object which would provide a method to replace `Scanner#next`. His newly introduced object allowed for a re-purposing of the `Scanner` object which allowed its methods to become public:

```ruby
module TuringTarpit
  class Tokenizer
    # ...

    def next(cell_value)
      scanner.validate_index

      element = scanner.current_char

      case element
      when "["
        scanner.jump_forward if cell_value.zero?

        scanner.consume
        element = scanner.current_char
      when "]"
        if cell_value.zero?
          while element == "]"
            scanner.consume
            element = scanner.current_char
            scanner.validate_index
          end
        else
          scanner.jump_back
          scanner.consume
          element = scanner.current_char
        end
      end

      scanner.consume
      element
    end
  end
end
```

The thing in particular I liked about this patch is that it abstracted away some of the tedious index operations that were originally present in `Scanner#next`. As much as possible I prefer to isolate anything that can cause off-by-one errors or other such nonsense, and this refactoring did a good job of addressing that issue.

The interesting thing about this refactoring is that while I intended to work on the same area of the code if no one else patched it, I had planned to approach it in a very different way. My original idea was to implement some sort of generic stream data structure and reuse it in both `Scanner` and `Tape`. However, seeing that Steve's patch at least partly addressed my concerns while possibly opening some new avenues as well, I abandoned that idea and merged his work instead.

### Act II. Building a better horse

After applying the various patches from the folks who participated in this challenge, the code was in a much better place than where it started. However, much work was still left to be done!

In particular, the code responsible for turning Brainfuck syntax into a stream of operations still needed a lot of work. The `Tokenizer` class that Steve introduced was an improvement, but without further revisions would simply serve as a layer of indirection rather than as an abstraction. Zed Shaw describes the difference between these two concepts very eloquently in his essay [Indirection Is Not Abstraction](http://zedshaw.com/essays/indirection_is_not_abstraction.html) by stating that _"Abstraction is used to reduce complexity. Indirection is used to reduce coupling or dependence."_

As far as the `Tokenizer` object goes, Steve's patch reduces coupling somewhat by pushing some of the implementation details down into the `Scanner` object. However, the procedure is pretty much identical with the exception of the lack of explicit indexing code, and so the baseline complexity actually increases because what was once done by one object is now split across two objects.

To address this problem, the dividing lines between the two objects needed to be leveraged so that they could interact with each other at a higher level. It took me a while to think through the problem, but in doing so I realized that I could now push more functionality down into the `Scanner` object so that `Tokenizer#next` ended up with fewer moving parts. After some major gutting and re-arranging, I ended up with a method that looked like this:

```ruby
module TuringTarpit
  class Tokenizer
    # ...

    def next(cell_value)
      case scanner.next_char
      when Scanner::FORWARD_JUMP
        if cell_value.zero?
          scanner.jump_forward
        else
          scanner.next_char
        end
      when Scanner::BACKWARD_JUMP
        if cell_value.zero?
          scanner.skip_while(Scanner::BACKWARD_JUMP)
        else
          scanner.jump_back
        end
      end

      scanner.current_char
    end
  end
end
```

After this refactoring, the `Tokenizer#next` method was a good deal more abstract in a number of ways:

* It expected the `Scanner` to handle validations itself rather than telling it when to check the index 

* It no longer referenced Brainfuck syntax and instead used constants provided by the `Scanner`

* It eliminated a lot of cumbersome assignments by reworking its algorithm so that `Scanner#current_char` always referenced the right character at the end of the scanning routine.

* It expected the `Scanner` to remain internally consistent, rather than handling edge cases itself.

These reductions in complexity made a hugely positive impact on the readability and understandability of the `Tokenizer#next` method. While all of these changes could have technically been made before the split between the `Scanner` and `Tokenizer` happened, cutting the knot into two pieces certainly made untangling things easier. This is why indirection and abstraction often go hand in hand, despite the fact that they are very different concepts from one another.

### Act III. Mountains are once again merely mountains

After building on top of Steve's work to simplify the syntax-processing code even further, I finally felt like that part of the project was in decent shape. I then decided to turn my attention back to the `Interpreter` object, since it had not received any love from the challenge participants. The original code for it looked something like this:

```ruby
module TuringTarpit
  class Interpreter
    def run
      loop do
        case tokenizer.next(tape.cell_value)
        when "+"
          tape.increment_cell_value
        when "-"
          tape.decrement_cell_value
        when ">"
          tape.increment_pointer
        when "<"
          tape.decrement_pointer
        when "."
          putc(tape.cell_value)
        when ","
          value = STDIN.getch.bytes.first
          next if value.zero?

          tape.cell_value = value
        end
      end
    end
  end
end
```

While this implementation wasn't too bad, there were two things I didn't like about it. The first issue was that it directly referenced Brainfuck syntax, which sort of defeated the purpose of having the tokenizer be syntax independent. The second problem was that I found the case statement to feel a bit brittle and limiting. What I really wanted was a dynamic dispatcher similar to the following method:

```ruby
def run
  loop do
    if operation = tokenizer.next(evaluator.cell_value)
      tape.send(operation)
    end
  end
end
```

In order to introduce this kind of functionality, I'd need to find a place to introduce a simple mapping from Brainfuck syntax to operation names. I already had the keys and values in mind, I just needed to find a place to put them:

```ruby
OPERATIONS = { "+" => :increment_cell_value,
               "-" => :decrement_cell_value,
               ">" => :increment_pointer,
               "<" => :decrement_pointer,
               "." => :output_cell_value,
               "," => :input_cell_value }
```

Figuring out how to make this work was surprisingly challenging. I found that the extra layers of indirection between the `Tape` and the `Scanner` meant that any change made too far down the chain would need to be echoed all the way up it, and that changes made towards the top felt tacked on and out of place. This eventually led me to question what the separation between `Scanner` and `Tokenizer` was really gaining me, as well as the separation between `Interpreter` and `Tape`.

After a fair amount of ruminating, I decided to take my four objects and join them together at the seams so that only two remained. The `Scanner` and `Tokenizer` ended up getting joined back together to form a new `Interpreter` class. The job of the `Interpreter` is to take Brainfuck syntax and turn it into a stream of operations. You can get a rough idea of how it all came together by checking out the following code:

```ruby
module TuringTarpit
  class Interpreter
    FORWARD_JUMP = "["
    BACKWARD_JUMP = "]"

    OPERATIONS = { "+" => :increment_cell_value,
                   "-" => :decrement_cell_value,
                   ">" => :increment_pointer,
                   "<" => :decrement_pointer,
                   "." => :output_cell_value,
                   "," => :input_cell_value }

    def next_operation(cell_value)
      case next_char
      when FORWARD_JUMP
        if cell_value.zero?
          jump_forward
        else
          skip_while(FORWARD_JUMP)
        end
      when BACKWARD_JUMP
        if cell_value.zero?
          skip_while(BACKWARD_JUMP)
        else
          jump_back
        end
      end

      OPERATIONS[current_char]
    end

    # ... lots of private methods are back, but now fine-tuned.
  end
end
```

The old `Interpreter` object and `Tape` object were also merged together, forming a single object I ended up calling `Evaluator`. The job of the `Evaluator` object is to take a stream of operations provided by the newly defined `Interpreter` object and then execute them against a Turing Machine-like data structure. In essence, the `Evaluator` object is nothing more than the original `Tape` object I implemented along with a few extra methods which account for the things the original `Interpreter` object was meant to do:

```ruby
module TuringTarpit
  class Evaluator 
    def self.run(interpreter)
      evaluator = new

      loop do
        if operation = interpreter.next_operation(evaluator.cell_value)
          evaluator.send(operation)
        end
      end
    end

    def output_cell_value
      putc(cell_value)
    end

    def input_cell_value
      value = $stdin.getch.ord
      return if value.zero?

      self.cell_value = value
    end

    # other methods same as original Tape methods
  end
end
```

I had mixed feelings about recombining these objects, because to some extent it felt like a step backwards to me. In particular, I think this refactoring resulted in some minor violations of the [Single Responsibility Principle](http://en.wikipedia.org/wiki/Single_responsibility_principle), and increased the overall coupling of the system somewhat. However, the independence of the four different objects the system previously consisted of seemed artificial at best. To the extent that they could be changed easily or swapped out for one another, I could not think of a single practical reason why I'd actually want that kind of flexibility. In this particular situation it turned out that recombining the objects greatly reduced their communications overhead, and so was worth the loss in generality.

### Epilogue. Sending the ship out to sea

I was really tempted to keep noodling on the design of this project, because even in my final version of the code I still felt that I could have done better. But at a certain point I decided that I could end up getting caught in this trap forever, and the only way to free myself from it was to wrap up my work and just ship the damn thing. This ultimately meant that I had to take care of several chores that neither I nor the various participants in this challenge bothered to work on earlier:

* I added a [set of integration tests](https://github.com/elm-city-craftworks/turing_tarpit/blob/act3/test/integration/evaluator_test.rb) which ran the `Evaluator` against a couple sample Brainfuck programs to make sure we had some decent end-to-end testing support. Found a couple bugs that way.

* I set up and ran [simplecov](https://github.com/colszowka/simplecov) to check whether my tests were at least *running* all the implementation code, and ended up spotting a faulty test which wasn't actually getting run.

* I added a [bin/turing_tarpit](https://github.com/elm-city-craftworks/turing_tarpit/blob/act3/bin/turing_tarpit) file so that you can execute Brainfuck programs without building a Ruby shim first. 

* Did the usual gemspec + Gemfile dance and pushed a 1.0.0 gem to rubygems.org. Typically I'd call a project in its early stages a 0.1.0 release, but I honestly don't see myself working on this much more so I might as well call it 'production ready'.

After I wrapped up all these chores, I decided to go back and check out what my [flog](https://github.com/seattlerb/flog) complexity scores were for each stage in this process. It turns out that the final version was the least complex, with the lowest overall score, lowest average score, and lowest top-score by method. The original implementation came in second place, and the other two iterations were in a distant third and fourth place. While that gave me some reassurances, it doesn't mean much except that Flog seems to really hate external method calls.

### Reflections

This has been one of my favorite articles to write for Practicing Ruby so far. It forced me to look at the refactoring process in a much more introspective way than I have typically done in the past, and gave me a chance to interact with some of our awesome readers. I do think it ended up raising more questions and challenges in my mind than it did give me answers and reassurances, but I suppose that's a sign that learning happened.

While I found it very hard to summarize the refactoring lifecycle for this project, my hope is that I've at least given you a glimpse of the spiral staircase metaphor I chose to name this article after. If it didn't end up making you feel too dizzy, I'd love to hear your thoughts about this exercise as well as what your own process is like when it comes to refactoring code.


================================================
FILE: articles/v3/005-framework-design-and-implementation.md
================================================
Ruby is a great language for building application frameworks, particularly
micro-frameworks. The sad thing is that by the time most frameworks become
popular, they end up quite complicated. To discover the joy of building reusable
scaffolding for others, it's necessary to take a look at where the need for that
scaffolding comes from in the first place.

In the January 2012 core skills session at Mendicant University, I asked our
students to each build multi-user email based applications. While the students
were working on very different projects, there was a ton of boilerplate that was
common between them all. Because it was too painful to watch the same bits of
code get written again and again in slightly different ways, I decided to build
a tiny framework to solve this problem.

In this issue of Practicing Ruby and the one that follows it, I'm going to have
you work through the code I wrote and help me figure out what goes into building
a good application framework. The goal for this issue is to generate ideas and
questions about the codebase. All of what we learn from this exercise will be
neatly packaged up and synthesized in time for Issue 3.6, but for now I'm
looking for folks to get their hands dirty.

## The Challenge

I would like you to spend at least the same amount of time you'd ordinarily
spend reading a Practicing Ruby article actively reading and working through
[Newman 0.1.1](https://github.com/mendicant-original/newman/tree/v0.1.1), my
micro-framework for email based applications. I have intentionally left the
source uncommented for two reasons: to get you to practice your code reading
skills and to get your candid feedback on the strengths and weaknesses of my
overall design without influencing you too much.

As you read the code, don't just passively click through files on github!
Instead, pull down the source and play with it: Run the examples if you can, or
even better, build your own examples. Try to break stuff if you think you might
be able to find a bug or two, or try to add a new feature you find interesting.
This is an open sandbox to play in!

Once you have managed to find your way around, you're encouraged to start
actively collaborating. I'll be available via the #newman IRC channel and
[newman@librelist.org](mailto:newman@librelist.org) to listen to any ideas or questions
you have. Of course, feel free to use Github for bug reports, feature requests,
and comments on pull requests / commits.

In this code I've tried to apply pretty much everything I've ever taught via
Practicing Ruby whenever there was an opportunity to do so. I've also broken
away from established Ruby conventions in places to explore new ideas. Reading
it will be worth your time, and if you actively involve yourself in the
conversations around it, you'll be sure to level up your Ruby skills in no time. 

**One last thing: Don't be afraid to ask where to get started if you feel stuck.
The purpose of this exercise is to learn, and I will do what I can to help you
get a lot out of this challenge.**


================================================
FILE: articles/v3/006-framework-design-and-implementation.md
================================================
In [Issue 3.5](http://practicingruby.com/articles/22), I  challenged Practicing Ruby subscribers to read through and play with the uncommented source code of [Newman 0.1.1](https://github.com/mendicant-original/newman/tree/v0.1.1), the first release of my micro-framework for building email-centric applications. My hope was that by looking through the implementation of a framework in its very early stages of development, readers would be able to familiarize themselves with the kinds of challenges involved in building this sort of project.

If you didn't participate in that challenge, I recommend spending an hour or two working through it now before reading the rest of this article. My feeling was (and is) that because framework development is about taking care of a thousand tiny details, it's important to see where this kind of project begins before you can really appreciate where it ends up. Assuming you've gone ahead and done that, we can move on to this week's exercise.

### The challenge revisited

I had originally planned to provide a nice annotated walk-through of Newman's implementation up front, but then decided it'd be better if you had a chance to explore it without much guidance before sharing my own explanations of how it all hangs together.

However, this would be little more than an exercise in code reading if I didn't revisit that challenge and provide you with comprehensive implementation notes. With that in mind, you can now read [the fully documented source code](http://mendicant-original.github.com/newman/lib/newman.html), complete with little bits of design ruminations peppered throughout the code. This ought to answer some of the questions that were rattling around in the back of your mind, and even if it didn't, it may spark new questions or ideas that you can share with me as they arise.

Just reading through the documented codebase should teach you a lot about how a micro-framework can be built in Ruby. But the process of doing so might still leave you feeling a bit disoriented because it provides a view of the big picture in terms of dozens of microscopic snapshots rather than exposing a handful of bright-line items to focus on.  With that in mind, I've decided to outline a few of the recurring obstacles I kept running into even in the first week of development on this project, as I think they'll be friction points for frameworks of all varieties.

### Lessons learned the hard way

There is a ton of content to read in that source walk-through, so I'll try to keep these points brief in the hopes that they'll spark some discussions that might possibly lead to further investigation in future articles. But these are the main things to watch out for if you're designing your own framework or contributing to someone else's early stage project:

**1) Dealing with global state sucks**

In the context of Newman, I had to deal with global state in the form of configuration settings, logging, mailer objects, and persistence layers. The first version of Newman had to resort to turning many of its objects into singleton objects because any object which manipulates global state can have global side effects. 

The viral nature of singleton objects was something that I rarely encountered in application or even library code, but became blindingly apparent in working on this framework. For example, Newman's first version shipped with a `Newman::Mailer` object, but because this object was using Mail's global settings functionality, it was not practical to ever create more than one `Newman::Mailer` object. This was really annoying, because it meant that Newman would be limited to monitoring a single inbox per process, which seems like an artificial restriction. But because a `Newman::Server` object is essentially a router designed to bridge our mailer object to our application objects, it too needed to become a singleton object!

We eventually were able to work around this for the most part by using some low level APIs provided by the mail gem, and this will pave the way for multi-inbox support in Newman in the near future. But I was sort of shocked at how much impact depending on a singleton object can have on the overall flexibility of a framework, because I had not experienced this problem in my usual work on applications and libraries.

For the things that'd be ordinarily implemented as singleton objects, such as loggers or configuration objects, I took care to make it so that even if in practice the system makes use of globally available state, the structure allows for that to be changed with minimal impact. As an example of this, you can see that Newman actually passes its setting objects, data storage objects, and loggers down the call chain to any object that needs them rather than having those objects reference a constant or global variable. This makes it possible for isolation to occur at any point in the chain, and makes it so that Newman has very few hard dependencies on shared state, with its use only being a matter of convenience. The unfortunate side effect of this sort of design is a bit of repetitive code, but I've tried to minimize that where possible by providing convenience constructors and factory methods that make this job easier.

**2) Handling application errors in server software is hard**

In the first version of Newman, any application error would cause the whole server to come crashing down with it. This is definitely not the right way to do things, at least not by default, as it means that a problem with a single callback in a single application can bring down a whole process that is otherwise working as expected.

You might think that the solution to this is to simply rescue application errors and log them, and that is more-or-less the approach I chose for solving this problem. However, I quickly ran into an issue with this in testing. I didn't want a bunch of verbose log output while running my integration tests, so I ran the server with logging turned off. But soon enough, I was getting tests which were failing but giving me no feedback at all as to why that was happening. I eventually discovered that this was due to application errors being swallowed silently, causing the application to fail to respond but not give any feedback that would help with debugging them. The code was not raising an exception, so the tests were not halting with an error, they were just failing.

To solve this issue, I added a server configuration object which allowed toggling exception raising on and off. When that setting was enabled, the server would halt, which is exactly the behavior I wanted in my tests. This did the trick and it's been mostly smooth sailing since then. But the question remains: what are the best defaults to use for this sort of thing? I'm thinking that for test mode, logging should be off and exception raising should be on, and for the default runtime behavior, it should be exactly the opposite. Is that sane? I don't really know, but I suppose it's worth a try.

Another open question I have here is how much effort should be put into only rescuing certain kinds of errors. In most cases, "log and move on" seems like the right behavior from the server's perspective, but could this lead to weird edge cases? As the framework designer, it's my job to help make it hard for the application developer to shoot himself in the foot. But unfortunately right now I don't really know where to aim. More research is required here.

**3) Frameworks are tricky to test**

The whole point of a framework is to tie together a bunch of loose odds and ends to produce a cohesive environment for application development. Writing tests for applications developed within a framework should be *easier* than writing tests for applications developed standalone, but writing tests for the framework itself presents new and interesting challenges that aren't typically encountered in ordinary application development.

When I first started working on Newman, I wasn't writing any tests at all
because I had no faith that any of the objects I was creating were going to
survive first contact with real use cases. Instead, I focused on building little
example applications which moved the functionality along and served as a way of
manually testing the code in a fairly predictable way. But after even a modest
amount of functionality was built, the lack of automated testing made it so that
each new change to the system involved a long, cumbersome, and error prone
manual testing session, to the point where it was no longer practical.

From there, I decided to build some simple "live tests" that would essentially
script away the manual checking I was doing, running the example programs
automatically, and automating the sending and checking of email to give me a
simple red/green check. The process of introducing these changes required me to
make some changes to the system, such as allowing the system to run tick by tick
rather than in a busy-wait loop, among other things. This cut down some of the
manual testing time and made the test plan more standardized, but was still very
brittle because cleaning up after a failed test was still a manual process.

Sooner or later, we introduced some surface-level test doubles, such as a simple
TestMailer which could serve as a stand-in replacement for the real mail object.
With this object in place it was possible to convert some of the live tests to a
set of acid tests which were capable of testing the system from end to end
reaching all systems except for the actual mail interactions. This was a huge
improvement because it made basically the same tests run in fractions of a
second rather than half a minute, but I'm still glad it's not where we started
at. Why? Simply because email is a super messy domain to work in and five
minutes of manual testing will expose many problems that hours of work on finely
crafted automated tests would never catch unless you happen to be an email
expert (I'm not). The only thing I regret is that I should have developed these
tests concurrently with my manual testing, rather than waiting until the pain
became so great that the project just ground to a halt.

Even after these improvements, Newman 0.2.0 still ended up shipping with no unit
tests. Part of this is because of the lack of available time I had to write
them, but the other part is that it still feels like a challenge for me to
meaningfully test isolated portions of the framework since they literally will
never be useful in isolation. I'm stuck in between a rock and a hard place,
because the use of mock objects feels too artificial, but dragging in the real
dependencies is an exercise in tedium. I'd love some guidance on how to test
this sort of code effectively and will be looking into how things like Sinatra
and Rails do their testing to see if I can learn anything there.

But one thing is for sure, I wouldn't suggest trying to build a framework unit
test by unit test. The problem domain is way too messy with way too many
interlocking parts for that to be practical. I think if I took the TDD approach
I'd still be working on getting the most basic interactions working in Newman
today rather than talking about its second major release. Still, maybe I'm just
doing it wrong?

**4) Defining a proper workflow takes more effort than it seems**

This obstacle is hard to describe succinctly, so I won't try to do so. But the
main point I want to stress is that many frameworks are essentially nothing more
than a glorified tool for ferrying a bunch of request data around and building
up a response in the end. But deciding where to sneak in extension points, and
where to give the application developer control vs. where to make a decision for
them is very challenging.

Try to start with one of Newman's examples and trace the path from the point
where an email is received to the point where a response gets sent out. Then let
me know what you think of what I've done, and perhaps give me some ideas for how
I can do it better. I'm still not happy with the decomposition as it is, but I'm
struggling to figure out how to fix it.

**5) Modularity is great, but comes at a cost**

As soon as you decide to make things modular, you have to be very careful about
baking assumptions into your system. This means making interfaces between
objects as simple as possible, limiting the amount of dependencies on shared
state, and providing generic adapter objects to wrap specific implementation
details in some contexts. This is another thing that's hard to express in a
concise way, but the point is that modularity is a lot more complicated when you
are not just concerned about reusability/replaceability within a single
application, but instead within an entire class of applications, all with
somewhat different needs.

I've been trying to ask myself the question of whether a given bit of
functionality really will ever be customized by a third-party extension, and if
I think it will be, I've been trying to imagine a specific use case. If I can't
find one, I decide to avoid generalizing my constructs. However, this is a
dangerous game and finding the right balance between making a system highly
customizable and making it cohesive is a real challenge. It's where all the fun
problems come from with framework design, but is also the source of a lot of
headaches.

### Reflections

I'm afraid, dear Practicing Rubyist, that once again I've raised more questions
than answers. I always worry when I find myself over my own head that perhaps
I've lost some of you in the process. But I want to emphasize the fact that this
journal is meant to chronicle a collective learning process for all of us, not
just a laundry list of developer protips that I can pull off the top of my head.
Even if this series of articles was hard to digest, it will pave the way for
more neatly synthesized works in the future.

Also, don't underestimate your ability to contribute something to this
conversation! If any ideas or questions popped up in your head while reading
through these notes, please share them without thinking about whether or not
your insights would be worth sharing. I've been continuously impressed by the
quality of the feedback around here, so I'd love to hear what you think.


================================================
FILE: articles/v3/007-criteria-for-disciplined-inheritance.md
================================================
Inheritance is a key concept in most object-oriented languages, but applying it
skillfully can be challenging in practice. Back in 1989, [M.
Sakkinen](http://users.jyu.fi/~sakkinen/) wrote a paper called [Disciplined
inheritance](http://scholar.google.com/scholar?cluster=5893037045851782349&hl=en&as_sdt=0,7&sciodt=0,7)
that addresses these problems and offers some useful criteria for working around
them. Despite being over two decades old, this paper is extremely relevant to
the modern Ruby programmer.

Sakkinen's central point seems to be that most traditional uses of inheritance lead to poor encapsulation, bloated object contracts, and accidental namespace collisions. He provides two patterns for disciplined inheritance and suggests that by normalizing the way that we model things, we can apply these two patterns to a very wide range of scenarios. He goes on to show that code that conforms to these design rules can easily be modeled as ordinary object composition, exposing a solid alternative to traditional class-based inheritance.

These topics are exactly what this two-part article will cover, but before we can address them, we should establish what qualifies as inheritance in Ruby. The general term is somewhat overloaded, so a bit of definition up front will help start us off on the right foot. 

### Flavors of Ruby inheritance

Although classical inheritance is centered on the concept of class-based hierarchies, modern object-oriented programming languages provide many different mechanisms for code sharing. Ruby is no exception: it provides four common ways to model inheritance-based relationships between objects.

* Classes provide a single-inheritance model similar to what is found in many other object-oriented languages, albeit lacking a few privacy features.

* Modules provide a mechanism for modeling multiple inheritance, which is easier to reason about than C++ style class inheritance but is more powerful than Java's interfaces.

* Transparent delegation techniques make it possible for a child object to dynamically forward messages to a parent object. This technique has similar effects as class-/module-based modeling on the child object's contract but preserves encapsulation between the objects.

* Simple aggregation techniques make it possible to compose objects for the purpose of code sharing. This technique is most useful when the subobject is not meant to be a drop-in replacement for the superobject.

Although most problems can be modeled using any one of these techniques, they each have their own strengths and weaknesses. Throughout both parts of this article, I'll point out the trade-offs between them whenever it makes sense to do so.

### Modeling incidental inheritance 

Sakkinen describes **incidental inheritance** as the use of an inheritance-based modeling approach to share implementation details between dissimilar objects. That is to say that child (consumer) objects do not have an _is-a_ relationship to their parents (dependencies) and therefore do not need to provide a superset of their parent's functionality.

In theory, incidental inheritance is easy to implement in a disciplined way because it does not impose complex constraints on the relationships between objects within a system. As long as the child object is capable of working without errors for the behaviors it is meant to provide, it does not need to take special care to adhere to the [Liskov Substitution Principle](http://blog.rubybestpractices.com/posts/gregory/055-issue-23-solid-design.html). In fact, the child needs only to expose and interact with the bits of functionality from the parent object that are specifically relevant to its domain.

Regardless of the model of inheritance used, Sakkinen's paper suggests that child objects should rely only on functionality provided by immediate ancestors. This is essentially an inheritance-oriented parallel to the [Law of Demeter](http://en.wikipedia.org/wiki/Law_of_Demeter) and sounds like good advice to follow whenever it is practical to do so. However, this constraint would be challenging to enforce at the language level in Ruby and may not be feasible to adhere to in every imaginable scenario. In practice, the lack of adequate privacy controls in Ruby makes traditional class hierarchies or module mixins quite messy for incidental inheritance, which complicates things a bit. But before we discuss that problem any further, we should establish what incidental inheritance looks like from several different angles in Ruby.

In the following set of examples, I construct a simple `Report` object that computes the sum and average of numbers listed in a text file. I break this problem into three distinct parts: a component that provides functionality similar to Ruby's `Enumerable` module, a component that uses those features to do simple calculations on numerical data, and a component that outputs the final report. The contrived nature of this scenario should make it easier to examine the structural differences between Ruby's various ways of implementing inheritance relationships, but be sure to keep some more realistic scenarios in the back of your mind as you work through these examples. 

The classical approach of using a class hierarchy for code sharing is worth looking at, even if most practicing Rubyists would quickly identify this as the wrong approach to this particular problem. It serves as a good baseline for identifying the problems introduced by inheritance and how to overcome them. As you read through the following code, think of its strengths and weaknesses, as well as any alternative ways to model this scenario that you can come up with.

```ruby
# Minimal stand-in for Ruby's Enumerable, written as a base class.
# It defines no #each of its own: subclasses must supply one that
# yields every element in turn.
class EnumerableCollection
  # Returns the number of elements yielded by #each.
  def count
    total = 0
    each { |_element| total += 1 }
    total
  end

  # Samnang's implementation from Issue 2.4
  #
  # Folds the elements into a single value. When +arg+ is a Symbol, the
  # method of that name is sent to the running result with each element.
  # Otherwise +arg+ seeds the running result and the given block combines
  # it with each element. While the running result is nil or false, the
  # next element simply replaces it.
  def reduce(arg=nil)
    if arg.is_a?(Symbol)
      return reduce { |memo, element| memo.send(arg, element) }
    end

    memo = arg
    each do |element|
      memo = memo ? yield(memo, element) : element
    end

    memo
  end
end

# Numeric summaries computed with the inherited #reduce and #count.
class StatisticalCollection < EnumerableCollection
  # Returns the elements folded together with +.
  def sum
    reduce(:+)
  end

  # Returns the sum divided by the element count. The count is converted
  # to a Float first, so an Integer sum is not truncated by the division.
  def average
    total = sum
    size  = count.to_f
    total / size
  end
end

# Presenter for the numbers stored in a text file. The arithmetic comes
# from the ancestor classes; this class only supplies #each and #to_s.
class StatisticalReport < StatisticalCollection
  # filename - path of the file to read; each line is converted with to_i.
  def initialize(filename)
    self.input = filename
  end

  # Returns the one-line summary of the sum and the average.
  def to_s
    "The sum is #{sum}, and the average is #{average}"
  end

  private

  attr_accessor :input

  # Yields every line of the input file as an Integer. This is the hook
  # that the inherited #count and #reduce iterate through.
  def each
    File.foreach(input) do |line|
      yield line.chomp.to_i
    end
  end
end

# Kernel#puts calls #to_s on the report, so this prints the summary line.
puts(StatisticalReport.new("numbers.txt"))
```

Through its inheritance-based relationships, `StatisticalReport` is able to act as a simple presenter object while relying on other reusable components to crunch the numbers for it. The `EnumerableCollection` and `StatisticalCollection` objects do most of the heavy lifting while managing to remain useful for a wide range of different applications. The division of responsibilities between these components is reasonably well defined, and if you ignore the underlying mechanics of the style of inheritance being used here, this example is a good demonstration of effective code reuse.

Unfortunately, the devil is in the details. When viewed from a different angle, it's easy to see a wide range of problems that exist even in this very simple application of class-based inheritance:

1. `EnumerableCollection` and `StatisticalCollection` can be instantiated, but
it is not possible to do anything meaningful with them as they are currently
written. Although it's not always a bad idea to make use of abstract
classes, valid uses of that pattern typically invert the relationship shown
here, with the child object filling in a missing piece so that its parent can do
a complex job.

2. Although `StatisticalReport` relies on only two relatively generic methods from `StatisticalCollection` and `StatisticalCollection` similarly relies on only two methods from `EnumerableCollection`, the use of class inheritance forces a rigid hierarchical relationship between the objects. Even if it's not especially awkward to say a `StatisticalCollection` is an `EnumerableCollection`, it's definitely weird to say that a `StatisticalReport` is also an `EnumerableCollection`. What makes matters worse is that this sort of modeling prevents `StatisticalReport` from inheriting from something more related to its domain such as a `HtmlReport` or something similar. As my [favorite OOP rant](http://lists.canonical.org/pipermail/kragen-tol/2011-August/000937.html) proclaims, class hierarchies do not exist simply to satisfy our inner Linnaeus.

3. There is no encapsulation whatsoever between the components in this system. The purely functional nature of both `EnumerableCollection` and `StatisticalCollection` makes this less of a practical concern in this particular example, but the lack of encapsulation is a dangerous characteristic of all code that uses class-based inheritance in Ruby. Any instance variables created within a `StatisticalReport` object will be directly accessible in method calls all the way up its ancestor chain, and the same goes for any methods that `StatisticalReport` defines. Although a bit of discipline can help prevent this from becoming a problem in most simple uses of class inheritance, deep method resolution paths can make accidental collisions of method definitions or instance variable names a serious risk. Such a risk might be mitigated by the introduction of class-specific privacy controls, but they do not exist in Ruby. 

4. As a consequence of points 2 and 3, the `StatisticalReport` object ends up with a bloated contract that isn't representative of its domain model. It'd be awkward to call `StatisticalReport#count` or `StatisticalReport#reduce`, but if those inherited methods are not explicitly marked as private in the `StatisticalReport` definition, they will still be callable by clients of the `StatisticalReport` object. Once again, the stateless nature of this program makes the effects less damning in this particular example, but it doesn't take much effort to imagine the inconsistencies that could arise due to this problem. In addition to real risks of unintended side effects, this kind of modeling makes it harder to document the interface of the `StatisticalReport` in a natural way and diminishes the usefulness of Ruby's reflective capabilities.

At least some of these issues can be resolved through the use of Ruby's module-based mixin functionality. The following example shows how our class-based code can be trivially refactored to use modules instead. Once again, as you read through the code, think of its strengths and weaknesses as well as how you might approach the problem differently if it were up to you to design this system.

```ruby
# Cut-down counterpart of Ruby's Enumerable. The including class must
# define an #each method that yields every element in turn.
module SimplifiedEnumerable
  # Returns the number of elements yielded by #each.
  def count
    total = 0
    each { |_element| total += 1 }
    total
  end

  # Samnang's implementation from Issue 2.4
  #
  # Folds the elements into a single value. When +arg+ is a Symbol, the
  # method of that name is sent to the running result with each element.
  # Otherwise +arg+ seeds the running result and the given block combines
  # it with each element. While the running result is nil or false, the
  # next element simply replaces it.
  def reduce(arg=nil)
    if arg.is_a?(Symbol)
      return reduce { |memo, element| memo.send(arg, element) }
    end

    memo = arg
    each do |element|
      memo = memo ? yield(memo, element) : element
    end

    memo
  end
end

# Numeric summaries for any object that also responds to #reduce and
# #count; this module calls both but defines neither.
module Statistics
  # Returns the elements folded together with +.
  def sum
    reduce(:+)
  end

  # Returns the sum divided by the element count. The count is converted
  # to a Float first, so an Integer sum is not truncated by the division.
  def average
    total = sum
    size  = count.to_f
    total / size
  end
end

# Presenter for the numbers stored in a text file. Iteration helpers and
# arithmetic are mixed in; this class only supplies #each and #to_s.
class StatisticalReport
  include SimplifiedEnumerable
  include Statistics

  # filename - path of the file to read; each line is converted with to_i.
  def initialize(filename)
    self.input = filename
  end

  # Returns the one-line summary of the sum and the average.
  def to_s
    "The sum is #{sum}, and the average is #{average}"
  end

  private

  attr_accessor :input

  # Yields every line of the input file as an Integer. This is the hook
  # that the mixed-in #count and #reduce iterate through.
  def each
    File.foreach(input) do |line|
      yield line.chomp.to_i
    end
  end
end

# Kernel#puts calls #to_s on the report, so this prints the summary line.
puts(StatisticalReport.new("numbers.txt"))
```

Using module mixins does not improve the encapsulation of the components in the system or solve the problem of `StatisticalReport` inheriting methods that aren't directly related to its problem domain, but it does alleviate some of the other problems that Ruby's class-based inheritance causes. In particular, it makes it no longer possible to create instances of objects that wouldn't be useful to use as standalone objects and also loosens the dependencies between the components in the system.

Although the `Statistics` and `SimplifiedEnumerable` modules are still not capable of doing anything useful without being tied to some other object, the relationship between them is much looser. When the two are mixed into the `StatisticalReport` object, an implicit relationship between `Statistics` and `SimplifiedEnumerable` exists due to the calls to `reduce` and `count` from within the `Statistics` module, but this relationship is an implementation detail rather than a structural constraint. To see the difference yourself, think about how easy it would be to switch `StatisticalReport` to use Ruby's `Enumerable` module instead of the `SimplifiedEnumerable` module I provided and compare that to the class-based implementation of this scenario.

The bad news is that the way that modules solve some of the problems that we discovered about class hierarchies in Ruby ends up making some of the other problems even worse. Because modules tend to provide a whole lot of functionality based on a very thin contract with the object they get mixed into, they are one of the leading causes of child obesity. For example, swapping my `SimplifiedEnumerable` module for Ruby's `Enumerable` module would cause a net increase of 42 new methods that could be directly called on `StatisticalReport`. And now, rather than having a single path to follow in `StatisticalReport` to determine its ancestry chain, there are two. A nice feature of mixins is that they have fairly simple rules about how they get added to the method lookup path to avoid some of the complexities involved in class-based multiple inheritance, but you still need to memorize those rules and be aware of the combinatorial effects of module inclusion.

As it turns out, modules are a pragmatic compromise that is convenient to use but only slightly more well-behaved than traditional class inheritance. In simple situations, they work just fine, but for more complex systems they end up requiring an increasing amount of discipline to use effectively. Nonetheless, modules tend to be used ubiquitously in Ruby programs despite these problems. A naïve observer might assume that this is a sign that we don't have a better way of doing things in Ruby, but they would be mostly wrong.

All the problems discussed so far with inheritance can be solved via simple aggregation techniques. For strong evidence of that claim, take a look at the refactored code shown here. As in the previous examples, keep an eye out for the pros and cons of this modeling strategy, and think about what you might do differently.

```ruby
# Numeric summaries over a collaborator object instead of an ancestor.
# The wrapped object only needs to respond to #reduce and #count.
class StatisticalCollection
  # data - the collection to summarize; it is kept behind a private accessor.
  def initialize(data)
    self.data = data
  end

  # Returns the elements folded together with +.
  def sum
    data.reduce(:+)
  end

  # Returns the sum divided by the element count. The count is converted
  # to a Float first, so an Integer sum is not truncated by the division.
  def average
    total = sum
    total / data.count.to_f
  end

  private

  attr_accessor :data
end

# Presenter for the numbers stored in a text file. The arithmetic is
# delegated to a StatisticalCollection held in a private attribute, so
# the public surface is just the constructor and #to_s.
class StatisticalReport
  # filename - path of the file to read; each line is converted with to_i.
  #
  # Calling #each without a block returns an Enumerator, so the file is not
  # read here. It is read each time the collection traverses the enumerator.
  def initialize(filename)
    self.input = filename
    numbers    = each
    self.stats = StatisticalCollection.new(numbers)
  end

  # Returns the one-line summary of the sum and the average.
  def to_s
    "The sum is #{stats.sum}, and the average is #{stats.average}"
  end

  private

  attr_accessor :input, :stats

  # With a block, yields every line of the input file as an Integer.
  # Without one, returns an Enumerator over this same method.
  def each
    if block_given?
      File.foreach(input) { |line| yield line.chomp.to_i }
    else
      to_enum(__method__)
    end
  end
end

# Kernel#puts calls #to_s on the report, so this prints the summary line.
puts(StatisticalReport.new("numbers.txt"))
```

The first thing you'll notice is that the code is much shorter, as if by magic, but really it's because I completely cheated here and got rid of my counterfeit `Enumerable` object so that I could expose a potentially good idiom for dealing with iteration in an aggregation-friendly way. Feel free to mentally replace the object passed to `StatisticalCollection`'s constructor with something like the code shown here if you don't want me to get away with parlor tricks:

```ruby
require "forwardable"

# Stand-alone counterpart of Ruby's Enumerable that wraps a collection
# instead of being inherited from or mixed in. The wrapped object only
# needs to respond to #each.
class EnumerableCollection
  extend Forwardable

  # The generated #each reaches the private #data accessor, which is what
  # we want here.
  def_delegator :data, :each

  # data - the collection to iterate over.
  def initialize(data)
    self.data = data
  end

  # Returns the number of elements yielded by #each.
  def count
    total = 0
    each { |_element| total += 1 }
    total
  end

  # Samnang's implementation from Issue 2.4
  #
  # Folds the elements into a single value. When +arg+ is a Symbol, the
  # method of that name is sent to the running result with each element.
  # Otherwise +arg+ seeds the running result and the given block combines
  # it with each element. While the running result is nil or false, the
  # next element simply replaces it.
  def reduce(arg=nil)
    if arg.is_a?(Symbol)
      return reduce { |memo, element| memo.send(arg, element) }
    end

    memo = arg
    each do |element|
      memo = memo ? yield(memo, element) : element
    end

    memo
  end

  private

  attr_accessor :data
end
```

Regardless of what iteration strategy we end up using, the following points are worth noting about the way we've modeled our system this time around:

1. There are three components in this system, all of which are useful and testable as standalone objects.

2. The relationships between all three components are purely indirect, and the coupling between the objects is limited to the names and behavior of the methods called on them rather than their complete surfaces.

3. There is strict encapsulation between the three components: each has its own namespace, and each can enforce its own privacy controls. It's possible of course to side-step these protections, but they are at least enabled by default. The issue of accidental naming collisions between methods or variables of objects is completely eliminated.

4. As a result of points 2 and 3, the surface of each object is kept narrowly in line with its own domain. In fact, the public interface of `StatisticalReport` has been reduced to its constructor and the `to_s` method, making it as thin as possible while still being useful. 

There are certainly downsides to using aggregation; it is not a golden hammer by any means. But when it comes to **incidental inheritance**, it seems to be the right tool for the job more often than not. I'd love to hear counterarguments to this claim, though, so please do share them if you have something in mind that you don't think would gracefully fit this style of modeling.

### Reflections

Although it may be a bit hard to see why disciplined inheritance matters in the trivial scenario we've been talking about throughout this article, it becomes increasingly clear as systems become more complex. Most scenarios that involve incidental inheritance are actually relatively horizontal problems in nature, but the use of class-based inheritance or module mixins forces a vertical method lookup path that can become very unwieldy, to say the least. When taken to the extremes, you end up with objects like `ActiveRecord::Base`, which has a path that is 43 levels deep, or `Prawn::Document`, which has a 26-level-deep path. In the case of Prawn, at least, this is just pure craziness that I am ashamed to have unleashed upon the world, even if it seemed like a good idea at the time.

In a language like Ruby that lacks both multiple inheritance and true class-specific privacy for variables and methods, using class-based hierarchies or module mixins for complex forms of incidental inheritance requires a tremendous amount of discipline. For that reason, the extra effort involved in refactoring towards an aggregation-based design pales in comparison to the maintenance headaches caused by following the traditional route. For example, in both `Prawn` and `ActiveRecord`, aggregation would make it possible to flatten that chain by an order of magnitude while simultaneously reducing the chance of namespace collisions, dependencies on lookup order, and accidental side effects due to state mutations. It seems like the cost of somewhat more verbose code would be well worth it in these scenarios.

In Issue 3.8, we will move on to discuss an essential form of inheritance that Sakkinen refers to as **completely consistent inheritance**. Exploring that topic will get us closer to the concept of mathematical subtypes, which are much more interesting at the theoretical level than incidental inheritance relationships are. But because Ruby's language features make even the simple relationships described in this issue somewhat challenging to manage in an elegant way, I am still looking forward to hearing your ideas and questions about the things I've covered so far.

A major concern I have about incidental inheritance is that I still don't have a
clear sense of where to draw the line between the two extremes I've outlined in
this article. I definitely want to look further into this area, so please leave
a comment if you don't mind sharing your thoughts.


================================================
FILE: articles/v3/008-criteria-for-disciplined-inheritance.md
================================================
In [Issue 3.7](http://practicingruby.com/articles/24), I started to explore the criteria laid out by Sakkinen's
[Disciplined Inheritance](http://scholar.google.com/scholar?cluster=5893037045851782349&hl=en&as_sdt=0,7&sciodt=0,7), 
a language-agnostic paper published more than two decades ago that is surprisingly 
relevant to the modern Ruby programmer. In this issue, we continue where Issue 3.7 
left off: on the question of how to maintain complete compatibility between
parent and child objects in inheritance-based domain models. Or, to put it another way,
this article explores how to reuse code safely within a system—
without it becoming a maintenance nightmare.

After taking a closer look at what Sakkinen exposed regarding this topic, I came to
realize that the ideas he presented were strikingly similar to the [Liskov Substitution
Principle](http://en.wikipedia.org/wiki/Liskov_Substitution_Principle). In fact,
the extremely dynamic nature of Ruby makes 
establishing [a behavioral notion of subtyping](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.39.1223) (Liskov and Wing 1993)
a prerequisite for developing disciplined inheritance practices. 
As a result, this article refers to Liskov's work more than Sakkinen's, 
even though both papers have extremely interesting things to say on this topic. 

### Defining a behavioral subtype 

Both Sakkinen and Liskov describe the essence of the inheritance relationship as 
the ability of a child object to serve as a drop-in replacement wherever
its parent object can be used. I've greatly simplified the concept by
stating it in such a general fashion, but this is the thread that ties
their independent works together. 

Liskov goes a step farther than Sakkinen by defining two kinds of 
behavioral subtypes: children that extend the behavior specified by their 
parents, and children that constrain the behavior specified by their parents. 
These concepts are not mutually exclusive, but because each brings up
its own set of challenges, it is convenient to separate them in this
fashion.

Both Sakkinen and Liskov emphasize that the abstract concept of subtyping 
is  much more about the observable behavior of objects than it is about
what exactly is going on under the hood. This concept is a natural way of thinking
for Rubyists, and it is worth keeping in mind as you read through the rest
of this article. In particular, when we talk about the type of an object,
we are focusing on what that object *does*, not what it *is*.

Although the concept of a behavioral subtype sounds like a direct analogue for
what we commonly refer to as "duck typing" in Ruby, the former is about
the full contract of an object rather than how it acts under certain
circumstances. I go into more detail about the differences between
these concepts toward the end of this article,
but before we can discuss them meaningfully, we need to take a look
at Liskov's two types of behavioral subtyping and how they can
be implemented.

### Behavioral subtypes as extensions

Whether you realize it or not, odds are good that you are already familiar with using behavioral subtypes as extensions. Whenever we inherit from `ActiveRecord::Base` or mix `Enumerable` into one of our objects, we're making use of this concept. In essence, the purpose of an extension is to bolt new behavior on top of an existing type to form a new subtype.

To ensure that our child objects maintain the substitution principle, we need to make sure that any new behavior and modifications introduced by extensions follow a few simple rules. In particular, all new functionality must be either completely transparent to the parent object or defined in terms of the parent object's functionality. Changing the signature of a method provided by the parent object would be considered an incompatible change, as would directly modifying instance variables referenced by the parent object. These strict rules may seem like overkill, but they are the only way to guarantee that your extended subtypes will be drop-in replacements for their supertypes.

In practice, obeying these rules is not as hard as it seems. For example, suppose we wanted to extend `Prawn::Document` so that it implements some helpers for article typesetting:

```ruby
# Target API for the article DSL. "test.pdf" is handed to
# Prawn::Article.generate (presumably the output path), and the block
# uses the h1, h2, and para helpers that Prawn::Article is meant to provide.
Prawn::Article.generate("test.pdf") do
  # Top-level heading.
  h1 "Criteria for Disciplined Inheritance"
 
  # Paragraph text is written as a multi-line %{} string literal.
  para %{
    This is an example of building a Prawn-based article
    generator through the use of a behavioral subtype as
    an extension. It's about as wonderful and self-referential
    as you might expect.
  }

  # Second-level heading.
  h2 "Benefits of behavioral subtyping"

  para %{
    The benefits of behavioral subtyping cannot be directly
    known without experiencing them for yourself.
  }

  para %{
    But if you REALLY get stuck, try asking Barbara Liskov.
  }
end
```

The most simple way to implement this sort of domain language would be to create a subclass of `Prawn::Document`, as shown in the following example:

```ruby
module Prawn
  # Article typesetting helpers added to Document by subclassing. Every
  # helper is written only in terms of methods it does not define itself
  # (text, move_down, in2pt).
  class Article < Document
    include Measurements

    # Top-level heading: 24-point text followed by a 0.3 inch gap.
    def h1(contents)
      text(contents, :size => 24)
      move_down(in2pt(0.3))
    end

    # Second-level heading: 16-point text with a 0.1 inch gap above it
    # and a 0.2 inch gap below it.
    def h2(contents)
      move_down(in2pt(0.1))
      text(contents, :size => 16)
      move_down(in2pt(0.2))
    end

    # Paragraph: every run of whitespace (newlines included) is collapsed
    # to a single space before rendering, then a 0.1 inch gap follows.
    def para(contents)
      single_spaced = contents.gsub(/\s+/, " ")
      text(single_spaced)
      move_down(in2pt(0.1))
    end
  end
end
```

As far as Liskov is concerned, `Prawn::Article` is a perfectly legitimate extension because instances of it are drop-in substitutes for `Prawn::Document` objects. In fact, this sort of extension is trivial to prove to be a behavioral subtype because it is defined purely in terms of public methods that are provided by its parents (`Prawn::Document` and `Prawn::Measurements`). Because the functionality added is so straightforward, the use of subclassing here might just be the right tool for the job. 

The downside of using subclassing is that even minor alterations to program requirements can cause encapsulation-related issues to become a real concern. For example, if we decide that we want to add a pair of instance variables that control the fonts used for headers and paragraphs, it would be hard to guarantee that these variables wouldn't clash with the data contained within `Prawn::Document` objects. We can assume that calls to public methods provided by the parent object are safe, but we cannot say the same about referencing instance variables, so a delegation-based model starts to look more appealing.

Suppose we wanted to support the following API, but via delegation rather than subclassing:

```ruby
# Target API for the delegation-based article DSL. "test.pdf" is handed to
# Prawn::Article.generate (presumably the output path).
Prawn::Article.generate("test.pdf") do
  # Font settings: header_font is meant to apply to h1/h2 and
  # paragraph_font to para.
  header_font    "Courier"
  paragraph_font "Helvetica"

  # Top-level heading.
  h1 "Criteria for Disciplined Inheritance"
 
  # Paragraph text is written as a multi-line %{} string literal.
  para %{
    This is an example of building a Prawn-based article
    generator through the use of a behavioral subtype as
    an extension. It's about as wonderful and self-referential
    as you might expect.
  }

  # Second-level heading.
  h2 "Benefits of behavioral subtyping"

  para %{
    The benefits of behavioral subtyping cannot be directly
    known without experiencing them for yourself.
  }

  para %{
    But if you REALLY get stuck, try asking Barbara Liskov.
  }
end
```

Using a `method_missing` hook and the `Prawn::Article.generate` class method, it
is fairly easy to implement this DSL:

```ruby
module Prawn
  # Article typesetting helpers layered over a document through transparent
  # delegation rather than subclassing. The article keeps its own state
  # (@header_font, @paragraph_font, and the wrapped document) apart from
  # the document's instance variables, and forwards every message it does
  # not define itself to the wrapped document.
  class Article
    # Passes all arguments through to Prawn::Document.generate and evaluates
    # the given block in the context of a new Article wrapping the document
    # that generate yields.
    def self.generate(*args, &block)
      Prawn::Document.generate(*args) do |pdf|
        new(pdf).instance_eval(&block)
      end
    end

    # document - the object that receives all forwarded messages. It is
    #            extended with Prawn::Measurements so that in2pt can be
    #            reached through the same forwarding path.
    def initialize(document)
      self.document = document
      document.extend(Prawn::Measurements)

      # set defaults so that @paragraph_font and @header_font are never nil.
      paragraph_font "Times-Roman"
      header_font    "Times-Roman"
    end

    # Top-level heading: 24-point text in the header font, then a 0.3 inch gap.
    def h1(contents)
      font(header_font) do
        text(contents, :size => 24)
        move_down in2pt(0.3)
      end
    end

    # Second-level heading: 16-point text in the header font, with a 0.1 inch
    # gap above it and a 0.2 inch gap below it.
    def h2(contents)
      font(header_font) do
        move_down in2pt(0.1)
        text(contents, :size => 16)
        move_down in2pt(0.2)
      end
    end

    # Paragraph in the paragraph font: every run of whitespace is collapsed
    # to a single space before rendering, then a 0.1 inch gap follows.
    def para(contents)
      font(paragraph_font) do
        text(contents.gsub(/\s+/, " "))
        move_down in2pt(0.1)
      end
    end

    # Combined reader/writer: with an argument, stores and returns it as the
    # paragraph font; without one, returns the current paragraph font.
    def paragraph_font(font=nil)
      return @paragraph_font = font if font

      @paragraph_font
    end

    # Combined reader/writer: with an argument, stores and returns it as the
    # header font; without one, returns the current header font.
    def header_font(font=nil)
      return @header_font = font if font

      @header_font
    end

    # Forwards any message the article does not define to the wrapped
    # document. Object#send is used, so the document's private methods are
    # reachable through this path as well.
    def method_missing(id, *args, &block)
      document.send(id, *args, &block)
    end

    private

    attr_accessor :document

    # Keeps respond_to? and Object#method consistent with method_missing.
    # Forwarding goes through #send, which can reach private methods, so the
    # document is asked with include_private set to true regardless of the
    # flag passed in.
    def respond_to_missing?(id, include_private = false)
      document.respond_to?(id, true) || super
    end
  end
end
```

Taking this approach involves writing more code and adds some complexity. However, that is a small price to pay for the peace of mind that comes with cleanly separating the data contained within the `Prawn::Article` and `Prawn::Document` objects. This design also makes it harder for `Prawn::Article` to have name clashes with `Prawn::Document`'s private methods and forces any private method calls to `Prawn::Document` to be done explicitly. Because transparent delegation exposes the full contract of the parent object, it is still necessary for the child object to maintain full compatibility with those methods in the same manner that a class-inheritance-based model would. Nonetheless, this pattern provides a safer way to implement subtypes because it avoids incidental clashes, which could otherwise occur easily.

Although the examples we've looked at so far—combined with your own experiences—should give you a good sense of how to extend code via behavioral subtypes, there are some common pitfalls I have glossed over in order to keep things simple. I'll get back to those before the end of the article, but for now let's turn our attention to the other kind of subtypes Liskov describes in her paper. She refers to them as _constrained subtypes_, but I call them _restriction subtypes_ as an easy-to-remember mirror image of the _extension subtype_ concept.

### Behavioral subtypes as restrictions

Just as subtypes can be used to extend the behavior of a supertype, they can also be used to restrict generic behaviors by providing more specific implementations of them. The example Liskov uses in her paper illustrates how a stack structure can be viewed as a restriction on the more general concept of a bag.

In its most simple form, a bag is essentially nothing more than a set that can contain duplicates. Items can be added and removed from a bag, and it is possible to determine whether the bag contains a given item. However, much like with a set, order is not guaranteed. The following code, which is somewhat of a contrived example, implements a `Bag` object similar to the one described in Liskov's paper:

```ruby
ContainerFullError  = Class.new(StandardError)
ContainerEmptyError = Class.new(StandardError)

# A bounded, unordered container that may hold duplicates. The contents
# are shuffled on every push and pop, so callers cannot depend on any
# particular removal order.
class Bag
  # limit - the maximum number of items the bag may hold.
  def initialize(limit)
    self.items = []
    self.limit = limit
  end

  # Adds obj to the bag.
  #
  # Raises ContainerFullError if the bag already holds +limit+ items.
  def push(obj)
    raise ContainerFullError unless items.length < limit

    items.shuffle!
    items.push(obj)
  end

  # Removes one item from the bag and returns it. Which item is removed
  # is deliberately unpredictable.
  #
  # Raises ContainerEmptyError if the bag holds no items.
  def pop
    raise ContainerEmptyError if items.empty?

    items.shuffle!
    items.pop
  end

  # Returns true if obj is one of the bag's items, false otherwise.
  def include?(obj)
    items.include?(obj)
  end

  private

  attr_accessor :items, :limit
end
```

The challenge in determining whether a `Stack` object can meaningfully be considered a subtype of this sort of structure is that we need to find a way to describe the functionality of a bag so that it is general enough to allow for interesting subtypes to exist but specific enough to allow the `Bag` object to be used on its own in a predictable way. Because Ruby lacks the design-by-contract features that Liskov depends on in her paper, we need to describe this specification verbally rather than relying on our tools to enforce them for us. Something like the following list of rules is roughly similar to what she describes more formally in her work:

1) A bag has `items` and a size `limit`.

2) A bag has a `push` operation, which adds a new object to the bag's `items`.

* If the current number of `items` is less than the `limit`, the new object is added to the bag's `items`.

* Otherwise, a `ContainerFullError` is raised.

3) A bag has a `pop` operation, which removes an object from the bag's `items` and returns it as a result.

* If the bag has no `items`, a `ContainerEmptyError` is raised.

* Otherwise, one object is removed from the bag's `items` and returned.

4) A bag has an `include?` operation, which indicates whether the provided object is one of the bag's `items`.

* If the bag's `items` contains the provided object, `true` is returned.

* Otherwise, `false` is returned.

With these rules in mind, we can see that the following `Stack` object satisfies the definition of a bag while simultaneously introducing a predictable ordering to `items`:

```ruby
ContainerFullError  = Class.new(StandardError)
ContainerEmptyError = Class.new(StandardError)

# A size-limited container whose pop operation is last-in, first-out.
class Stack
  # limit - the maximum number of objects the stack may hold at once.
  def initialize(limit)
    self.items = []
    self.limit = limit
  end

  # Places obj on top of the stack.
  #
  # Raises ContainerFullError when the stack already holds `limit` objects.
  def push(obj)
    raise ContainerFullError unless items.length < limit

    items.push(obj)
  end

  # Removes the most recently pushed object and returns it.
  #
  # Raises ContainerEmptyError when the stack has no items.
  def pop
    raise ContainerEmptyError if items.empty?

    items.pop
  end

  # Returns true if obj is one of the stack's items, false otherwise.
  def include?(obj)
    items.include?(obj)
  end

  private

  attr_accessor :items, :limit
end
```

With this example code in mind, we can specify the behavior of a stack in the following way:

1) A stack is a bag.

2) A stack's `pop` operation follows a last-in, first-out (LIFO) order.

Because the ordering requirements of a stack don't conflict with the defining characteristics of a bag, a stack can be substituted for a bag without any issues. The key thing to keep in mind here is that restriction subtypes can create additional constraints on top of what was specified by their supertypes but cannot loosen the constraints put upon them by their ancestors in any way. For example, based on the way we defined bag objects, we would not be able to return `nil` instead of raising `ContainerEmptyError` when `pop` is called on an empty stack, even if that seems like a fairly innocuous change.

Once again, maintaining this sort of discipline may seem on the surface to be more trouble than it is worth. However, these kinds of assumptions are baked into useful patterns such as the [template method pattern](http://en.wikipedia.org/wiki/Template_method_pattern) and are also key to designing type hierarchies for all sorts of data structures. A good example of these concepts in action can be found in the way Ruby organizes its numeric types. The class hierarchy is shown here, but be sure to check out Ruby's documentation if you want to get a sense of how exactly these classes hang together.

<img src="http://i.imgur.com/ObKrf.jpg" width=800/>

Whether you are designing extension subtypes or restriction subtypes, it is unfortunately easier to get things wrong than it is to get them right, due to all the subtle issues that need to be considered. For that reason, we'll now take a look at a few examples of flawed behavioral subtypes and how to go about fixing them.

### Examples of flawed behavioral subtypes

To test your understanding of behavioral subtype compatibility while simultaneously exposing some common pitfalls, I provide the following three flawed examples for you to study. As you read through them, try to figure out what the subtype compatibility problem is and how you might go about solving it.

1) Suppose we want to add an equality operator to the bag structure. A sample operator is provided here for the `Bag` object, which conforms to the following newly specified
feature: "Two bags are considered equal if they have equivalent items and size limits". What problems will we encounter in implementing a bag-compatible equality operator for the `Stack` object? 

```ruby
class Bag
  # other code similar to before

  # Two bags are equal when they hold equivalent items (regardless of
  # order) and have the same size limit.
  def ==(other)
    [items.sort, limit] == [other.items.sort, other.limit]
  end

  protected

  # NOTE: Implementing == is one of the few legitimate uses of
  # protected methods / attributes
  attr_accessor :items, :limit
end
```

2) Suppose we have two mutable objects, a `Rectangle` and a `Square`, and we wish to implement `Square` as a restriction of `Rectangle`. Given the following implementation of a `Rectangle` object, what problems will be encountered in defining a `Square` object?

```ruby
# A mutable rectangle whose width and height are set independently.
class Rectangle
  attr_accessor :width, :height

  # Returns the current width multiplied by the current height.
  def area
    width * height
  end
end
```

3) Suppose we have a `PersistentSet` object that delegates all method calls to the `Set` object provided by Ruby's standard library, as shown in the following code. Why is this not a compatible subtype, even though it does not explicitly modify the behavior of any of `Set`'s operations?

```ruby
require "set"
require "pstore"

# Forwards every message it does not understand to a Set that is kept
# inside a PStore file.
class PersistentSet
  # filename - path handed to PStore.new for the backing store.
  def initialize(filename)
    backing = PStore.new(filename)
    self.store = backing

    # make sure a Set exists under :data before any message is forwarded
    backing.transaction { backing[:data] ||= Set.new }
  end

  # Sends the requested message to the stored Set from within a
  # transaction; the value of the transaction is the return value.
  def method_missing(name, *args, &block)
    store.transaction do
      stored_set = store[:data]
      stored_set.send(name, *args, &block)
    end
  end

  private

  attr_accessor :store
end
```

To avoid spoiling the fun of finding and fixing the defects with these examples yourself, I've hidden my explanation of the [problems](https://gist.github.com/15b50f918c88bccd6eac) and [solutions](https://gist.github.com/3f53d4094759c0508e19) on a pair of gists. Please spend some time on this exercise before reading the spoilers, as you'll learn a lot more that way!

A huge hint is that the first problem is based on an issue discussed in [Liskov's paper](http://www.cs.cmu.edu/~wing/publications/LiskovWing94.pdf) and the second and third problems are discussed in an [article about LSP](http://www.objectmentor.com/resources/articles/lsp.pdf) by Bob Martin. However, please note that their solutions are not exactly the most natural fit for Ruby, so there is still room for some creativity here.

### Behavioral subtyping versus duck typing

Between this article and the topics discussed in [Issue 3.7](http://practicingruby.com/articles/24), this two-part series offers a fairly comprehensive view of disciplined inheritance practices for the Ruby programmer. However, as I hinted toward the beginning of this article, there is the somewhat looser concept of duck typing that deserves a mention if we really want to see the whole picture.

What duck typing and behavioral subtypes have in common is that both concepts rely on what an object can do rather than what exactly it is. They differ in that behavioral subtypes seem to be more about the behavior of an entire object and duck typing is about how a given object behaves within a certain context. Duck typing can be a good deal more flexible than behavioral subtyping in that sense, because typically it involves an object implementing a meaningful response to a single message rather than an entire suite of behaviors. You can find a ton of examples of duck typing in use in Ruby, but perhaps the easiest to spot is the ubiquitous use of the `to_s` method.

By implementing a `to_s` method in our objects, we are able to indicate to Ruby that our object has a meaningful string representation, which can then be used in a wide range of contexts. Among other things, the `to_s` method is automatically called by irb when an `inspect` method is not also provided, called by the `Kernel#puts` method on whatever object you pass to it, and called automatically on the result of any expression executed via string interpolation. Implementing a meaningful `to_s` method is not exactly a form of behavioral subtyping but is still a very useful form of code sharing. [Issue 1.14](http://blog.rubybestpractices.com/posts/gregory/046-issue-14-duck-typing.html) and [Issue 1.15](http://blog.rubybestpractices.com/posts/gregory/047-issue-15-duck-typing-2.html) cover duck typing in great detail, but this single example is enough to point out the merits of this technique and how much simpler it is than the topics discussed in this article.

### Reflections

A true challenge for any practicing Rubyist is finding a balance between the free-wheeling culture of Ruby development and the more rigorous approaches of our predecessors. Disciplined inheritance techniques will make our lives easier, and knowing what a behavioral subtype is and how to design one will surely come in handy on any moderately complex project. However, we should keep our eyes trained on how these issues relate to maintainability, understandability, and changeability rather than obsessing about how they can lead us to mathematically pure designs.

I think there is room for another article on the practical applications of these ideas, in which I might talk about applying some design-by-contract concepts to Ruby programs or how to develop shared unit tests that make it easier to test for compatibility when implementing subtypes. But I don't plan to work on that article immediately, so for now we can sort out those issues via comments on this article. If you have any suggestions for how to tie these ideas back to real problems, or questions on how to apply them to the things you've been working on, please share your thoughts. 


================================================
FILE: articles/v3/009-using-games-to-practice-domain-modeling.md
================================================
As programmers, it is literally our job to make [domain models](http://en.wikipedia.org/wiki/Domain_model) understandable to computers. While this can be some of the most creative work we do, it also tends to be the most challenging. The inherent difficulty of designing and implementing conceptual models leads many to develop their problem solving skills through a painful process of trial and error rather than some form of deliberate practice. However, that is a path paved with sorrows, and we can do better.

Defining problem spaces and navigating within them does get easier as you become more experienced. But if you only work with complex domain models while you are knee deep in production code, you'll find that many useful modeling patterns will blend in with application-specific details and quickly fade into the background without being noticed. Instead, what is needed is a testbed for exploring these ideas that is complex enough to mirror some of the problems you're likely to encounter in your daily work, but inconsequential enough to ensure that your practical needs for working code won't get in the way of exploring new ideas. 

While there are a number of ways to create a good learning environment for studying domain modeling, my favorite approach is to try to clone bits of functionality from various games I play when I'm not coding. In this article, I'll walk you through an example of this technique by demonstrating how to model a simplified version of the [Minecraft crafting system](http://www.minecraftwiki.net/wiki/Crafting).

### Defining the problem space

> **NOTE:** Those who haven't played Minecraft before may want to spend a few minutes watching this video [tutorial about crafting](http://www.youtube.com/watch?v=AKktiCsCPWE) or skimming [the game's wiki page](http://www.minecraftwiki.net/wiki/Crafting) on the topic before continuing. However, because I only focus on a few very basic ideas about the system for this exercise, you don't need to be a Minecraft player in order to enjoy this article.

The crafting table is a key component in Minecraft because it provides the player with a way to turn natural resources into useful tools, weapons, and construction materials. Stripped down to its bare essence, the function of the crafting table is essentially to convert various input items laid out in a 3x3 grid into some quantity of a different type of item. For example, a single block of wood can be converted into four wooden planks, a pair of wooden planks can be combined to produce four sticks, and a stick combined with a piece of coal will produce four torches. Virtually all objects in the Minecraft world can be built in this fashion, as long as the player has the necessary materials and knows the rules about how to combine them together. 

Because positioning of input items within the crafting table's grid is significant, players need to make use of recipes to learn how various input items can be combined to produce new objects. To make recipes easier for the player to memorize, the game allows for a bit of flexibility in the way things are arranged, as long as the basic structure of the layout is preserved. In particular, the input items for recipes can be horizontally and vertically shifted as long as they remain within the 3x3 grid, and the system also knows how to match mirror images as well. However, after accounting for these variants, there is a direct mapping from the inputs to the outputs in the crafting system.

As of 2012-02-27, Minecraft supports 174 crafting recipes. This is a small enough number where even a naïve data model would likely be fast enough to not cause any usability problems, even if you consider the fact that most of those recipes can be shifted around in various ways. But in the interest of showing off some neat Ruby data modeling tricks, I've decided to try to implement this model in an efficient way. In doing so, I found out that inputs can be checked for corresponding outputs in constant time, and that there are some useful constraints that make it so that only a few variants need to be checked in most cases in order to find a match for the player's input items.

My [finished model](https://github.com/elm-city-craftworks/crafting_table) ended up consisting of three parts: A `Recipe` object responsible for codifying the layout of input items and generating variants based on that layout, a `Cookbook` object which maps recipes to their outputs, and an `Importer` object which generates a cookbook object from CSV formatted recipe data. In the following sections, I will take a look at each of these objects and point out any interesting details about them.

### Modeling recipes 

> **NOTE:** To keep my implementation code easy to follow, I have simplified the recipe model somewhat so that it does not consider mirror images of recipes to be equivalent. Implementing that sort of behavior could be a fun exercise for the reader, and would make this model a closer match to what Minecraft actually implements.

The challenge involved in modeling Minecraft recipes is that you need to treat horizontally and vertically shifted item sets as being equivalent to one another. Or in other words, as long as the shape of an item set is preserved, there is a bit of flexibility about where you can place items on the table. For example, all of the recipes below are considered to be equivalent to one another:

![](http://i.imgur.com/HSop9.png)

A naïve approach to the problem will lead you to checking up to 25 variants for each recipe, only to find out that most of them are invalid mutations of the original item set that place at least one item outside of the 3x3 grid. Some simple checks can be put in place to throw out invalid variants, but it is better to never generate them at all. 

> **UPDATE 2012-03-01**: Based on a suggestion by [Shane Emmons](http://community.mendicantuniversity.org/people/semmons99), I worked on a better approach to this problem after this article was published. The basic idea is that rather than generating recipe variants, you instead normalize the recipes into a single common layout on demand. Check out the [updated code here](https://github.com/elm-city-craftworks/crafting_table/blob/607a4d8fc958c2e746b899c43b5cbb01301b3c6b/lib/crafting_table/recipe.rb). The solution described below is still interesting though, so feel free to read on anyway!

The approach I ended up taking is to compute margins surrounding each item that indicate how they can be shifted. As each new item gets added to the recipe, its margins and the margins of the current item set are intersected to obtain a new set of boundaries. The following diagram demonstrates the process of adding three items (B, C, A) to the grid sequentially, with each newly added item reducing the number of equivalent recipes that can be generated:

![](http://i.imgur.com/SEen2.png)

This process is very efficient because it involves simple numerical computations at insert time, rather than processing the whole item set at once. With that in mind, my implementation of `Recipe#[]=` updates the margins for the item set right away whenever a new item is added:

```ruby
recipe[0,0] = "B"
p recipe.send(:margins) #=> {:top=>2, :left=>0, :right=>2, :bottom=>0}

recipe[1,0] = "C"
p recipe.send(:margins) #=> {:top=>2, :left=>0, :right=>1, :bottom=>0} 

recipe[0,1] = "A"
p recipe.send(:margins) #=> {:top=>1, :left=>0, :right=>1, :bottom=>0} 
```

The following code shows how `Recipe#[]=` is implemented. In particular, it demonstrates that item set margins are directly updated on insert, but variant layouts are not generated until later.

```ruby
module CraftingTable
  # A layout of ingredients on the crafting grid, together with the margins
  # that bound how far that layout can be shifted.
  class Recipe
    TABLE_WIDTH = 3
    TABLE_HEIGHT = 3

    def initialize
      self.ingredients = {}

      # every margin starts out unbounded; each insert can only shrink it
      self.margins = [:top, :left, :right, :bottom].each_with_object({}) do |side, all|
        all[side] = Float::INFINITY
      end

      self.variants = Set.new
      self.variants_need_updating = false
    end

    # ... various unrelated details omitted ...

    # Places ingredient_type at column x, row y of the grid.
    #
    # Raises ArgumentError when x or y falls outside of the table.
    def []=(x,y,ingredient_type)
      on_table = (0...TABLE_WIDTH).include?(x) && (0...TABLE_HEIGHT).include?(y)
      raise ArgumentError unless on_table

      # storing positions as vectors makes variant computations easier
      position = Vector[x,y]
      ingredients[position] = ingredient_type

      update_margins(x,y)

      # variants are regenerated lazily, so only mark them as stale here
      self.variants_need_updating = true
    end

    private

    attr_accessor :margins, :ingredients, :variants_need_updating

    # Shrinks each margin to the free space around (x,y) whenever that is
    # smaller than the margin recorded so far.
    def update_margins(x,y)
      shrink = lambda do |side, free_space|
        margins[side] = [free_space, margins[side]].min
      end

      shrink.call(:left,   x)
      shrink.call(:right,  TABLE_WIDTH-x-1)
      shrink.call(:bottom, y)
      shrink.call(:top,    TABLE_HEIGHT-y-1)
    end
  end
end
```

I deferred the process of generating variants simply because doing so at insert time would cause many unnecessary intermediate computations to be done for multi-item recipes. While such a small number of possible variations pretty much guarantees there won't be performance issues whether or not lazy evaluation is used, I wanted to use this situation as a chance to think through how I would model variant generation if efficiency was a real concern. In production code, premature optimization is the root of all evil, but when you're in deliberate practice mode it can be quite fun.

I ultimately decided to generate the variants on demand when two `Recipe` objects are compared to one another. As you can see from the following code, my implementation of `Recipe#==` causes both recipe objects involved in the test to update their variants if necessary:

```ruby
module CraftingTable
  class Recipe
    # ... various unrelated details omitted ...

    # Recipes are equal when they are of the same class and generate the
    # same set of variants.
    def ==(other)
      self.class == other.class && variants == other.variants
    end

    protected

    # Returns the variant set, regenerating it first if it has been flagged
    # as out of date.
    def variants
      if variants_need_updating
        update_variants
      end

      @variants
    end
  end
end
```

While the high level interface for `Recipe` comparison is easy to follow, the way I ended up generating the underlying variant data is a bit messy. The implementation details for `Recipe#update_variants` are shown below, but the rough idea here is that I compute a set of `valid_offsets` and then use them to do vector addition to translate items to different coordinates within the grid. After performing this transformation, I wrap the variant data in a `Set` object so that they can easily be compared in an order-independent fashion. Assuming all this happens successfully, the `variants_need_updating` flag gets set to `false` to indicate that the variant data is now up to date. 

```ruby
module CraftingTable
  class Recipe

    # ... various unrelated details omitted ...

    private

    attr_accessor :margins, :ingredients, :variants_need_updating
    attr_writer   :variants

    # Rebuilds the variant set by translating every ingredient by each valid
    # offset, then marks the variants as up to date.
    #
    # Raises InvalidRecipeError when no ingredients have been placed.
    def update_variants
      raise InvalidRecipeError if ingredients.empty?

      shifted_layouts = valid_offsets.map do |dx, dy|
        shift = Vector[dx, dy]

        ingredients.each_with_object({}) do |(position, content), layout|
          layout[position + shift] = content
        end
      end

      # a Set lets the layouts be compared in an order-independent fashion
      self.variants               = Set.new(shifted_layouts)
      self.variants_need_updating = false
    end

    # Returns every [x, y] offset permitted by the current margins.
    def valid_offsets
      horizontal = (-margins[:left]..margins[:right]).to_a
      vertical   = (-margins[:bottom]..margins[:top]).to_a

      horizontal.flat_map { |dx| vertical.map { |dy| [dx, dy] } }
    end
  end
end
```

An interesting thing to note about this design is that variants are purely implementation details that are not exposed via the public API. While the large amount of code I've shelved in private methods seems to indicate that there might be an object to extract here, I like the idea that from the outside perspective, the equivalence relationship between recipes is established without having to do any sort of explicit check to see whether two different layouts share a common variant. To see the true benefits of this kind of information hiding, we can take a look at how it affects the design of the cookbook.

### Modeling a cookbook

One of the first things I noticed about this problem domain was that the mapping of inputs to outputs was a natural fit for a hash structure. While it took a while to sort out the details, I eventually was able to put together a `Cookbook` object that works in the manner shown below:

```ruby
cookbook     = CraftingTable::Cookbook.new
torch_recipe = CraftingTable::Recipe.new

torch_recipe[1,1] = "coal"
torch_recipe[1,0] = "stick"

cookbook[torch_recipe] = ["torch", 4]

# ---

user_recipe = CraftingTable::Recipe.new
user_recipe[2,2] = "coal"
user_recipe[2,1] = "stick"

p cookbook[user_recipe] #=> ["torch", 4]
```

The final implementation of this object turned out to be incredibly simple, although it required some minor extensions to the `Recipe` object in order to work correctly. Take a look at the following class definition to see just how little `CraftingTable::Cookbook` is doing under the hood:

```ruby
module CraftingTable

  # This is the complete definition of my Cookbook object, with no omissions!

  # A write-once key/value store that maps recipes to their outputs.
  class Cookbook
    def initialize
      self.recipes = {}
    end

    # Returns the output stored for recipe, or nil if there is none.
    def [](recipe)
      recipes[recipe]
    end

    # Stores output under recipe.
    #
    # Raises ArgumentError if an equivalent recipe has already been stored.
    def []=(recipe, output)
      # check for the key itself rather than the truthiness of its value, so
      # that single assignment holds no matter what output was stored
      if recipes.key?(recipe)
        raise ArgumentError, "A variant of this recipe is already defined!"
      end

      recipes[recipe] = output
    end

    private

    attr_accessor :recipes
  end
end
```

On the surface, the class seems to have only two tangible features to it: it severely narrows the interface to the hash it wraps so that it becomes nothing more than a simple key/value store, and it forces single assignment semantics. However, when we look at how the object is actually used, we see that there is an implicit dependency on some deeper, more domain specific logic. Revisiting the usage example from before, you can see that the `Cookbook` object treats variants of the same recipes as if they were the same hash key. 

```ruby
# ... unimportant boilerplate omitted

torch_recipe[1,1] = "coal"
torch_recipe[1,0] = "stick"

cookbook[torch_recipe] = ["torch", 4]

# ---

user_recipe[2,2] = "coal"
user_recipe[2,1] = "stick"

p cookbook[user_recipe] #=> ["torch", 4]
```

If you haven't already dug into the source to locate where this bit of magic comes from, it has to do with the fact that Ruby provides hooks that allow you to use complex objects as hash keys. In particular, customizing the way that objects are used as hash keys involves overriding the `Object#hash` and `Object#eql?` methods. If you take a closer look at the `Recipe` object, you'll see it does define both of these methods:

```ruby
module CraftingTable
  # ... various unrelated details omitted ...

  class Recipe
    # Recipes are equal when they are of the same class and generate the
    # same set of variants.
    def ==(other)
      self.class == other.class && variants == other.variants
    end

    # this is the standard idiom, as in most cases == should be the same as eql?
    alias eql? ==

    # Uses the hash of the variant set as this recipe's hash value.
    def hash
      variants.hash
    end
  end
end
```

While I don't want to get bogged down in the details of how these hooks work, the basic idea is that the `hash` method returns a numeric identifier which determines which bucket to store the value in. When a provided key hashes to the same number as an object already in the hash, the `eql?` method determines whether the keys are actually equivalent. Because `Recipe#hash` simply delegates to `Set#hash`, all item sets with the same elements have the same hash value, even if their order differs. Likewise, when `eql?` is called, it ends up delegating to `Set#==` which has the same semantics. If you trace your way through the usage example, you'll find that because `torch_recipe` and `user_recipe` generate the same variants, they also can stand in for one another as hash keys due to these overridden methods.

Without a doubt, this is a *clever* technique. But I'm still on the fence about whether it is a good approach or not. On the one hand, it makes use of a well defined hook that Ruby provides which seems to be well suited for the problem we're trying to model. On the other hand, it is not a very explicit way of building an API at all, and requires a non-trivial understanding of low level features of Ruby to fully understand this code. This is a common tension whenever designing Ruby objects: Matz assumes we're all a lot smarter and a lot more responsible than we might consider ourselves. 

I decided to go this route because in learning exercises I like to push my boundaries a bit and see where it takes me. But if this were production code, I would think about going with a slightly less elegant but more explicit approach. For example, I might have made the `Recipe#variants` method public and then done something similar to the following code in the `Cookbook#[]` method:

```ruby
module CraftingTable
  class Cookbook
    # Looks up the output using the first of the recipe's variants that has
    # a truthy entry in the cookbook.
    def [](recipe)
      known_layout = recipe.variants.detect { |layout| recipes[layout] }

      recipes[known_layout]
    end

    # ...
  end
end
```

That said, I would love to hear your thoughts on this particular pattern. Sometimes when a technique is rare, it's hard to tell whether it seems unintuitive because it is actually hard to understand, or if it just feels that way because it isn't familiar territory.

### Modeling a recipe importer

With the interesting modeling out of the way, all that remains to talk about is how to get data imported into cookbooks in a way that doesn't require a lot of tedious assignment statements. For this purpose, I built a simple `Importer` object which takes a CSV file as input and builds up a `Cookbook` object from it.

The data format consists of multiline records separated by an empty line, as shown below:

```
torch,4
-,-,-
-,coal,-
-,stick,-

crafting_table,1
-,-,-
wooden_plank,wooden_plank,-
wooden_plank,wooden_plank,-
```

While the data isn't pretty as a raw CSV file, this format makes it convenient to edit the data via a spreadsheet program, and doing so provides a pretty nice layout of the input grid. Once the file is written up, it ends up getting used in the manner shown below:

```ruby
cookbook      = CraftingTable::Importer.cookbook_from_csv(recipe_file)
user_recipe_1 = CraftingTable::Recipe.new

user_recipe_1[1,0] = "stick"
user_recipe_1[1,1] = "coal"

p cookbook[user_recipe_1] #=> ["torch", 4]

user_recipe_2 = CraftingTable::Recipe.new

user_recipe_2[0,0] = "wooden_plank"
user_recipe_2[0,1] = "wooden_plank"
user_recipe_2[1,0] = "wooden_plank"
user_recipe_2[1,1] = "wooden_plank"

# look up the second recipe here; user_recipe_1 is the torch layout
p cookbook[user_recipe_2] #=> ["crafting_table", 1]
```

The implementation of the `Importer` object is mostly an uninspired procedural hack, with the only interesting detail of it being that it manually iterates over the CSV data using `CSV.new` in combination with a `File` object as yet another unnecessary-yet-educational efficiency optimization:

```ruby
module CraftingTable
  # Builds Cookbook objects from CSV formatted recipe data.
  Importer = Object.new

  class << Importer
    # Reads the recipe records stored in filename and returns a Cookbook
    # containing them.
    #
    # Each record is a "product,quantity" row followed by three grid rows;
    # records are separated by a blank line.
    def cookbook_from_csv(filename)
      cookbook = Cookbook.new

      File.open(filename) do |f|
        csv = CSV.new(f)

        # let the parser signal the end of the data (csv.gets returns nil)
        # rather than asking the file, which the parser may have read ahead of
        while (header = csv.gets)
          # blank lines between (or after) records parse as empty rows
          next if header.empty?

          product, quantity = header

          grid = [csv.gets, csv.gets, csv.gets]

          cookbook[recipe_from_grid(grid)] = [product, quantity.to_i]
        end
      end

      cookbook
    end

    # Converts an array of grid rows (top row first) into a Recipe. Cells
    # that are blank or contain only "-" are treated as empty.
    def recipe_from_grid(grid)
      recipe = Recipe.new

      last_row = Recipe::TABLE_HEIGHT - 1

      (0...Recipe::TABLE_WIDTH).each do |x|
        (0...Recipe::TABLE_HEIGHT).each do |y|
          # the grid lists its top row first, but y=0 is the bottom row
          cell = grid[last_row - y][x]

          # compare against the whole cell, so that ingredient names which
          # merely contain a hyphen are not mistaken for empty cells
          next if cell.nil? || cell.strip == "-"

          recipe[x,y] = cell
        end
      end

      recipe
    end
  end
end
```

This object is boring enough that I originally had planned to not implement it at all, in favor of having a `Cookbook.from_csv` method and perhaps a `Recipe.from_grid` method. However, I am increasingly growing suspicious of the presence of too many factory methods on objects, and worried that I'd be mixing the concerns of data extraction and data manipulation too much by doing that. In particular, I would have had to figure out a way to avoid directly referencing the "-" string as an indicator of an empty cell in `Recipe.from_grid`, and I didn't want to focus my energy on that because it felt like a waste of time.

This code represents a small compromise in that it isolates something that doesn't quite have a natural home so that it can be refactored later into something more elegant. Because this is a bolt-on feature, I felt comfortable making that trade so that I could focus more on the heart of the problem. However, if data import needs became more complex, this code would almost certainly need to be refactored into something more well organized.

### Reflections

Hopefully this article has given you a strong sense of how deep even seemingly simple game constructs can be if you really think them through. In my experience, this phenomenon is strikingly similar to the kinds of complexity that arise naturally in even moderately complicated business applications. The main difference is that in a practice environment, you don't need to worry about how much money you're costing someone else by spending as much time thinking about the problem as you do writing implementation code.

While doing deliberate practice of this variety, it is perfectly acceptable to actively seek out ways to induce analysis paralysis, premature optimization, and extreme over-engineering. In fact, the closer you get to feeling like your solution is completely overkill for the problem at hand, the more likely it is that you're going to learn something useful from the exercise. Experiencing the tensions that arise from this kind of perfectionism in a low-risk environment can make it a lot easier to take a middle of the road path when dealing with your day to day work.

The thing I like most about this sort of exercise is that it will often lead you to come across patterns or techniques that actually are directly applicable in practical scenarios. Whenever I stumble across a technique which is just as easy to implement as a more commonly used alternative but is more robust in some way, I tend to experiment with introducing those ideas into my production code to see how they work out for me. Sometimes these experiments work and other times they don't, but they always improve my understanding of why I do things the way I do.

While I remain a firm believer in the idea that deliberate practice should be done only in moderation and that there is no substitute for working on real problems that matter to you, the occasional sitting or two spent on shaking up what you think you know about this craft is well worth the effort. There are lots of different ways to do that, but this is the way that works for me. I'd love to hear what you think of it, and would also like to hear what other ways you've tried to hone your problem solving skills. 


================================================
FILE: articles/v3/010-robustness.md
================================================
Writing robust code is always challenging, even when dealing with extremely well controlled environments. But when you enter the danger zone where software failures can result in data loss or extended service interruptions, coding for robustness becomes essential even if it is inconvenient to do so. 

In this article, I will share some of the lessons I've learned about building
stable software through my work on the [Newman mail framework](https://github.com/mendicant-original/newman). While the techniques I've discovered so far are fairly ordinary, it was easy to underestimate their importance in the early stages of the project's development. My hope is that by exposing my stumbling points, it will save others from making the same mistakes.

### Lesson 1: Collect enough information about your workflow.

In many contexts, collecting the information you need to analyze a failure is the easy part of the debugging process. When your environment is well controlled, a good stack trace combined with a few well placed `puts` statements are often all you need to start reproducing an error in your development environment. Unfortunately, these well-worn strategies are not nearly as effective for debugging application frameworks.

To get a clearer sense of the problem, consider that Newman's server software knows almost nothing about the applications it runs, nor does it know much of anything about the structures of the emails it is processing. It also cannot assume that its interactions with external IMAP and SMTP servers will be perfectly stable. In this kind of environment, something can go wrong at every possible turn. This means that in order to find out where and why a failure occurred, it is necessary to make the sequence of events easier to analyze by introducing some sort of logging system.

A good place to start when introducing event logging is with the outermost layer of the system. In Newman's case, this means tracking information about every incoming and outgoing email, as shown below: 

```
I, [2012-03-10T12:46:57.274091 #9841]  INFO -- REQUEST: 
{:from=>["gregory_brown@letterboxes.org"], :to=>["test+ping@test.com"],
:bcc=>nil, :subject=>"hi there!", :reply_to=>nil}

I, [2012-03-10T12:46:57.274896 #9841]  INFO -- RESPONSE: 
{:from=>["test@test.com"], :to=>["gregory_brown@letterboxes.org"], 
:bcc=>nil, :subject=>"pong", :reply_to=>nil}
```

Because Newman currently only understands how to filter messages based on their TO and SUBJECT fields, the standard log information is fairly helpful for basic application debugging needs. However, when dealing with complex problems, it is nice to be able to see [the raw contents of the messages](https://raw.github.com/gist/01fbab481a21f4d43bbf/0778e1a0ae887e6423bce985298e3f8d60eb37a0/gistfile1.txt). Rather than choosing one or the other, Newman handles both log formats by outputting them at different log levels:

```ruby
module Newman
  # Mixin that writes a single email to a logger at two levels of detail.
  module EmailLogger
    # Logs `email` twice under the label `prefix`: the full message text
    # (preceded by a newline) at DEBUG level, then a short summary hash at
    # INFO level. The messages are passed as blocks, so building them is left
    # up to the logger.
    def log_email(logger, prefix, email)
      logger.debug(prefix) do
        "\n#{email}"
      end

      logger.info(prefix) do
        email_summary(email)
      end
    end

    # Builds a hash holding only the from / to / bcc / subject / reply_to
    # fields of `email`, in that order.
    def email_summary(email)
      [:from, :to, :bcc, :subject, :reply_to].each_with_object({}) do |field, summary|
        summary[field] = email.public_send(field)
      end
    end

    private :email_summary
  end
end
```

Having the ability to dynamically decide what level of detail your log output should contain is one of the main advantages of using a proper logging system instead of directly outputting messages to the console. While it would be possible to implement some sort of information filtering mechanism without using a formal logging system, doing so would involve reinventing many of the things that the `Logger` standard library already provides for you.

The cost of introducing a logging system is that once you depend on logs for your debugging information, some form of exception logging becomes absolutely necessary. Because failures can be very context-dependent, deciding how to handle them can be tricky. 

### Lesson 2: Plan for various kinds of predictable failures.

Because Newman does not know anything about the applications it runs except that they all implement a `call` method, it is not possible to be selective about what kinds of errors to handle. Instead, a catch-all mechanism is implemented in the `process_request` method:

```ruby
module Newman
  class Server
    # Hands the request/response pair to every app in `apps`, in order.
    #
    # Returns true when all apps ran without raising. When an app raises a
    # StandardError, the `raise_exceptions` setting decides what happens:
    # if it is set the error is re-raised, otherwise the error is logged
    # (summary at INFO, summary plus backtrace at DEBUG) and false is returned.
    def process_request(request, response)
      begin
        apps.each do |application|
          application.call(:request  => request,
                           :response => response,
                           :settings => settings,
                           :logger   => logger)
        end
      rescue StandardError => error
        # Bare raise re-raises the error that is currently being handled.
        raise if settings.service.raise_exceptions

        logger.info("APP ERROR")  { error.inspect }
        logger.debug("APP ERROR") { "#{error.inspect}\n#{error.backtrace.join("\n  ")}" }

        return false
      end

      true
    end
  end
end
```

If you trace the execution path through this method, you'll find that there are three possible outcomes. If everything worked as expected, the method simply returns true. However, if an exception is raised by one of the applications, then the `raise_exceptions` configuration setting is used to determine whether to simply re-raise the exception or log the error and return false.

The reason `Newman::Server#process_request` is implemented in this somewhat awkward way is that it is necessary to let the application developer determine whether or not application errors should crash the server. Generally speaking, this would be a bad behavior in production, because it means that a single fault in an edge case of a single feature could halt a whole service that is otherwise working as expected. However, when it comes to writing tests, it might be nice for applications to raise their exceptions rather than quietly writing stack traces to a log file and moving on. This pair of competing concerns explains why the `raise_exceptions` configuration option exists, even if it leads to ugly implementation code.

While `Newman::Server#process_request` does a good job of handling application errors, there are a range of failures that can happen as a result of server operations as well. This means that `Newman::Server#tick` needs to implement its own exception handling and logging, as shown below:

```ruby
module Newman
  class Server
    # Performs one pass over the messages reported by the mailer: for each
    # one, a reply addressed back to the sender is built, handed to
    # `process_request` along with the original message, and delivered only
    # if `process_request` returns a truthy value.
    #
    # Any StandardError raised along the way is logged as fatal (with its
    # backtrace) and then re-raised to the caller.
    def tick
      begin
        mailer.messages.each do |incoming|
          reply = mailer.new_message(:to   => incoming.from,
                                     :from => settings.service.default_sender)

          reply.deliver if process_request(incoming, reply)
        end
      rescue StandardError => error
        logger.fatal("SERVER ERROR") { "#{error.inspect}\n#{error.backtrace.join("\n  ")}" }
        raise
      end
    end
  end
end
```

While it may be possible to recover from some of the errors that occur at the server level, there are many problems which simply cannot be recovered from automatically. For this reason, `Newman::Server#tick` always re-raises the exceptions it encounters after logging them as fatal errors. While implementing this method in such a conservative way helps shield against dangerous failures, it does not completely prevent them from occurring. Sadly, that is a lesson I ended up learning the hard way.

### Lesson 3: Reduce the impact of catastrophic failures. 

A few days before this article was published, I accidentally introduced an
infinite send/receive loop into the experimental Newman-based mailing list system [MailWhale](https://github.com/mendicant-original/mail_whale). I caught the problem right away, but not before my email provider banned me for 1 hour for exceeding my send quota. In the few minutes of chaos before I figured out what was going wrong, there was a window of time in which any incoming emails would simply be dropped, resulting in data loss.

It's painful to imagine what would have happened if this failure occurred while someone wasn't actively babysitting the server. While the process was crashing with a `Net::SMTPFatalError` each time cron ran it, this happened after reading all incoming mail. As a result, the incoming mail would get dropped from the inbox without any response, failing silently. Once the quota was lifted, a single email would cause the server to start thrashing again, eventually leading to a permanent ban. In addition to these problems, anyone using the mailing list would be bombarded with at least a few duplicate emails before the quota kicked in each time. Although I was fortunate to not live out this scenario, the mere thought of it sends chills down my spine.

While the infinite loop I introduced could probably be avoided by doing some simple checks in Newman, the problem of the server failing repeatedly is a general defect that could cause all sorts of different problems down the line. To solve this problem, I've implemented a simplified version of the [circuit breaker](http://en.wikipedia.org/wiki/Circuit_breaker_design_pattern) pattern in MailWhale, as shown below:

```ruby
require "fileutils"

# unimportant details omitted...

begin
  # A leftover lock file means an earlier run ended with an exception, so
  # refuse to do any work until the file has been removed.
  # (File.exist? is used because the File.exists? alias was deprecated and
  # later removed from Ruby.)
  if File.exist?("server.lock")
    abort("Server is locked because of an unclean shutdown. Check "+
          "the logs to see what went wrong, and delete server.lock "+
          "if the problem has been resolved")
  end

  server.tick
rescue Exception # used to match *all* exceptions
  # Write the lock file before letting the failure propagate. The SystemExit
  # raised by the abort call above also lands here; touching a lock file that
  # already exists is harmless.
  FileUtils.touch("server.lock")
  raise
end
```

With this small change, any exception raised by the server will cause a lock file to be written out to disk, which will then be detected the next time the server runs. As long as the `server.lock` file is present, the server will immediately shut itself down rather than continuing on with its processing. This forces someone (or some other automated process) to intervene in order for the server to resume normal operations. As a result, repeated failure is a whole lot less likely to occur. 

If this circuit breaker were in place when I triggered the original infinite loop, I would have still exceeded my quota, but the only data loss would be the first request the server failed to respond to. All email that was sent in the interim would remain in the inbox until the problem was fixed, and there would be no chance that the server would continue to thrash without someone noticing that an unclean shutdown had occurred. This is clearly a better behavior, and perhaps this is how things should have been implemented in the first place.

Of course, we now have the problem that this code is a bit too aggressive. There are likely to be many kinds of failures which are transient in nature, and shutting down the server and hard-locking it like this feels overkill for those scenarios. However, I am gradually learning that it is better to whitelist things than blacklist them when you can't easily enumerate what can possibly go wrong. For that reason I've chosen to go with this extremely conservative solution, but I will need to put this technique through its paces a bit before I can decide whether it is really the right approach. 

### Reflections

I originally planned to cover many more lessons in this article, but the more I worked on it, the more I realized my own lack of experience in producing truly robust software. When it comes to email, it's like the entire problem space is one big fuzz test: there seems to be an infinite number of ways for things to crash and burn.

In addition to the few issues I have already outlined, Newman is going to need to jump many more hurdles before it can be considered stable. In particular, I need to sort out the following problems:

* Sometimes connections via IMAP can hang indefinitely, so some sort of timeout logic needs to be introduced. To deal with this, I'm thinking of looking into the [retriable](https://github.com/kamui/retriable) gem.

* In one of my test runs of our simple ping/pong application, I ended up causing Newman to reply to a Gmail mailer daemon, which caused a request/response loop to occur. Thankfully, Gmail's daemon gave up after a few tries, but if it didn't I would have ended up melting my inbox again. This means that Newman will need some way to deal with bounced emails. We've looked at some options for this, but most involve some pretty messy heuristics that make the cure look worse than the disease.

* Currently it is relatively straightforward to write automated tests to reproduce known issues, but very hard for me to come up with realistic test scenarios in a proactive way. This means that while we can shore up Newman's stability over time in this fashion, we'll always be trailing behind on the problems we haven't encountered yet. I need to look into whether there are some email-based acid tests I can run the server against.

* There is still a great deal of ugliness / fragility in the way Newman does its exception handling. The techniques I've shown in this article are meant to be considered a rough starting point, not a set of best practices. I plan to re-read Avdi Grimm's [Exceptional Ruby](http://exceptionalruby.com/) and see what ideas I can apply from it. When I first read that book I thought many of the techniques it recommended were overkill for day to day Ruby applications, but several of them may be just what Newman needs.

The bad news is that all of the above problems seem challenging enough to deal with, but they're likely to be just the first set of roadblocks on the highway to the danger zone. There are still a lot of unknown-unknowns that may get in my way. The good news is that because I can take my time while working on this project, the uncertainty of things is part of what makes this a fun problem to work on.

Have you ever had a similar experience of coding in a dangerous and poorly-defined environment before? If so, I'd love to hear your story, as well as any advice you might have for me.


================================================
FILE: articles/v3/README.md
================================================
These articles are from Practicing Ruby's third volume, which ran from
2012-01-03 to 2012-03-13. The manuscripts in this folder correspond to the
following articles on practicingruby.com:

* [Issue 3.1: The qualities of great software](https://practicingruby.com/articles/shared/hhgcloeuoega) (2012.01.03)
* [Issue 3.2: Patterns for building excellent examples](https://practicingruby.com/articles/shared/wfsyjrkiwidq) (2012.01.10)
* [Issue 3.3: Exploring the depths of a Turing tarpit](https://practicingruby.com/articles/shared/bwgflabwncjv) (2012.01.17)
* [Issue 3.4: Climbing the spiral staircase of refactoring](https://practicingruby.com/articles/shared/ndejcopauhne) (2012.01.25)
* [Issue 3.5: Framework design and implementation, Part 1](https://practicingruby.com/articles/shared/rtzdzdwwzfxm) (2012.02.02)
* [Issue 3.6: Framework design and implementation, Part 2](https://practicingruby.com/articles/shared/exckjeiytsaw) (2012.02.08)
* [Issue 3.7: Criteria for disciplined inheritance, Part 1](https://practicingruby.com/articles/shared/uvgdkprzmoqf) (2012.02.15)
* [Issue 3.8: Criteria for disciplined inheritance, Part 2](https://practicingruby.com/articles/shared/lxgettcjiggh) (2012.02.21)
* [Issue 3.9: Using games to practice domain modeling](https://practicingruby.com/articles/shared/ihlfxtwgquny) (2012.02.28)
* [Issue 3.10: Lessons learned from coding in the danger zone](https://practicingruby.com/articles/shared/lwvegkvhalqr) (2012.03.13)

If you enjoy what you read here, please subscribe to [Practicing Ruby](http://practicingruby.com). These articles would not exist without the support of our paid subscribers.


================================================
FILE: articles/v4/001-testing-costs-benefits.md
================================================
Over the last several years, Ruby programmers have gained a reputation of being
*test obsessed* -- a designation that many of our community members consider to
be a badge of honor. While I share their enthusiasm to some extent, I can't help but notice
how dangerous it is to treat any single methodology as if it were a panacea.

Our unchecked passion about [test-driven
development](http://en.wikipedia.org/wiki/Test-driven_development) (TDD) has paved the way for deeply
dogmatic thinking to become our cultural norm. As a result, many vocal members
of our community have oversold the benefits of test-driven development 
while downplaying or outright ignoring some of its costs. While I don't doubt
the good intentions of those who have advocated TDD in this
way, I feel strongly that this tendency to play fast and loose with very complex
ideas ends up generating more heat than light.

To truly evaluate the impact that TDD can have on our work, we need to go 
beyond the anecdotes of our community leaders and seek answers to 
two important questions:

> 1) What evidence-based arguments are there for using TDD? 

> 2) How can we evaluate the costs and benefits of TDD in our own work?

In this article, I will address both of these questions and share with you my
plans to investigate the true costs and benefits of TDD in a more rigorous and
introspective way than I have done in the past. My hope is that by considering a
broad spectrum of concerns with a fair amount of precision, I will be able to
share relevant experiences that may help you challenge and test your own 
assumptions about test-driven development.

### What evidence-based arguments are there for using TDD? 

Before publishing this article, I conducted a survey that collected thoughts
from Practicing Ruby readers about the costs and benefits of test-driven
development that they have personally experienced. Over 50 individuals responded, and
as you might expect there was a good deal of diversity in replies. However, the
following common assumptions about TDD stood out:

```diff
+ Increased confidence in developers working on test-driven codebases
+ Increased protection from defects, especially regressions
+ Better code quality (in particular, less coupling and higher cohesion)
+ Tests as a replacement/supplement to other forms of documentation
+ Improved maintainability and changeability of codebases
+ Ability to refactor without fear of breaking things
+ Ability of tests to act as a "living specification" of expected behavior
+ Earlier detection of misunderstandings/ambiguities in requirements
+ Smaller production codebases with more simple designs
+ Easier detection of flaws in the interactions between objects
+ Reduced need for manual testing
+ Faster feedback loop for discovering whether an implementation is correct
- Slower per-feature development work because tests take a lot of time to write
- Steep learning curve due to so many different testing tools / methodologies
- Increased cost of test maintenance as projects get larger
- Some time wasted on fixing "brittle tests"
- Effectiveness is highly dependent on experience/discipline of dev team
- Difficulty figuring out where to get started on new projects
- Reduced ability to quickly produce quick and dirty prototypes
- Difficulty in evaluating how much time TDD costs vs. how much it saves
- Reduced productivity due to slow test runs
- High setup costs
```

Before conducting this survey, I compiled [my own list of
assumptions](https://gist.github.com/2277788) about test-driven 
development, and I was initially relieved to see that there was a high degree of
overlap between my intuition and the experiences that Practicing Ruby 
readers had reported on. However, my hopes of finding some solid ground to stand
on were shattered when I realized that virtually all of these claims did not have
any conclusive empirical evidence to support them.

Searching the web for answers, I stumbled across a great [three-part
article](http://scrumology.com/the-benefits-of-tdd-are-neither-clear-nor-are-they-immediately-apparent/)
 called "The benefits of TDD are neither clear nor are they immediately
apparent", which presents a fairly convincing argument that we don't know as
much about the effect of TDD on our craft as we think we do. The whole article is
worth reading, but this paragraph in [part
3](http://scrumology.com/the-benefits-of-tdd-why-tdd-part-3/) really grabbed my
attention:

> Eighteen months ago, I would have said that TDD was a slam dunk. Now that I’ve taken the time to look at the papers more closely … and actually read more than just the introduction and conclusion … I would say that the only honest conclusion is that TDD results in more tests and by implication, fewer defects. Any other conclusions such as better design, better APIs, simpler design, lower complexity, increased productivity, more maintainable code etc., are simply not supported.

Throughout the article, the author emphasizes that he believes in the value of
TDD and seems to think that the inconsistency of rigor and quality in the
studies at least partially explains why their results do not mirror the 
expectations of practitioners. He even offers some standards for what he 
believes would make for more reliable studies on TDD:

> My off-the-top-of-my-head list of criteria for such a study, includes (a) a multi year study with a minimum of 3 years consecutive years (b) a study of several teams (c) team sizes must be 7 (+/-2) team members and have (d) at least 4 full time developers. Finally, (e) it needs to be a study of a product in production, as opposed to a study based on student exercises. Given such as study it would be difficult to argue their conclusions, whatever they be.

His points (c) and (d) about team size seem subject to debate, but it is fair
to say that studies should at least consider many different team sizes as
opposed to focusing on individual developers exclusively. All other points he
makes seem essential to ensuring that results remain tied to reality, but he
goes on to conclude that his requirements are so complicated and costly to 
implement that it could explain why all existing studies fall short of this gold
standard.

Intrigued by this article, I went on to look into whether there were other, more
authoritative sources of information about the overall findings of research on
test-driven development. As luck would have it, the O'Reilly book on
evidence-based software engineering ([Making
Software](http://www.amazon.com/Making-Software-Really-Works-Believe/dp/0596808321)) had a chapter on this
topic called "How effective is test-driven development?" which followed a
similar story arc.

In this chapter, five researchers present the result of their systematic review of 
quantitative studies on test driven development. After analyzing what published 
literature says about internal quality, external quality, productivity, 
and correctness testing, the researchers found some evidence that both 
correctness  testing and external quality are improved through TDD. However, 
upon limiting the scope to well-defined studies only, the positive effect 
on external quality disappears, and even the effect on correctness 
testing weakens significantly. In other words, their conclusion matched the
conclusions of the previously mentioned article: <u>*there is simply not a whole lot of
science supporting our feverish advocacy of TDD and its benefits.*</u>

While the lack of rigorous and conclusive evidence is disconcerting, it is not 
necessarily a sign that our perception of the costs and benefits of 
TDD is invalid. Instead, we should treat these findings as an invitation to
slow down and look at our own decision making process in a more careful and
introspective way. 

### How can we evaluate the costs and benefits of TDD in our own work?

Because there are very few evidence-supported generalizations that can be made
about test-driven development, we each have the responsibility to discover for
ourselves what effects the red-green-refactor cycle truly has on our work. But
based on my personal experience, many of us have a long way to go before we can
even begin to answer this question.

In the process of preparing this article, I ended up identifying three
guidelines that I feel are essential for any sort of introspective evaluation. I
have listed them below, along with some brief notes on how I have failed
miserably at meeting these preconditions when it comes to analyzing TDD.

---

**1) We must be aware of our assumptions, and be willing to test them.**

_How I failed to do this:_ As someone who learned TDD primarily because other smart people
told me it was the right way to do things, my personal opinions about testing
were developed reactively rather than proactively. As a result, I have ignored certain 
observations and amplified others to fit a particular mental model that is
mostly informed by gut reactions rather than reasoned choices.

**2) We must be aware of our limitations and try to overcome them.**

_How I failed to do this:_ My mixed feelings towards TDD are in part due to my
own lack of effort to fully understand the methodology. 
While I may have done enough formal practice to have some basic intuitive sense of what
the red-green-refactor cycle is like, I have never been able to sustain 
a pure TDD workflow over the entire lifetime of any reasonably complex
project that I have worked on. As a result, it is likely that I have been 
blaming testing tools and methodologies for some of my own deficiencies.

**3) We must be continuously mindful of context and avoid over-generalization.**

_How I failed to do this:_ I have always been irked by the lack of sufficient context in literature about
test-driven development, but I have found myself guilty of committing a similar
crime on numerous occasions. Even when I have tried to use specific examples to support
my arguments, I have often failed to consider that my working environment is very
different than that of most programmers. As a result, I have made more than a few
sweeping generalizations which could be invalid at best and misleading at worst.

---

If I had to guess why I approached TDD in such a haphazard way despite my
tendency to treat other areas of software development with a lot more 
careful attention, I would say it was a combination of immaturity and a 
deeply overcommitted work schedule. When I first learned Ruby in 2004, I 
studied just enough about software testing and the TDD workflow to get 
by, and then after that only brushed up on my software testing skills 
when it was absolutely essential to do so. There was simply too much to learn
about and not enough time, and so I never ended up giving TDD as much attention 
as it might have deserved.

Like most things that get learned in this fashion, my knowledge of software
testing in the test-driven style is full of gaping holes and 
dark corners. Until recently this is something I have always been able to work
around, but my role as a teacher has forced me to identify this as a real weak 
spot of mine that needs to be dealt with. 

### Looking at TDD from a fresh perspective

Relearning the fundamentals of test-driven development is the only way 
I am ever going to come up with a coherent explanation for [my 
assumptions](https://gist.github.com/2277788) about the costs and benefits of this kind of workflow, 
and is also the only way that I will be able to break free from various 
misconceptions that I have been carrying around for the better part of 
a decade.

For a period of 90 days from 2012-04-10 to 2012-07-09, I plan to follow 
disciplined TDD practices as much as possible. The exact process I want 
to adopt is reflected in the handy-dandy flow chart shown below:

<div align="center">
<img
src="http://upload.wikimedia.org/wikipedia/en/9/9c/Test-driven_development.PNG"
title="Image Credit: Excirial on Wikipedia CC-SA" >
</div>

This is a workflow that I am already quite familiar with and have practiced
before, but the difference this time around is that I'm going to avoid cutting
corners. In the past, I have usually started projects by spiking a rough
prototype before settling into a TDD workflow, and that may have dampened the
effect that writing tests up front could have had on my early design process in
those projects. I have also practiced refactoring in the large rather than the
small fairly often, favoring a Red-Green-Red-Green-...-Red-Green-Refactor
pattern which almost certainly led to more brittle tests and implementations
than I might have been able to come up with if I were more disciplined.
Throughout this three month trial period, I plan to think long and hard before
making any deviations from standard practice, and will be sure to note whenever
I do so.

The benefit of revisiting this methodology as an experienced developer is that I
have a whole lot more confidence in my ability to be diligent in my efforts. In
particular, I plan to take careful notes during each and every coding session
about my TDD struggles and triumphs, which I will associate with particular
changesets on particular projects. Before writing this article I did a test run
of how this might work out, and you can 
[check out these notes](https://gist.github.com/2286918) to get a sense of what 
I am shooting for. I think the [github compare
view](https://github.com/sandal/puzzlenode-solutions/compare/9070...3b79) will 
really come in handy for this kind of note-taking, as it will allow me to track 
my progress with a high degree of precision. 

I don't plan to simply use these notes for subjectively analyzing my own
progress, but also expect to use them as a way to seek out advice and help from
my friends who seem to have strongly integrated test-driven development into their
working practices. Having particular code samples to share along with some additional 
context will go a long way towards helping me ask the right kinds of 
questions that will move me forward. Each time I reach a stumbling point or
discover a pattern that is influencing my work (for better or for worse), I will
request some feedback from someone who might be able to help. When I was
learning TDD the first time around I might have avoided asking "stupid
questions" as a way to hide my ignorance, but this time I am intentionally
trying to expose my weaknesses so that they can be dealt with.

After this 90 day period of disciplined study and practice of test-driven
development, I will collect my notes and attempt to summarize my findings.
If I have enough interesting results to share, I will publish them in Issue 4.12
of Practicing Ruby towards the end of July 2012. At that time, I will also
attempt to take a slightly more informed guess at the "cost and benefits"
question that led me to write this article in the first place, and will comment
on how this disciplined period of practice has influenced my assumptions about
TDD.

### Predictions about what will be discovered

While certain things are best left to be a mystery, there are a few predictions
I can make about the outcomes of this project. These are mostly "just
for fun", but also may help reveal some of my biases and expectations:

* I expect that I will reverse my position on several criticisms of test-driven
  development as I learn more about practicing it properly.

* I expect that I will understand more of the claims that I feel are either
  overstated or lacking in context, and will either be able to form a more
  balanced view of them or meaningfully express my reservations about them.

* I expect that I will stop exclusively doing pure test-driven development as
  soon as this trial period is over, but think it is very likely that I will
  use TDD more often and more skillfully in the future.

* I expect to be just as frustrated about the extra work involved in TDD
  by the end of this study as I am now.

* I expect that simply by measuring my progress and reflecting on it, I
  will learn a lot of interesting things that aren't related to TDD at all,
  and that will help me write better Practicing Ruby articles!

I will do my best not to allow these predictions to become self-fulfilling
prophecies and just go with the flow, but I feel it is important to expose 
the lens that I will be viewing my experiences through.

### Limitations of this method of study

The method I am using to reflect on my studies is to some extent a legitimate
form of qualitative research that may be useful for more than just improving
my own skillset. I am essentially conducting a diary study, which is 
the [same technique that Donald Knuth used](http://books.google.com/books?id=DxuGi5h2-HEC&lpg=PA58&dq=Reading%20Qualitative%20Research%20Knuth&pg=PA58#v=onepage&q=Reading%20Qualitative%20Research%20Knuth&f=false)
in an attempt to categorize the different kinds of errors found in TeX. This 
technique is also used in marketing and usability
research, and can provide interesting insights into the experiences of
individuals with sufficient context to be analyzed in a fairly rigorous way.
However, I am not a scientist and this is not a scientific study, and so there
are a ton of limitations that can threaten the validity of any claims made about 
the results of this project.

The first and most obvious limitation is that this is a self-study, and that I
am already chock full of my own assumptions and biases. My main goal is to learn
more about TDD and come up with better reasons for the decisions I make about
how I practice software testing, but it is impossible for me to wipe the slate
completely clean and serve as an objective source of information on this topic.

On top of this, I will be discussing things entirely in terms of my experiences
and won't have many objective measures to work with. My hope is that tagging
my notes with links back to particular changesets will make it possible to apply 
some quantitative measures after this study is completed, but it is hard to say
whether that will be feasible or whether it would even mean anything if I
attempted to do that. Without hard numbers, my results will not be
directly comparable to anyone else's, nor can they say anything about the average
developer's experience.

Lastly, when I look back on my notes from the 90 day period, it may be hard for
me to reestablish the context of the early days of the study. This means that my
final report may be strongly biased by whatever ends up happening towards the 
end of the trial period. While I expect that I will be able to make some high-level 
comparisons across the whole time period, I will not be able to precisely 
compare my experiences on day 5 with my experiences on day 85 even if I take
very detailed notes. This may cause some important insights to get lost in the
shuffle.

My hope is that by staying communicative during this study and by sharing most
or all of my raw data (code, notes, etc.), the effects of these limitations will
be reduced so that others can still gain something useful from my
efforts. At the very least, this transparency will allow individuals 
to decide for themselves to what extent my conclusions match up with my
evidence, and whether my results are relevant to other contexts.

### Some things you can do to help me

One thing I know about Practicing Ruby readers is that you folks really enjoy 
improving the craft of software development. That is the reason why I decided 
to announce my plans for this study via an article here rather than 
somewhere else. If you would like to support this project,
there are a few ways you can contribute.

**If you have a few seconds to spare:** You can spread the word about this
project by sharing this article with your friends and colleagues. This will help
me make sure to get adequate critical review from the community, which is a key
part of the improvement process. To create a share link, just click the handy
dandy robot down in the bottom right corner of the screen.

**If you have a few minutes to spare:** You can leave a comment sharing your
thoughts on this article as well as any questions or suggestions you might have
for me. I take all reader feedback to heart, and comments are one of the best
ways that you can support my work on these articles. 

**If you have a few minutes to spare each week:** You can subscribe to the
[mendicant-research](http://lists.rubymendicant.org/listinfo.cgi/mendicant-research-rubymendicant.org)
mailing list, where I plan to post my questions about TDD as
I study, as well as any interesting problems I run into or helpful learning
resources I end up using. I am also going to invite a few folks from the Ruby
community that I think have specific skills that will help me with this study,
but I feel that every practicing Rubyist could be a meaningful contributor to
these discussions.

**If you have a large amount of free time:** You can try to do this study along with me.
I can't promise that I'll have time during the 90 day period to regularly review
your progress, but I can definitely help you get set up and also would love to
compare notes at the end of the trial period. If this is something that
interests you, please post to the
[mendicant-research](http://lists.rubymendicant.org/listinfo.cgi/mendicant-research-rubymendicant.org) mailing list and I'll provide additional details.

Any little bit of effort you spend on helping me make this project better will
absolutely be appreciated! Our readers are what make this journal what it is, I
just work here. :wink:


================================================
FILE: articles/v4/002-moving-beyond-the-first-release.md
================================================
In Issue 2.10, I described the path I took [from requirements discovery to
release](http://practicingruby.com/articles/10) for a small game I created,
including various corners I had cut to get an initial release out the door.
While that article was very well received, it left an important question
unanswered: **How do you transform a prototype into a product?**

To answer that question, I have built a new little game for us to play with. But
this time around, rather than outlining the path from the idea phase to a basic
proof-of-concept, I will instead focus on what it takes to turn a rough prototype into a
game that might actually be fun to play. Because that is a pretty big topic, I
have split this article into two parts. This first part describes the initial
prototype and my plans for improving it, and the second part will reflect on the
challenges I encountered while trying to make those improvements. 

### Blind: A Minesweeper-inspired game with a massive twist

The game I created for this article is based on a very simple concept:
Navigating a mine field in search of an exit. While the premise is a bit
different from the classic Minesweeper game, the basic idea of carefully trying
to figure out where the mines are without getting yourself blown up is
preserved. The graphic below gives a rough approximation of what the structure of
the Blind world is like:

<div align="center">
<img src="http://i.imgur.com/lvRaj.png">
</div>

The game starts with the player positioned at the center of the safe zone, a
buffer zone that exists to prevent mines from being detonated as soon as the
player spawns. Within the mine field itself, a large quantity of mines are
randomly spawned and there is no way for the player to know in advance exactly
where they will be. This area is also where the exit gets randomly positioned,
forcing the player to navigate the mine field to find their way out of the
level. 

There is one way to win and two ways to lose a game of Blind. As you might imagine,
getting too close to a mine is one of the ways to lose; the other is to wander off
the edge of the map into deep space. To do this, the player needs to ignore a
pretty obvious warning sign, but this loss condition helps make sure the player
stays within a well defined perimeter throughout the game. The only way to win
is to find the exit, which can be easy or hard depending on the positions and
quantities of mines in the minefield.

From this description, you might be imagining some sort of simple
two-dimensional arcade game, complete with a hard to control little ship
(perhaps similar to Asteroids), but as I mentioned before, this game has a
massive twist: It has no graphics at all! Instead, it relies entirely on
[positional audio](http://en.wikipedia.org/wiki/3D_audio_effect) to represent its world. To get a taste of what the gameplay is like, grab a pair of headphones and play the video shown below.

> **NOTE:** I have turned on the debugging output so that this video can be played
on mute by those who either dislike loud noises or are reading this article
in a place where they can't play audio. However, it is worth noting that the
game is designed to be played without any visual feedback at all,
and that while testing it I have typically played with my eyes closed! 

<div align="center">
<iframe width="640" height="480" src="//www.youtube.com/embed/cM0WHWgdmQk" frameborder="0" allowfullscreen></iframe>
</div>

The "audio only" twist is enough to inject some excitement into a very boring
game concept, but right now the game is not particularly enjoyable to play because it is poorly
balanced. I will talk about ideas I have for improving that later, but for now
we should move on to discuss the implementation code that got me to the point
where I could make this short video.

### An overview of Blind's initial implementation

I was originally going to use Blind as a demonstration of how to implement
layered applications that have clean separation between their business logic and
UI code, so the code quality is better than what I usually start out with in proof-of-concept projects. 
While you might be able to read the [full implementation]() without too much effort, 
the following outline will give you an overview of how the codebase is structured without 
dragging you too far out into the weeds.

**[Blind::Game]() triggers events in response to the player's movements**

One decision I made early on in the process of building Blind was that I wanted
the main `Blind::Game` object to be based on a publish/subscribe model. Whenever
I have worked on games in the past, I have always struggled to figure out how to
encapsulate the rules without writing extremely brittle code, and this time
around I think I found a nice happy medium. 

If you look at the `Game#move`
method below, you can see that while it captures all the different kinds of
events that can occur within the game, it leaves it up to someone else to
determine what should happen in response to those events. This flexibility will
hopefully come in handy as the game rules evolve over time.

```ruby
module Blind
  class Game
    # ...
    
    def move(dx, dy)
      x,y = world.current_position.to_a

      r1 = world.current_region
      r2 = world.move_to(x + dx, y + dy)

      if r1 != r2
        broadcast_event(:leave_region, r1)
        broadcast_event(:enter_region, r2)
      end

      mines = world.mine_positions

      if mines.find { |e| world.distance(e) < MINE_DETONATION_RANGE }
        broadcast_event(:mine_detonated)
      end

      if world.distance(world.exit_position) < EXIT_ACTIVATION_RANGE
        broadcast_event(:exit_located)
      end
    end
  end
end
```

While I will show some more examples of this event system when we discuss Blind's
presentation layer, the following tests hint at how the publish/subscribe
system works. In a nutshell, the `Game#on_event` method sets up callbacks that get executed whenever `Game#broadcast_event` is called with a matching key.

```ruby
  it "must trigger an event when a mine is detonated" do
    detonated = false

    game.on_event(:mine_detonated) { detonated = true }

    mine = world.mine_positions.first

    game.move(mine.x - Blind::Game::MINE_DETONATION_RANGE, mine.y)

    refute detonated, "should not be detonated before " +
                      "player is in the mine's range"

    game.move(1, 0)

    assert detonated, "should detonate when player is in the mine's range"
  end
```

While I am fairly happy with the implementation of `Blind::Game`, both the
implementation code and the tests hint at the current dependency on a pair of
magic numbers stored in the `EXIT_ACTIVATION_RANGE` and
`MINE_DETONATION_RANGE` constants. This was done purely for the sake of
convenience, but I imagine they will need to be parameterized at some point in
the future to make the game rules more customizable.

Another potential pitfall of this design is that its open-ended flexibility may
prove to be a double edged sword. I will be looking at this closely as I
increase the complexity of the game rules, but what I want to avoid is having a
big chunk of the game logic spill over into the client code unnecessarily. This
wasn't a major concern with the initial implementation, but it is definitely
something to look out for later.

Lastly, the event system as it is currently implemented assumes that there is a
single subscriber for each published event. This is an arbitrary limitation that
can easily be lifted, but for now it is something to be aware of that may
need to be dealt with later.

**[Blind::World]() models the layout of the game world**

In practice, the `Blind::World` class has been working out reasonably well.
However, it is a bit of a structural mess due to its broad spectrum of
responsibilities. In particular, `Blind::World` can be used to:

* Track where the mines, exit, and player are in the world
* Compute the distance between the player and any other object in the world
* Determine the region the player is currently in
* Move the player to an arbitrary location in the world
* Generate random locations within the minefield

While you might be able to group some of these concepts together, it is clear
that when taken as a set, these features fail to represent a single cohesive
object. In particular, the methods provided by this object don't have much in
common when it comes to the level of abstraction they operate at. For example,
the `World#distance` method and the `World#random_minefield_position` have so
little in common that it is hard to look at them side by side without getting
a bit of a headache.

```ruby
module Blind
  class World

    # simple delegation to the underlying Point object
    def distance(other)
      current_position.distance(other)
    end

    # ...

    # a non-trivial trigonometric function
    def random_minefield_position
      angle = rand(0..2*Math::PI)
      length = rand(MINE_FIELD_RANGE)

      x = length*Math.cos(angle)
      y = length*Math.sin(angle)

      Blind::Point.new(x.to_i,y.to_i)
    end
  end
end
```

Another major limitation of the `Blind::World` object is that similar to 
`Blind::Game`, it is chock full of magic numbers representing the sizes
and position of the various regions. While it might be reasonable to 
provide sensible defaults, locking these values down to exact numbers
will make world customization harder. I imagine this is something 
I will need to refactor sooner rather than later if I want to make
the game more interesting to play.

With all of these structural problems, you might expect the `Blind::World`
object to be quite cumbersome to work with, but so far I haven't really
had problems with it. I expect that the next round of improvements to Blind 
will change that, but I prefer to refactor based on needs rather than
gut feelings about what a good design might look like.

**[Blind::Point]() implements a simple generic point structure**

This object is simple enough that you can read its full implementation before we discuss it further:

```ruby
require "matrix"

module Blind
  class Point
    def initialize(x,y)
      @data = Vector[x,y]
    end

    def x
      data[0]
    end

    def y
      data[1]
    end

    def distance(other)
      (self.data - other.data).r
    end

    def ==(other)
      distance(other).zero?
    end

    def to_a
      [x,y]
    end

    def to_s
      "(#{x}, #{y})"
    end

    protected
    
    attr_reader :data
  end
end
```

The idea of having a generic class for representing points makes perfect sense for this game, because we need to do point math all over the place. I like that `Blind::Point` is an immutable object, because it reduces the possibility for weird corruptions to happen. However, the current implementation is not really putting the dependency on Ruby's `Vector` object to good use. I had originally built the class this way because I expected to be doing a lot of calculations, but then somehow got in the habit of manually doing the math on the individual components. 

I assume that as Blind gets more complex, I will do more and more point math, and that will inspire me to delegate a few more operations to the `Vector` object. I may also refactor several of Blind's API calls to take `Blind::Point` objects rather than explicit x and y arguments, which would encourage better use of this object. This kind of inconsistency is common in the early stages of any project, because the boundary lines between the various objects in the system have not quite solidified yet. The good news is that these tensions tend to work themselves out gradually over time.


**[Blind::UI::JukeBox]() is responsible for constructing the various sounds used in the game**

The highly dynamic nature of the sounds in Blind made it worthwhile to introduce a small abstraction for loading and manipulating audio files. The example below demonstrates how both simple and complex sounds can be created by `Blind::UI::JukeBox`:

```ruby
module Blind
  module UI
    JukeBox = Object.new
    
    class << JukeBox 
      def explosion
        new_sound("grenade")
      end

      def phone(position)
        new_sound("telephone") do |s|
          s.pos     = [position.x, position.y, 0]
          s.looping = true

          s.play
        end
      end

      def mines(positions)
        step      = 0
        step_size = 1/positions.count.to_f

        positions.map do |pos|
          new_sound("beep") do |s|
            s.pos     = [pos.x, pos.y, 1]
            s.looping = true
            s.pitch   = step + rand(step_size)

            s.play

            step += step_size
          end
        end
      end

      # ... several other sounds
    end
  end
end
```

One thing I like about this code is that it gave me a chance to use one of my favorite techniques: hiding ugly code via block-based APIs. While it looks pretty, the `JukeBox#new_sound` method is in reality nothing more than a tiny bit of syntactic sugar built on top of the underlying `Ray::Sound` object:

```ruby
module Blind
  module UI
    JukeBox = Object.new
    
    class << JukeBox
      def new_sound(name) 
        filename = "#{File.dirname(__FILE__)}/../../../data/#{name}.wav"
        
        Ray::Sound.new(filename).tap do |s|
          yield s if block_given?
        end
      end
    end
  end
end
```

When I had first built this method, it was designed to store the `Ray::Sound` objects in a hash that was keyed by the sound name. However, I eventually ended up deciding that it'd be best to let the client determine if and how sound objects should be cached, and so this method (and the `JukeBox` object as a whole) became a bit more simple as a result of that. Of course, it does complicate things for the `Blind::UI::GamePresenter` object, which has essentially become a dumping ground for all the functionality that didn't fit well in the other components that Blind is made up of.

**[Blind::UI::GamePresenter]() bridges the gap between the game logic and the
UI**

Similar to `Blind::World`, the `Blind::UI::GamePresenter` object suffers from a
bit of an identity crisis. On the one hand, some of its methods do look like
simple presentation-related features:

```ruby
module Blind
  module UI
    class GamePresenter
      def lose_game(message)
        silence_sounds

        sound = sounds[:explosion]
        sound.play

        self.game_over_message = message
      end

      def win_game(message)
        silence_sounds

        sound = sounds[:celebration]
        sound.play

        self.game_over_message = message
      end
    end
  end
end
```

However, there are just as many examples of methods that seem to be tacked
on to this object that perhaps would have been better off on another object:

```ruby
module Blind
  module UI
    class GamePresenter

      # requires domain knowledge about Blind::UI::JukeBox
      def silence_sounds
        sounds.each do |name, sound|
          case name
          when :mines
            sound.each { |s| s.stop }
          else
            sound.stop
          end
        end
      end

      # touches every attribute provided by Blind::World
      def to_s
        "Player position #{world.current_position}\n"+
        "Region #{world.current_region}\n"+
        "Mines\n #{world.mine_positions.each_slice(5)
                         .map { |e| e.join(", ") }.join("\n")}\n"+
        "Exit\n #{world.exit_position}"
      end
    end
  end
end
```

And just for good measure, `Blind::UI::GamePresenter` implements a few
methods that seem to be closer to the logical layer rather than the
presentation layer:

```ruby
module Blind
  module UI
    class GamePresenter
      
      # if we want to change a game rule, we'd need to update
      # the GamePresenter object. That seems a bit strange.
      def setup_events
        game.on_event(:enter_region, :danger_zone) do
          self.in_danger_zone = true
        end

        game.on_event(:leave_region, :danger_zone) do
          self.in_danger_zone = false
        end

        game.on_event(:enter_region, :deep_space) do
          lose_game("you drifted off into deep space! you lose!")
        end

        game.on_event(:mine_detonated) do
          lose_game("you got blasted by a mine! you lose!")
        end

        game.on_event(:exit_located) do
          win_game("you found the exit! you win!")
        end
      end

      # this triggers a "SURPRISE MATH ATTACK!" as in Blind::World
      def detect_danger_zone
        if in_danger_zone
          min = Blind::World::DANGER_ZONE_RANGE.min
          max = Blind::World::DANGER_ZONE_RANGE.max

          sounds[:siren].volume = 
            ((world.distance(world.center_position) - min) / max.to_f) * 100
        else
          sounds[:siren].volume = 0
        end
      end
    end
  end
end
```

This extremely messy design is a consequence of trying to make good design
decisions elsewhere. Whenever I was in doubt about whether something was a
logical concern or a presentation concern, I erred on the side of pushing the code out of
the domain models and into the UI. This helped the objects which implemented
pure game logic stay simple and lean, but without finding a good place for all
this other code to go, it ended up getting slapped together in a haphazard 
"procedural programming with objects" style. I am hoping to clean up this
object eventually, but I didn't have any brilliant ideas for how to do so
before publishing this article.

**The [bin/blind]() executable implements trivial UI boilerplate code**

The main benefit of the `Blind::UI::GamePresenter` class is that it makes it
possible for the Ray-based UI code to be almost entirely logic-free. This 
leads to a very clear main program loop:

```ruby
always do
  if game.finished?
    message = game.game_over_message
  else
    game.detect_danger_zone
    
    game.move( 0.0, -0.2) if holding?(:w)
    game.move( 0.0, 0.2)  if holding?(:s)
    game.move(-0.2, 0.0)  if holding?(:a)
    game.move( 0.2, 0.0)  if holding?(:d)

    position = game.player_position

    Ray::Audio.pos = [position.x, position.y, 0]
  end
end
```

My hope is that I will be able to preserve the simplicity of this runner file
even if I end up having to radically restructure the `Blind::UI::GamePresenter`
object. Because the interface used by this script is very narrow, I don't expect
that will be a problem.
 
### What would make Blind a more enjoyable game?

Now that I have given you a ton of context about the various strengths and
weaknesses of Blind's codebase, we can talk about some ways to improve its
gameplay.

**Customizable world maps**

While randomization can help make games have a higher replay value, full
randomization can result in a pretty inconsistent gaming experience. I would
like to add either a mechanism for loading in pre-defined world maps, or
build a more customizable random world generator that allows me to define
a bunch of different factors that affect gameplay.

> **CHALLENGES:**  In order to implement this feature, I am going to need to deal with reducing
the dependency on hard-coded numeric values throughout the system. I will
want to be able to control things like the size of the regions or the blast
radius of a mine, and currently those things are not configurable at runtime.

**Level-based organization**

Once I have the ability to support multiple different maps in the game, I would
like to be able to chain them together in a sequence to form levels. This will
make it possible to start with easy maps and progress to harder ones, which may
be a bit less of a disorienting experience for the player than the game
currently is.

> **CHALLENGES:** This feature does not require a ton of rework to the base
implementation, because we will be able to easily modify the handler for the `:exit_located`
event and have it advance to the next level rather than end the game. However, I
want to make sure to re-think the `Blind::UI::GamePresenter` object before doing
this so that I can avoid accumulating even more logic in the wrong place.

**Multiple lives per game**

Having to start over from the beginning will become more and more annoying as
more levels are added, so I will introduce the concept of "lives" in some form
or another. I haven't decided yet exactly what the mechanics for this feature
will be like, but it should make death somewhat less tragic and irritating
for the player.

> **CHALLENGES:**  Similar to adding support for levels, this probably won't 
require many structural changes to the game's current implementation. But as
I mentioned before, we need a better home for our event handling code moving
forward.

**Moving enemies that chase the player**

Having to slowly navigate a mine field in two dimensional space is
nerve-wracking enough, but getting chased through one would be terrifying!
I want to add some sort of flying baddies that will chase the player around
the minefield. There are lots of different ways I could possibly implement
this, but those are game design questions more so than technical questions.

> **CHALLENGES:** Adding more game elements that require their position to be 
updated will force me to think harder about the limitations of the 
current `Blind::Point` class and the overall design of 
the `Blind::World` class. I may need to end up refactoring both of
those objects, depending on how complex the functionality for these new
game elements is. Additionally, new event types and event handlers will
be created, and new sounds will need to be added. In other words, making
this change will force me to touch pretty much every object in the system.

**A defense mechanism for the player**

I would like to add some way for the player to protect themselves from harm.
This will likely be some sort of limited-use shield that is effective against
mine detonations, flying baddies, or both.

> **CHALLENGES:** Adding this functionality will require some sort of new event
type, and probably some modification to existing events. It will involve less
rework than the flying baddies feature, but will require more changes to the
existing system than most of the other features I have proposed. The good news
is that apart from those changes, the feature itself should be easy to
implement.

I may not get around to implementing all of these features by the time Issue 4.3
is published, but I will definitely tackle at least a few of them between now
and then. Regardless of how things turn out, I think we will end up with some
interesting problems to discuss.

### Some things you can do to help me

**If you have a few seconds to spare** and know someone who has some experience
with game development, please share this article and ask them to get in touch
with me.

**If you have a few minutes to spare**, please share your thoughts about this
article. I would be happy to hear whatever is on your mind, whether it is about
the game itself, its codebase, or just a gut reaction to something I have said
in this article.

**If you can spare an extra hour or two of your time**, please pull down the
code and try to get the game up and running, and then take a closer look
at its implementation. Once you have done that, get in touch with me with
any questions and suggestions about points I didn't manage to cover in
this article.

And lastly, if you want to stay on top of changes as I work towards implementing
the code that will be discussed in Issue 4.3, please hang out in the
**#mendicant** channel on Freenode. I may occasionally ask the folks there to test things
for me or give me feedback on small snippets of code while I am working.


================================================
FILE: articles/v4/004-cheap-counterfeits-jekyll.md
================================================
While it may not seem like it at first, you can learn a great deal about Ruby by building something as simple as a static website generator. Although the task itself may seem a bit dull, it provides an opportunity to practice a wide range of Ruby idioms that can be applied elsewhere whenever you need to manipulate text-based data or muck around with the filesystem. Because text and files are everywhere, this kind of practice can have a profound impact on your ability to write elegant Ruby code.

Unfortunately, there are two downsides to building a static site generator as a learning exercise: it involves a fairly large time commitment, and in the end you will probably be better off using [Jekyll](http://github.com/mojombo/jekyll) rather than maintaining your own project. But don't despair, I wrote this article specifically with those two points in mind!

In order to make it easier for us to study text and file processing tricks, I broke off a small chunk of Jekyll's functionality and implemented a simplified demo app called [Jackal](http://github.com/elm-city-craftworks/jackal). Although it would be a horrible idea to attempt to use this barely functional counterfeit to maintain a blog or website, it works great as a tiny but context-rich showcase for some very handy Ruby idioms.

### A brief overview of Jackal's functionality

The best way to get a feel for what Jackal can do is to [grab it from Github](https://github.com/elm-city-craftworks/jackal) and follow the instructions in the README. However, because it only implements a single feature, you should be able to get a full sense of how it works from the following overview.

Similar to Jekyll, the main purpose of Jackal is to convert Markdown-formatted posts and their metadata into HTML files. For example, suppose we have a file called **_posts/2012-05-09-tiniest-kitten.markdown** with the following contents:

```
---
category: essays
title: The tiniest kitten
---

# The Tiniest Kitten

Is not nearly as **small** as you might think she is.
```

Jackal's job is to split the metadata from the content in this file and then generate a new file called **_site/essays/2012/05/09/tiniest_kitten.html** that ends up looking like this:


```html
<h1>The Tiniest Kitten</h1>

<p>Is not nearly as <strong>small</strong> as you might think she is.</p>
```

If Jackal were a real static site generator, it would support all sorts of fancy features like layouts and templates, but I found that I was able to generate enough "teaching moments" without those things, and so this is pretty much all there is to it. You may want to spend a few more minutes [reading its source](http://github.com/elm-city-craftworks/jackal) before moving on, but if you understand this example, you will have no trouble understanding the rest of this article.

Now that you have some sense of the surrounding context, I will take you on a guided tour through various points of interest in Jackal's implementation, highlighting the parts that illustrate generally useful techniques.

### Idioms for text processing

While working on solving this problem, I noticed a total of four text processing idioms worth mentioning.

**1) Enabling multi-line mode in patterns**

The first step that Jackal (and Jekyll) need to take before further processing can be done on source files is to split the YAML-based metadata from the post's content. In Jekyll, the following code is used to split things up:

```ruby
if self.content =~ /^(---\s*\n.*?\n?)^(---\s*$\n?)/m
  self.content = $POSTMATCH
  self.data    = YAML.load($1)
end
```

This is a fairly vanilla use of regular expressions, and is pretty easy to read even if you aren't especially familiar with Jekyll itself. The main interesting thing about it is that it uses the `/m` modifier to make it so that the pattern is evaluated in multiline mode. In this particular example, this simply makes it so that the group which captures the YAML metadata can match multiple lines without explicitly specifying the intermediate `\n` characters. The following contrived example should help you understand what that means if you are still scratching your head:

```
>> "foo\nbar\nbaz\nquux"[/foo\n(.*)quux/, 1]
=> nil
>> "foo\nbar\nbaz\nquux"[/foo\n(.*)quux/m, 1]
=> "bar\nbaz\n"
```

While this isn't much of an exciting idiom for those who have a decent understanding of regular expressions, I know that for many people patterns can be a mystery, and so I wanted to make sure to point this feature out. It is great to use whenever you need to match a semi-arbitrary blob of content that can span many lines.

**2) Using MatchData objects rather than global variables**

While it is not necessarily terrible to use variables like `$1` and `$POSTMATCH`, I tend to avoid them whenever it is not strictly necessary to use them. I find that using `String#match` feels a lot more object-oriented and is more aesthetically pleasing:

```ruby
if md = self.content.match(/^(---\s*\n.*?\n?)^(---\s*$\n?)/m)
  self.content = md.post_match
  self.data    = YAML.load(md[1])
end
```

If you combine this with the use of Ruby 1.9's named groups, your code ends up looking even better. The following example is what I ended up using in Jackal:

```ruby
if (md = contents.match(/^(?<metadata>---\s*\n.*?\n?)^(---\s*$\n?)/m))
  self.contents = md.post_match
  self.metadata = YAML.load(md[:metadata])
end
```

While this does lead to somewhat more verbose patterns, it helps quite a bit with readability and even makes it possible to directly use `MatchData` objects in a way similar to how we would work with a parameters hash.

**3) Enabling free-spacing mode in patterns**

I tend to be very strict about keeping my code formatted so that my lines are under 80 characters, and as a result of that I find that I am often having to think about how to break up long statements. I ended up using the `/x` modifier in one of Jackal's regular expressions for this purpose, as shown below:

```ruby
module Jackal
  class Post
    PATTERN = /\A(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})-
                (?<basename>.*).markdown\z/x

    # ...
  end
end
```

This mode makes it so that patterns ignore whitespace characters, making the previous pattern functionally equivalent to the following pattern:

```ruby
/\A(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})-(?<basename>.*).markdown\z/x
```

However, this mode does not exist primarily to serve the needs of those with obsessive code formatting habits, but instead exists to make it possible to break up and document long regular expressions, such as in the following example:

```ruby
# adapted from: http://refactormycode.com/codes/573-phone-number-regex

PHONE_NUMBER_PATTERN = /^
  (?:
    (?<prefix>\d)             # prefix digit
    [ \-\.]?                  # optional separator
  )?
  (?:
    \(?(?<areacode>\d{3})\)?  # area code
    [ \-\.]                   # separator
  )?
  (?<trunk>\d{3})             # trunk
  [ \-\.]                     # separator
  (?<line>\d{4})              # line
  (?:\ ?x?                    # optional space or 'x'
    (?<extension>\d+)         # extension
  )?
$/x
```

This idiom is not extremely common in Ruby, perhaps because it is easy to use interpolation within regular expressions to accomplish similar results. However, this does seem to be a handy way to document your patterns and arrange them in a way that can be easily visually scanned without having to chain things together through interpolation.

**4) Making good use of Array#join**

Whenever I am building up a string from a list of elements, I tend to use `Array#join` rather than string interpolation (i.e. the `#{}` operator) if I am working with more than two elements. As an example, take a look at my implementation of the `Jackal::Post#dirname` method:

```ruby
module Jackal
  class Post
    def dirname
      raise ArgumentError unless metadata["category"]

      [ metadata["category"], 
        filedata["year"], filedata["month"], filedata["day"] ].join("/")
    end
  end
end
```

The reason for this is mostly aesthetic, but it gives me the freedom to format my code any way I would like, and is a bit easier to make changes to.

> **NOTE:** Noah Hendrix pointed out in the [comments on this article](http://practicingruby.com/articles/57#comments) that for this particular example, using `File.join` would be better because it would take platform-specific path syntax into account.

### Idioms for working with files and folders

In addition to the text processing tricks that we've already gone over, I also noticed four idioms for doing various kinds of file and folder manipulation that came in handy.

**1) Manipulating filenames**

There are three methods that are commonly used for munging filenames: `File.dirname`, `File.basename`, and `File.extname`. In Jackal, I ended up using two out of three of them, but could easily imagine how to make use of all three.

I expect that most folks will already be familiar with `File.dirname`, but if that is not the case, the tests below should familiarize you with one of its use cases:

```ruby
describe Jackal::Page do
  let(:page) do
    posts_dir = "#{File.dirname(__FILE__)}/../fixtures/sample_app/_posts"
    Jackal::Page.new("#{posts_dir}/2012-05-07-first-post.markdown")
  end

  it "must extract the base filename" do
    page.filename.must_equal("2012-05-07-first-post.markdown")
  end
end
```

When used in conjunction with the special `__FILE__` variable, `File.dirname` is used to generate a relative path. So for example, if the `__FILE__` variable in the previous tests evaluates to `"test/units/page_test.rb"`, you end up with the following return value from `File.dirname`:

```ruby
>> File.dirname("test/units/page_test.rb")
=> "test/units"
```

Then the whole path becomes `"test/units/../fixtures/sample_app/_posts"`, which is functionally equivalent to `"test/fixtures/sample_app/_posts"`. The main benefit is that should you run the tests from a different folder, `__FILE__` would be updated accordingly to still generate a correct relative path. This is yet another one of those idioms that is hardly exciting to those who are already familiar with it, but is an important enough tool that I wanted to make sure to mention it.

If you feel like you understand `File.dirname`, then `File.basename` should be just as easy to grasp. It is essentially the opposite operation, getting just the filename and stripping away the directories in the path. If you take a closer look at the tests above, you will see that `File.basename` is exactly what we need in order to implement the behavior hinted at by `Jackal::Page#filename`. The irb-based example below should give you a sense of how that could work:

```
>> File.basename("long/path/to/_posts/2012-05-09-tiniest-kitten.markdown")
=> "2012-05-09-tiniest-kitten.markdown"
```

For the sake of simplicity, I decided to support Markdown only in Jackal posts, but if we wanted to make it more Jekyll-like, we would need to support looking up which formatter to use based on the post's file extension. This is where `File.extname` comes in handy:

```
>> File.extname("2012-05-09-tiniest-kitten.markdown")
=> ".markdown"
>> File.extname("2012-05-09-tiniest-kitten.textile")
=> ".textile"
```

Typically when you are interested in the extension of a file, you are also interested in the name of the file without the extension. While I have seen several hacks that can be used for this purpose, the approach I like best is to use the lesser-known two argument form of `File.basename`, as shown below:

```
>> File.basename("2012-05-09-tiniest-kitten.textile", ".*")
=> "2012-05-09-tiniest-kitten"
>> File.basename("2012-05-09-tiniest-kitten.markdown", ".*")
=> "2012-05-09-tiniest-kitten"
```

While these three methods may not look especially beautiful in your code, they provide a fairly comprehensive way of decomposing paths and filenames into their parts. With that in mind, it is somewhat surprising to me how many different ways I have seen people attempt to solve these problems, typically resorting to some regexp-based hacks.

**2) Using Pathname objects**

Whenever Ruby has a procedural or functional API, it usually also has a more object-oriented way of doing things as well. Manipulating paths and filenames is no exception, and the example below shows that it is entirely possible to use `Pathname` objects to solve the same problems discussed in the previous section:

```
>> require "pathname"
=> true
>> Pathname.new("long/path/to/_posts/2012-05-09-tiniest-kitten.markdown").dirname
=> #<Pathname:long/path/to/_posts>
>> Pathname.new("long/path/to/_posts/2012-05-09-tiniest-kitten.markdown").basename
=> #<Pathname:2012-05-09-tiniest-kitten.markdown>
>> Pathname.new("long/path/to/_posts/2012-05-09-tiniest-kitten.markdown").extname
=> ".markdown"
```

However, because doing so doesn't really simplify the code, it is hard to see the advantages of using `Pathname` objects in this particular example. A much better example can be found in `Jackal::Post#save`:


```ruby
module Jackal
  class Post
    def save(base_dir)
      target_dir = Pathname.new(base_dir) + dirname
      
      target_dir.mkpath

      File.write(target_dir + filename, contents)
    end
  end
end
```

The main reason why I used a `Pathname` object here is because I needed to make use of the `mkpath` method. This method is roughly equivalent to the UNIX `mkdir -p` command, which handles the creation of intermediate directories automatically. This feature really comes in handy for safely generating a deeply nested folder structure similar to the ones that Jekyll produces. I could have alternatively used the `FileUtils` standard library for this purpose, but I personally find `Pathname` to look and feel a lot more like a modern Ruby library.

Although its use here is almost coincidental, the `Pathname#+` method is another powerful feature worth mentioning. This method builds up a `Pathname` object through concatenation. Because this method accepts both `Pathname` objects and `String` objects as arguments but always returns a `Pathname` object, it makes it easy to incrementally build up a complex path. However, because `Pathname` objects do more than simply merge strings together, you need to be aware of certain edge cases. For example, the following irb session demonstrates that `Pathname` has a few special cases for dealing with absolute and relative paths:

```
>> Pathname.new("foo") + "bar"
=> #<Pathname:foo/bar>
>> Pathname.new("foo") + "/bar"
=> #<Pathname:/bar>
>> Pathname.new("foo") + "./bar"
=> #<Pathname:foo/bar>
>> Pathname.new("foo") + ".////bar"
=> #<Pathname:foo/bar>
```

Unless you keep these issues in mind, you may end up introducing subtle errors into your code. However, this behavior makes sense as long as you can remember that `Pathname` is semantically aware of what a path actually is, and is not meant to be a drop-in replacement for ordinary string concatenation.

**3) Using File.write**

When I first started using Ruby, I was really impressed by how simple and expressive the `File.read` method was. Because of that, it was kind of a shock to find out that simply writing some text to a file was not as simple. The following code felt like the opposite of elegance to me, but we all typed it for years:

```ruby
File.open(filename, "w") { |f| f << contents }
```

In modern versions of Ruby 1.9, the above code can be replaced with something far nicer, as shown below:

```ruby
File.write(filename, contents)
```

If you look back at the implementation of `Jackal::Post#save`, you will see that I use this technique there. While it is the simple and obvious thing to do, a ton of built up muscle memory typically causes me to forget that `File.write` exists, even when I am not at all concerned about backwards compatibility.

Another pair of methods worth knowing about that help make some other easy tasks more elegant in a similar way are `File.binread` and `File.binwrite`. These aren't really related to our interests with Jackal, but are worth checking out if you ever work with binary files.

**4) Using Dir.mktmpdir for testing**

It can be challenging to write tests for code which deals with files and complicated folder structures, but it doesn't have to be. The `tmpdir` standard library (which is also loaded when you require `tempfile`) provides useful tools for dealing with this problem, and `Dir.mktmpdir` is one of its most useful methods.

I like to use this method in combination with `Dir.chdir` to build up a temporary directory structure, do some work in it, and then automatically discard all the files I generated as soon as my test is completed. The tests below are a nice example of how that works:

```ruby
it "must be able to save contents to file" do
  Dir.mktmpdir do |base_dir|
    post.save(base_dir)

    Dir.chdir("#{base_dir}/#{post.dirname}") do
      File.read(post.filename).must_equal(post.contents)
    end
  end
end
```
This approach provides an alternative to using mock objects. Even though this code creates real files and folders, the transactional nature of `Dir.mktmpdir` ensures that tests won't have any unexpected side effects from run to run. When manipulating files and folders is part of the core job of an object (as opposed to an implementation detail), I prefer testing in this way rather than using mock objects for the sake of realism.

The `Dir.mktmpdir` method can also come in handy whenever some complicated work needs to be done in a sandbox on the file system. For example, I [use it in Bookie](https://github.com/sandal/bookie/blob/45e0c4d0a575026deff79732b3c4c737f1c6f15c/lib/bookie/emitters/epub.rb#L19-46) to store the intermediate results of a complicated text munging process, and it seems to work great for that purpose.

### Reflections

Taken individually, these text processing and file management idioms only make a subtle improvement to the quality of your code. However, if you get in the habit of using most or all of them whenever you have an opportunity to do so, you will end up with much more maintainable code that is very easy to read.

Because many languages make text processing and file management hard, and because Ruby also has low level APIs that work in much the same way as those languages, it is often the case that folks end up solving these problems the hard way without ever realizing that there are nicer alternatives available. Hopefully this article has exposed you to a few tricks you haven't already seen before, but if it hasn't, maybe you can share some thoughts on how to make this code even better!


================================================
FILE: articles/v4/005-scoring-predictions-kata.md
================================================
*This article is written by James Edward Gray II.  James is an old friend of
Greg's, so he was thrilled to contribute. From late 2011 to mid 2012, James 
wrote his own series of programming articles called [Rubies in the Rough][rubies].
Yes, James just stole Greg's good idea.*

[rubies]: http://subinterest.com/rubies-in-the-rough

In this article, we will look at a fun problem that was in a couple of the [Peepcode][peepcode] _Play by Play_ videos. I've played around with this kata a bit and given it to a programming student of mine, so I know it pretty well by now. Its solution touches on a couple of neat programming topics, so dig in and see what you can learn.

[peepcode]: https://peepcode.com

### The challenge

I'm going to simplify the Peepcode task a bit so that we can get right to the heart of the problem. Here's the challenge we're going to work on:

> Write a method that accepts two arguments: an `Array` of five guesses for
> finalists in a race and an `Array` of the five actual finalists.  Each
> position in the lists matches a finishing position in the race, so first place
> corresponds to index `0`.  Return an `Integer` score of the predictions:  `0`
> or more points.  Correctly guessing first place is worth `15` points, second
> is worth `10`, and so on down with `5`, `3`, and `1` point for fifth
> place.  It's also worth `1` point to correctly guess a racer that finishes in
> the top five but to have that racer in the wrong position.

I'm going to jump right into solving this problem, but I encourage everyone to stop and play with the problem a little before reading on.  You'll get more out of what I say if you are familiar with the problem.

OK, ready?

### Complete specing

I test-drove my solution to this code, but probably not as everyone else does it.  Let me show you a trick I like to use for these fixed algorithms. First, let's set up some directories for the code and create the needed files:

```
$ mkdir -p scoring_predictions/{lib,spec}
$ cd scoring_predictions/
$ touch lib/race.rb
$ touch spec/scoring_spec.rb
```

At this point I opened `spec/scoring_spec.rb` in my editor and set to work.  We're supposed to begin with the happy path, so I wrote an example for a set of perfect guesses:

```ruby
require "race"

describe "Race::score" do
  let(:winners) { %w[First Second Third Fourth Fifth] }

  it "add points for each position" do
    Race.score(winners, winners).should eq(15 + 10 + 5 + 3 + 1)
  end
end
```

At this point, most developers would start adding the library code to make this example pass.  However, I don't find that approach very helpful for code like this.

If I do "the right thing," I should just return a hardcoded score.  Then I'll need to write a second example to force me to generalize (or consider the hardcoded score a violation of DRY that I need to refactor).  Either way, the tasks are just busywork that isn't helping me write the code.  Even having the extra example won't raise my confidence that it scores the scenarios correctly.

What would help me is to have an example for each rule in the challenge.  I'm going to need to do some programming to solve this—and nothing is getting me out of that.  All I can do is to make it easier to do that programming.  If the examples codify the rules for me, running them will tell me whether I am getting closer to a right answer just by watching the pass/fail ratio.

With these thoughts in mind, I finished writing examples for the rules of the challenge:

```ruby
require "race"

describe "Race::score" do
  let(:winners) { %w[First Second Third Fourth Fifth] }

  def correct_guesses(*indexes)
    winners.map.with_index { |w, i| indexes.include?(i) ? w : "Wrong" }
  end

  it "add points for each position" do
    Race.score(winners, winners).should eq(15 + 10 + 5 + 3 + 1)
  end

  it "gives 0 points for no correct guesses" do
    all_wrong = correct_guesses  # none correct
    Race.score(all_wrong, winners).should eq(0)
  end

  it "gives 15 points for first place" do
    Race.score(correct_guesses(0), winners).should eq(15)
  end

  it "gives 10 points for second place" do
    Race.score(correct_guesses(1), winners).should eq(10)
  end

  it "gives 5 points for third place" do
    Race.score(correct_guesses(2), winners).should eq(5)
  end

  it "gives 3 points for fourth place" do
    Race.score(correct_guesses(3), winners).should eq(3)
  end

  it "gives 1 point for fifth place" do
    Race.score(correct_guesses(4), winners).should eq(1)
  end

  it "gives one point for a correct guess in the wrong place" do
    guesses = correct_guesses(0)
    guesses.unshift(guesses.pop)  # shift positions by one
    Race.score(guesses, winners).should eq(1)
  end

  it "score positional and misplaced guesses at the same time" do
    guesses                = correct_guesses(0, 3)
    guesses[3], guesses[4] = guesses[4], guesses[3]
    Race.score(guesses, winners).should eq(15 + 1)
  end
end
```

This probably looks like a lot of code, but it's quite trivial.  You already saw the first example.  The next six just specify the score for each position (and one for no positions) with the help of a trivial method I wrote to generate right and wrong guesses.  The next-to-last example is the rule about right guesses in the wrong position.  Finally, I just wanted at least one example testing both scenarios at once.

This gives me plenty of red to work with:


```
$ rspec
FFFFFFFFF

Failures:

…

Finished in 0.00417 seconds
9 examples, 9 failures

Failed examples:

rspec ./spec/scoring_spec.rb:8 # Race::score add points for each position
rspec ./spec/scoring_spec.rb:12 # Race::score gives 0 points for …
…
```

From there, I played around with an algorithm until I saw these examples go green.  I could show my process, but the truth is that we all attack this stuff in different ways.

Instead, let's look at a correct but not optimal solution.

### What the iterators can do for you

The first pass my student made at this problem landed on some code like this:

```ruby
module Race
  module_function

  def score(guesses, winners)
    points = 0
    guesses.each_with_index do |guess, i|
      if guess == winners[i]
        points += case i
                  when 0 then 15
                  when 1 then 10
                  when 2 then 5
                  when 3 then 3
                  when 4 then 1
                  end
      else
        winners.each do |winner|
          points += 1 if winner == guess
        end
      end
    end
    points
  end
end
```

The guy has only been studying Ruby a short while, so I thought this was a great first stab at the problem.  I did urge him to refine it, though.

First, I mentioned that you can often tell that you have the wrong iterator if it does extra iterations.  The `else` code in the previous example is a good example of this.  It may find the `guess` in the first position of `winners`, but it would keep looking.  Although it's possible to add a `break` statement to fix this problem, there are iterators that "short-circuit" when they find an answer.  For example, `find()`, which is close to what we want, or `any?()` which is even closer.  What we really want though, is this:

```ruby
module Race
  module_function

  def score(guesses, winners)
    points = 0
    guesses.each_with_index do |guess, i|
      if guess == winners[i]
        points += case i
                  when 0 then 15
                  when 1 then 10
                  when 2 then 5
                  when 3 then 3
                  when 4 then 1
                  end
      elsif winners.include? guess
        points += 1
      end
    end
    points
  end
end
```

Another sign that you're on the wrong track in Ruby is the need to track an index.  Sometimes you really do need one, but that need is quite rare.  Assume that you don't and give in only when you can't find a way around it.

In this case, the path is almost clear.  The first thing you see the index used for is to walk two lists in parallel.  Ruby has an iterator for that.  It's `zip()`.

Unfortunately, we can't switch straight to `zip()`.  If we did, we wouldn't have the score.  It also needs the index in this setup.  That's the problem we need to solve first.

The trick is that `case` statement.  It's really hiding the true nature of those scores.  If you squint hard enough, you'll see that it's really just another `Array`.  It would have been easier to see this if there were more of them (say, 100) because we would be less willing to type that out.

That gives us the first step.  We need to move to something more like this:

```ruby
module Race
  module_function

  def score(guesses, winners)
    points = 0
    guesses.each_with_index do |guess, i|
      if guess == winners[i]
        points += [15, 10, 5, 3, 1][i]
      elsif winners.include? guess
        points += 1
      end
    end
    points
  end
end
```

This code solves one of our problems.  We're now working with `Array` objects all the way down.  That's nice, but I don't really like that change I just made.  It makes it painfully obvious that it's a list of magic numbers.  That makes me want to give them a name:

```ruby
module Race
  SCORES    = [15, 10, 5, 3, 1]
  MISPLACED = 1

  module_function

  def score(guesses, winners, scores = SCORES, misplaced = MISPLACED)
    points = 0
    guesses.each_with_index do |guess, i|
      if guess == winners[i]
        points += scores[i]
      elsif winners.include? guess
        points += misplaced
      end
    end
    points
  end
end
```

That's much better, in my opinion.  The scores now have names.  They are in constants, so you can reflect on them externally.  This approach allows us to update the specs to use these values.  (I'll leave that work as an exercise for the interested reader.)  Finally, because we are passing the constants as defaults to method arguments, they can be overridden as needed, which ends their reign as magic values.

Of course, we took that step to get to this one:

```ruby
module Race
  SCORES    = [15, 10, 5, 3, 1]
  MISPLACED = 1

  module_function

  def score(guesses, winners, scores = SCORES, misplaced = MISPLACED)
    points = 0
    guesses.zip(winners, scores) do |guess, winner, score|
      if guess == winner
        points += score
      elsif winners.include? guess
        points += misplaced
      end
    end
    points
  end
end
```

The switch to `zip()` was straightforward and makes the code read better.  Plus, we're rid of that index.

This code is pretty close to the code I ended up with while fiddling with this problem.

### The point

I don't want to tell you what to get out of this exercise, but I can tell you what I got out of it, which is mainly to remember the true purpose of a thing.  For example:

* Following the proper steps of BDD is meant to **help you write code**.  If it turns into busywork that doesn't help, you are free to go another way.  And maybe you should feel compelled to go another way at that point.
* Iterators are intended to **save you from maintenance and potential error points**, such as:  tracking indexes or other variables and doing too much work.  If you find yourself in either of these scenarios, go spelunking in `Enumerable` to see whether there's a better tool for the job.  Heck, do that anyway... it's fun!  Do you know [what Enumerable#chunk() does][chunk] yet?
* The primary purpose of code is to **communicate with the reader.**  Period.  No, really!  Notice that in all of the steps in this article, I am trying to tease out the underlying meaning of the code, then write the code as close to that intention as possible.  That's when we're at our best, if you ask me.

[chunk]: http://ruby-doc.org/core-1.9.3/Enumerable.html#method-i-chunk


================================================
FILE: articles/v4/006-persisting-relations.md
================================================
*This article is written by Piotr Szotkowski. Greg invited Piotr to contribute
to Practicing Ruby after seeing his RubyConf 2011 talk _Persisting
Relations Across Time and Space_
([slides](http://persistence-rubyconf-2011.heroku.com),
[video](http://confreaks.net/videos/657)). This is not a one-to-one text
version of that talk; Piotr has instead chosen to share some thoughts on the topics of
[polyglot persistence](http://architects.dzone.com/articles/polyglot-persistence-future)
and modeling relations between objects.*

### Persistence: Your Objects’ Time Travel

> If the first thing you type, when writing a Ruby app, is `rails`, you’ve
> already lost the [architecture game](http://confreaks.com/videos/759).
>
> <cite>Uncle Bob Martin</cite>

The first thing we need to ask ourselves when thinking about object persistence
is how we can dehydrate an object into a set of simple values—usually
strings, numbers, dates, and boolean ‘flags’—in a way that will let us
rehydrate it at some point, often on a completely unrelated
run of our application. With the bulk of contemporary Ruby programs being Rails
web apps, this issue is so obvious that we usually don’t even think about it; the
persistence is conveniently taken care of by ActiveRecord, and we often actually
_start_ writing the application by defining database-oriented models of our
objects: 

```bash
$ rails generate model person name bio:text height:float born:date vip:boolean
$ rake db:migrate
```

This simple two-line command sequence takes care of all the behind-the-scenes 
machinery required to persist instances of our `Person` class. The main 
problem with the previous example is that it puts us into a tight tunnel
of relational database-driven design. Although many came back saying that the
light at the end is a truly glorious meadow and we should speed up to
get there faster, our actual options of taking detours, driving on the
shoulders, and stopping for a bit to get a high-altitude view of the road ahead
are even more limited than the run of this metaphor. ActiveRecord’s handling of
model relations (`belongs_to`, `has_many`, etc.)
sometimes complicates the problem by giving us seemingly all-purpose solutions that
are often quite useful but end up requiring just-this-little-bit-more tweaking, which accumulates over time.

### Persistence in practice

> A database is a black hole into which you put your data. If you’re lucky,
> you’ll get it back again. If you’re very lucky, you’ll get it back in a form
> you can use.
>
> <cite>Charlie Gibbs</cite>

As mentioned previously, persisting an object means dehydrating it into a set of
simple values. The way we do this depends heavily on the database backend
being used.

When it comes to the most popular case of relational databases (such as MySQL,
PostgreSQL or SQLite), we use tables for classes, rows for objects, and columns
to hold a given object property across all instances of the same class. To
persist an object, we serialize the given object’s properties down into table
cells with column types supported by the underlying database—but even in this
seemingly obvious case, it’s worth it to stop for a second and think.

Should we go for the lowest common denominator (strings, numbers, and dates—
even booleans are not really cross-engine; for instance, MySQL presents them as one-bit
integers, `0` and `1`), should we use a given ORM’s ‘common ground’ (here
booleans are usually fair game, and the ORM can take care of exposing them as
`true` and `false`), or should we actually limit the portability while
leveraging a given RDBMS’s features? For example, PostgreSQL exposes not only 
‘real’ booleans but also [a lot of other very useful
types](http://www.postgresql.org/docs/9.1/static/datatype.html), including
geometric points and paths, network addresses, and XML documents that
can be searched and filtered via XPath. It even supports arrays, which means
that we can store a given blog post’s tags in a single column in the 
`posts` table and query by inclusion/exclusion just as well as we could 
with a separate join table.

> Database research has produced a number of good results, but the relational
> database is not one of them.
>
> <cite>Henry G. Baker</cite>

Persisting objects in document databases (such as CouchDB or MongoDB) is
somewhat similar, but often also quite a bit different; classes are usually mapped
to collections, objects to documents, and object properties to these documents’
fields. Although strings, numbers, and dates are serialized similarly to relational
databases, document databases also usually allow us to store properties that
are arrays or hashes and allow easy storage of related objects as nested
documents (the canonical example being comments for a blog post, in cases when
they’re most often requested only in the context of the given post). This
results in all sorts of trade-offs. For example, you might end up needing to
do fewer joins overall, but the ones you do have to do come at a higher 
cost in both performance and upfront design work.

Other kinds of databases have still other approaches for serializing objects:

* Key-value stores (like Redis) usually need the objects to be in an
already serialized form (e.g., represented as JSON strings), but there are
gems like [ROC](https://github.com/benlund/roc) that map simple objects
directly to their canonical Redis representations. 

* Graph databases (such as Neo4j) are centered around object relations 
and often allow persisting objects as schema-less nodes, akin to 
document databases. 

* Many other storage types have their own object/persistence 
mapping specifics as well. For example, as a directory service,
LDAP does things in a way that is different from how general-purpose 
persistence methods tend to work. 

From just this short overview, it should be fairly clear that there is no
shortage of options when it comes to deciding how your objects should
be persisted. In fact, even Ruby itself ships with a simple object store!

### Ruby's built-in persistence mechanism 

One of my personal favorite ways of persisting objects is the `PStore`
library (which is distributed with Ruby) coupled with YAML serialization. Despite being
highly inefficient (compared to powerhouses like relational or document
databases), it’s often more than good enough for small applications, and its
simplicity can be quite a benefit.

Let’s assume for a second that we want to write [a small application for
handling quotes](https://github.com/chastell/signore): what would be the
simplest way to persist them? See for yourself:

```ruby
require 'yaml/store'
store = YAML::Store.new 'quotes.yml'

# quotes are author + text structures
Quote = Struct.new :author, :text

store.transaction do   # a read/write transaction...
  store['db'] ||= []
  store['db'] << Quote.new('Charlie Gibbs',
    'A database is a black hole into which you put your data.')
  store['db'] << Quote.new('Will Jessop',
    'MySQL is truly the PHP of the database world.')
end                    # ...is atomically committed here

# read-only transactions can be concurrent
# and raise when you try to write anything
store.transaction(true) do
  store['db'].each do |quote|
    puts quote.text
    puts '-- ' + quote.author
    puts
  end
end
```

Saving the previous example file and running it prints the two quotes just fine:

```
$ ruby quotes.rb
A database is a black hole into which you put your data.
-- Charlie Gibbs

MySQL is truly the PHP of the database world.
-- Will Jessop
```

But a real treat awaits when we inspect the `quotes.yml` file:

```
---
db:
- !ruby/struct:Quote
  author: Charlie Gibbs
  text: A database is a black hole into which you put your data.
- !ruby/struct:Quote
  author: Will Jessop
  text: MySQL is truly the PHP of the database world.
```

This approach allows us to have an automated way to persist and rehydrate our `Quote`
objects while also allowing us to easily edit them and fix any typos right
there in the YAML file. Is it scalable? Maybe not, but [my current
database of email
signatures](https://github.com/chastell/dotfiles/blob/aee1d31618e2e4ea88186eda163f29ebd72702d1/.local/share/signore/signatures.yml)
consists of 4,000 entries and works fast enough.

> **NOTE:** If you’re eager to try YAML as a storage backend, check out [YAML Record](https://github.com/nico-taing/yaml_record) and [YAML Model](http://www.darkarts.co.za/yaml-model).

### Sweet relations: how do they work?

Now that I’ve covered the idea of object persistence using various backends,
it’s finally time to talk about relations between objects. Quite often the
relations are the crux of our application (even when we’re not building
another social network...), and the problem of their persistence is usually
overlooked and simplified to ‘Let’s just use foreign keys and join tables where
needed.’

The way relations are canonically persisted depends greatly on the type of the
database. Contrary to their name, relational databases are not an ideal
solution for storing relations: their name comes from relations between the
rows of a single table (which translates to the assumption that objects of the
same class have the same property types), not from relations between objects of
potentially different classes, which end up being rows in separate tables.

Modeling relations in relational databases is quite complicated and depends on
the type of relation, its directionality, and whether it carries any
relation-specific data. For example, an object representing a person can have
relations such as having a particular gender (one-to-many relation), having
a hobby (many-to-many), having a spouse (many-to-many, with the relation
carrying additional data, such as start date of the relationship),
participating in an event (many-to-many, with additional data such as
participation role), being on two different ends of a parental relation (having
parents and children), and so on. Some of these relations (gender) can be stored
right in the `people` table; some need to be represented by having a foreign
key; others require a separate join table (potentially carrying any
relation-specific data). Dereferencing such relations means crafting and
executing (potentially complicated) SQL `JOIN` queries.

![relations](http://i.imgur.com/CO6gf.png)

_An example set of relations (arrows) between ‘typical’ objects in a system._

Modeling relations in document databases is quite different from modeling for
an RDBMS. Some of the relations (like the above-mentioned post/comments example) 
are best modeled using embedded documents. Despite being very useful in certain scenarios (e.g., retrieving
a post with all of its comments), this approach might cause problems when new
features require cross-cutting through all of such embedded documents. For
example, retrieving all of the comments by a given person or getting the list of the
most recent comments means scanning through the whole `posts` collection.

Although some document databases employ implicit, foreign-key-like references
(e.g., MongoDB’s DBRefs, which are two-key documents of the form `{ $ref:
<collection>, $id: <object_id> }`), dereferencing relations is usually a bigger
problem (due to the lack of standard approaches like SQL `JOIN` queries) and is
often done on the client side, even if it’s greatly simplified by tools like
[MongoHydrator](https://github.com/gregspurrier/mongo_hydrator).

Key-value stores are, by definition, the least relation-friendly backends—and
using them for modeling relations requires explicit foreign keys that need to
be managed on the client side. On the other end of the spectrum are graph databases:
relations (modeled as edges) can usually carry any data required, can as
easily point in either or both directions, and are represented in the same way
regardless of whether they model a one-to-one, one-to-many, or many-to-many
relation. Graph databases also allow for all kinds of data analysis/querying
based on the relations themselves, making things like graph traversal or proximity
metrics easier and faster than they would be with a relational database.

### Modeling relations as proper objects

Now that you know the different ways of (and issues with) persisting objects and
relations between them, is there a way to model relations that could be deemed
‘persistence independent’, or at least ‘not persistence driven’? One such approach 
would be to model relations as proper objects in the system, akin to
how they’re modeled in graph databases.

In this approach, relations would be objects that reference two other objects
and carry any additional data particular to a given relation (such as
participation role in a relation between a person and an event, start/end dates
of the given relation, etc.). This approach is the most flexible in schema-less
databases—document databases could have a separate collection of relations,
and different relations could store different types of data. In relational
databases, this design could be modeled by either separate tables (one per relation
type) or a common `relations` table storing the references to the related
objects and a relation type pointing to a table holding data for all relations
of this particular type/schema.

The main drawback of this approach is dereferencing—getting other objects
related to the object at hand would be a two-step process: getting all of
the object’s relations (potentially only of a certain type) and then getting
all of the ‘other’ objects referenced by these relations. Note, however, that
this is exactly what we do every day with join tables for many-to-many
relations, so the drawback is mostly that this approach would apply to all of
the relations in the given system, not only many-to-many ones.

The main advantages of this approach are its simplicity (everything is an
object; relations just happen to carry certain properties, like the identifiers
of the objects they reference) and its potentially higher portability (in that it doesn't tie
the way relations are modeled to a given persistence approach). Having
relations as proper objects can also help in producing aggregated statistics
about the system (like ‘what are the hubs of the system—the most connected
objects, regardless of relation type’).

Additionally, when all of the objects in the system have unique identifiers
(_of course_ [PostgreSQL has a native type for
UUIDs](http://www.postgresql.org/docs/9.1/static/datatype-uuid.html)),
relations no longer need to carry the information about the table/collection of
the referenced object; assuming the system has a way to retrieve an object
solely based on its UUID, relations become—in their simplest form—just
triples of 128-bit UUIDs (one identifying the relation and the other two identifying the
referenced objects) plus some information about the relation type.

### Object databases

> Now that people are considering NoSQL, will more people consider no-database?
>
> <cite>Martin Fowler</cite>

A different approach to solving problems with persisting relations
between objects is to persist the objects not in a way that requires explicit
mapping, but by using an object database.

In the past, there were a few approaches to solving this problem in Ruby—
notable contestants being [Madeleine](http://madeleine.rubyforge.org),
[ODB](http://zeropluszero.com/software/odb/), and
[HybridDB](https://github.com/pauliephonic/hybriddb); unfortunately, all of
these seem to be no longer maintained (although some birds at the wroc\_love.rb
conference earlier this year suggested that it might be revived if enough interest
is expressed!). Currently the most promising solution for straight object
persistence is [MagLev](http://maglev.github.com), a recently released Ruby
implementation built on top of the GemStone/S Virtual Machine known as _the_
Smalltalk object persistence solution. Although it probably won’t be
a widely adopted silver bullet for some time, I have high hopes for MagLev and
the changes that object persistence can bring to the way we think about giving our
objects immortality.

Unfortunately, because the use of object databases is not widespread at all,
there is not much more to say about them except that they may prove to be an
interesting option in the future.

### Not your usual persistence models

I will wrap up this article with two examples of object persistence
that are not related to persisting relations but rather to hiding persistence
altogether. ActiveRecord gives us a nice abstraction for writing SQL, but
these two examples show how persistence can be abstracted even more.

The first example is [Candy](https://github.com/SFEley/candy). Although it is
currently unmaintained and in need of a fix to get running with the current
mongo gem, Candy is a nice and/or crazy example of how object persistence can be hidden
from our eyes with a single `include Candy::Piece` line:

```ruby
require 'candy'

class Conference
  include Candy::Piece
end

rubyconf = Conference.new
# connects to localhost:27017 and 'chastell' db if needed
# and saves a new document to the 'Conference' collection

rubyconf.location = 'New Orleans'   # method_missing resaves

rubyconf.events = { parties: { thursday: '&block Party' } }
rubyconf.events.parties.thursday    #=> '&block Party'
```

For a similarly unobtrusive way to _query_ a collection,
[Ambition](https://github.com/defunkt/ambition) provides a way 
to do Ruby-like queries against any supported persistence store. 
Like Candy, it is currently unmaintained but still worth checking out.

To see why Ambition is interesting, compare the following query against 
an ActiveRecord-supported store:

```ruby
require 'ambition/adapters/active_record'

class Person < ActiveRecord::Base
end

Person.select do |p|
  (p.country == 'USA' && p.age >= 21) ||
  (p.country != 'USA' && p.age >= 18)
end
```

with an example query against an LDAP backend:

```ruby
require 'ambition/adapters/active_ldap'

class Person < ActiveLdap::Base
end

Person.select do |p|
  (p.country == 'USA' && p.age >= 21) ||
  (p.country != 'USA' && p.age >= 18)
end
```

Although the code difference lies solely in the `require` and inheritance
clauses, the resulting backend query in the first case is the following SQL:

```sql
SELECT * FROM people
WHERE (
  (people.country =  'USA' AND people.age >= 21) OR
  (people.country <> 'USA' AND people.age >= 18)
)
```

And the query generated by the latter is the equivalent LDAP selector:

```
(|
  (& (country=USA)    (age>=21))
  (& (!(country=USA)) (age>=18))
)
```

These examples demonstrate how the benefits of the cross-platform nature of
using an ORM are preserved even though the syntax makes it appear as if
you are not working with a database at all. Although this style of interface
never quite caught on in the Ruby world, it is at least interesting to
think about.

### Closing thoughts

The problem of persisting object relations is tightly related to the general problem of object
persistence. Rails, with its `rails generate model`–driven development,
teaches us that our domain models should be tied one-to-one to their database
representations, but there are other (potentially better) ways to do persistence
in the object-oriented world.

If this topic sounds intriguing, you might be
interested in another of my talks, which was given at wroc\_love.rb this year (with a
highly revised version scheduled for the Scottish Ruby Conference in Edinburgh):
_Decoupling Persistence (Like There’s Some Tomorrow)_
([slides](http://decoupling-wrocloverb-2012.heroku.com),
[video](https://www.youtube.com/watch?v=w7Eol9N3jGI)).


================================================
FILE: articles/v4/007-confident-ruby.md
================================================
*This article was contributed by [Avdi Grimm](http://avdi.org). Avdi has been wrangling Ruby code for over a
decade and shows no signs of slowing down. He is the author of
[*Exceptional Ruby*](http://exceptionalruby.com) and
[*Objects on Rails*](http://objectsonrails.com). His next book,
[*Confident Ruby*](http://confidentruby.com), focuses on writing Ruby
code with a confident and straightforward style.*

### Losing the plot

Have you ever read a "choose your own adventure" book? Nearly every page ends with a question like this:

> * If you fight the angry troll with your bare hands, turn to page 137.
> * If you try to reason with the troll, turn to page 29.
> * If you don your invisibility cloak, turn to page 6.

You'd pick one option, turn to the indicated page, and the story would
continue.

Did you ever try to read one of those books from front to back? It's a
surreal experience. The story jumps forward and back in
time. Characters appear out of nowhere. One page you're crushed by the
fist of an angry troll, and on the next you're just entering the
troll's realm for the first time.

What if _each individual page_ was this kind of mish-mash? What if
every page read like this:

>   You exit the passageway into a large cavern. Unless you came from
>   page 59, in which case you fall down the sinkhole into a large
>   cavern. A huge troll, or possibly a badger (if you already visited
>   Queen Pelican), blocks your path. Unless you threw a button down the
>   wishing well on page 8, in which case there is nothing blocking your
>   way. The [troll or badger or nothing at all] does not look happy to
>   see you.
> 
> * If you came here from chapter 7 (the Pool of Time), go back to the
>   top of the page and read it again, only imagine you are watching the
>   events happen to someone else.
> 
> * If you already received the invisibility cloak from the aged
>   lighthouse-keeper, and you want to use it now, go to page 67. Otherwise, forget you read anything about an invisibility cloak.
> 
> * If you are facing a badger (see above), and you choose to run away,
>   turn to page 93…

Not the most compelling narrative, is it? The story asks you to carry
so much mental baggage for it that just getting through a page is
exhausting.

### Code as narrative

What does this have to do with software? Well, code can tell a story
as well. It might not be a tale of high adventure and intrigue. But
it's a story nonetheless; one about a problem that needed to be
solved, and the path the developer(s) chose to accomplish that task.

A single method is like a page in that story. And unfortunately, a lot
of methods are just as convoluted, equivocal, and confusing as that
made-up page above.

In the following sections, we'll take a look at some examples of code
that unnecessarily obscures the storyline of a method. We'll also
explore some techniques for minimizing distractions and writing
methods that straightforwardly convey their intent.

### Secure the borders

Here is some code that's having some trouble sticking to the plot:

```ruby
require 'date'

class Employee
  attr_accessor :name
  attr_accessor :hire_date

  def initialize(name, hire_date)
    @name      = name
    @hire_date = hire_date
  end

  def due_for_tie_pin?
    raise "Missing hire date!" unless hire_date
    ((Date.today - hire_date) / 365).to_i >= 10
  end

  def covered_by_pension_plan?
    # TODO Someone in HR should probably check this logic
    ((hire_date && hire_date.year) || 2000) < 2000
  end

  def bio
    if hire_date
      "#{name} has been a Yoyodyne employee since #{hire_date.year}"
    else
      "#{name} is a proud Yoyodyne employee"
    end
  end
end
```

We can speculate about the history of this class. It looks like over
the course of development, three different developers discovered that
`#hire_date` might sometimes be `nil`. They each chose to handle this
fact in a slightly different way. The one who wrote
`#due_for_tie_pin?` added a check that raises an exception if the hire
date is missing. The developer responsible for
`#covered_by_pension_plan?` substituted a (seemingly arbitrary) default
value for `nil`. And the writer of `#bio` went with an `if` statement
switching on the presence of `#hire_date`.

This class has serious problems with second-guessing itself. And the
root of all this insecurity is the fact that the `#hire_date`
attribute is unreliable—even though it's clearly important to the operation of the class!


One of the purposes of a constructor is to establish an object's
invariant: a set of properties which should always hold true for that
object. In this case, it really seems like one of those invariants should be: *employee hire date is a `Date`*.

But the constructor, whose job it is to stand guard against initial
values which are not compatible with the class invariant, has fallen
asleep on the job. As a result, every other method dealing with hire
dates is burdened with the additional responsibility of checking
whether the value is present.

This is an example of a class which needs to set some
boundaries. Since there is no obvious "right" way to handle a missing
hire date, it probably needs to simply insist on having a valid hire
date, thereby forcing the cause of these spurious `nil` values to be
discovered and sorted out. To do this, it should guard its own
integrity by checking the value wherever it is set, either in the
constructor or elsewhere:

```ruby
require 'date'

class Employee
  attr_accessor :name
  attr_reader :hire_date  

  def initialize(name, hire_date)
    @name          = name
    self.hire_date = hire_date
  end

  def hire_date=(new_hire_date)
    raise TypeError, "Invalid hire date" unless new_hire_date.is_a?(Date)
    @hire_date = new_hire_date
  end

  def due_for_tie_pin?
    ((Date.today - hire_date) / 365).to_i >= 10
  end

  def covered_by_pension_plan?
    hire_date.year < 2000
  end

  def bio
    "#{name} has been a Yoyodyne employee since #{hire_date.year}"
  end
end
```

In this version, the `hire_date` attribute is protected by a type check
in the setter method. Since the constructor now delegates to this
setter method to initialize the attribute, it is no longer possible to
construct new `Employee` objects without a valid hire date. Now that
the "borders" of the object are guarded, all the other methods can
focus on telling their own stories, without being distracted by
a potentially missing `hire_date`.

### Be assertive

In the last section we saw how assertions in a class' constructor or
setter methods can help keep the other methods focused. But
uncertainty and convoluted code can come from sources
other than input parameters.

Let's say you're working on some budget management software. The next
user story requires the application to pull in transaction data from a
third-party electronic banking API. According to the meager
documentation you can find, you need to use the
`Bank#read_transactions` method in order to load bank
transactions. The first thing you decide to do is to stash the loaded
transactions into a local data store.

```ruby
class Account
  def refresh_transactions
    transactions = bank.read_transactions(account_number)
    # ... now what?
  end
end
```

Unfortunately the documentation doesn't say what the
`#read_transactions` method returns. An `Array` seems likely. But what
if there are no transactions found? What if the account is not found?
Will it raise an exception, or perhaps return `nil`? Given enough time
you might be able to work it out by reading the API library's source
code, but it's pretty convoluted and you might still miss some edge
cases.

You decide to make an assumption… but as insurance, you document your assumption with an assertion.

```ruby
class Account
  def refresh_transactions
    transactions = bank.read_transactions(account_number)
    transactions.is_a?(Array) or raise TypeError, "transactions is not an Array"
    transactions.each do |transaction|
      # ...
    end
  end
end
```

You manually test the code against a test account and it doesn't blow
up, so it seems your suspicion was correct. Next, you move on to
pulling amount information out of the individual transactions.

You ask your teammate, who has had some experience with this API, what
format transactions are in. She says she thinks they are hashes with
string keys. You decide to tentatively try looking at the "amount"
key.

```ruby
transactions.each do |transaction|
  amount = transaction["amount"]
end
```

You look at this for a few seconds, and realize that if there is no
"amount" key, you'll just get a `nil` back. Then you'd have to check
for the presence of `nil` everywhere the amount is used. You'd prefer
to document your assumption more explicitly. So instead, you make an
assertion by using the `Hash#fetch` method:

```ruby
transactions.each do |transaction|
  amount = transaction.fetch("amount")
end
```

`Hash#fetch` will raise a `KeyError` if the given key is not found,
signaling that one of your assumptions about the `Bank API` was
incorrect.

You make another trial run and you don't get any exceptions, so you
proceed onward. Before you can store the value locally, you want to
make sure the transaction amount is in a format that your local
transaction store can understand. Nobody in the office seems to know
what format the amounts come in as. You know that many financial
systems store dollar amounts as an integer number of cents, so you
decide to proceed with the assumption that it's the same with this
system. In order to once again document your assumption, you make
another assertion:

```ruby
transactions.each do |transaction|
  amount = transaction.fetch("amount")
  amount.is_a?(Integer) or raise TypeError, "amount not an Integer"
end
```

You put the code through its paces and… BOOM. You get an error.

```
TypeError: amount not an Integer
```

You decide to drop into the debugger on the next round, and take a
look at the transaction values coming back from the API. You see this:

```ruby
[
 {"amount" => "1.23"},
 {"amount" => "4.75"},
 {"amount" => "8.97"}
]
```

Well that's… interesting. The amounts are reported as decimal strings.

You decide to convert them to integers, since that's what
your internal `Transaction` class uses.

```ruby
transactions.each do |transaction|
  amount = transaction.fetch("amount")
  amount_cents = (amount.to_f * 100).to_i
  # ...
end
```

Once again, you find yourself questioning this code as soon as you
write it. You remember something about `#to_f` being really forgiving
in how it parses numbers. A little experimentation proves this to be
true.

```ruby
"1.23".to_f                     # => 1.23
"$1.23".to_f                    # => 0.0
"a hojillion".to_f              # => 0.0
```

Only having a small sample of demonstration values to go on, you're
not confident that the amounts this API might return will always be in
a format that `#to_f` understands. What about negative numbers? Will
they be formatted as "-4.56"? Or as "(4.56)"? Having an unrecognized
amount format silently converted to zero could lead to nasty bugs down
the road.

Yet again, you want a way to state in no uncertain terms what kind of
values the code is prepared to deal with. This time, you use
`Kernel#Float` to assert that the amount is in a format Ruby can parse
unambiguously as a floating point number:

```ruby
transactions.each do |transaction|
  amount = transaction.fetch("amount")
  amount_cents = (Float(amount) * 100).to_i
  cache_transaction(:amount => amount_cents)
end
```

`Kernel#Float` is much stricter than `String#to_f`:

```ruby
Float("$1.23")
# ~> -:1:in `Float': invalid value for Float(): "$1.23" (ArgumentError)
# ~>    from -:1:in `<main>'
```

The final code is chock full of assertions:

```ruby
class Account
  # Reads this account's transactions from the bank and caches each one
  # with its amount converted to integer cents.
  #
  # Fails fast if the bank returns something other than an Array, or if an
  # amount is not a string that Kernel#Float can parse.
  def refresh_transactions
    transactions = bank.read_transactions(account_number)
    transactions.is_a?(Array) or raise TypeError, "transactions is not an Array"
    transactions.each do |transaction|
      amount = transaction.fetch("amount")
      # Round rather than truncate: Float("1.15") * 100 is
      # 114.99999999999999, which #to_i would silently turn into 114 cents.
      amount_cents = (Float(amount) * 100).round
      cache_transaction(:amount => amount_cents)
    end
  end
end
```

This code clearly states what it expects. It communicates a great deal
of information about your understanding of the external API at the
time you wrote it. It explicitly establishes the parameters within
which it can operate confidently; as soon as any of its expectations
are violated it fails quickly, with a meaningful exception message.

Unfortunately, as a result it is really telling two stories: one about
refreshing transactions (remember, that's nominally what this method is
about) and one about the format of an external data source. This is
quickly mended, however:

```ruby
class Account
  # Re-reads this account's transactions and caches each of them.
  def refresh_transactions
    fetch_transactions do |transaction_attributes|
      cache_transaction(transaction_attributes)
    end
  end

  # Yields a hash of cleaned-up transaction attributes for each transaction.
  #
  # All assumptions about the external API live here: the response must be
  # an Array, and every amount must be a string Kernel#Float can parse.
  # Any violation raises immediately.
  def fetch_transactions
    transactions = bank.read_transactions(account_number)
    transactions.is_a?(Array) or raise TypeError, "transactions is not an Array"
    transactions.each do |transaction|
      amount = transaction.fetch("amount")
      # Round rather than truncate: Float("1.15") * 100 is
      # 114.99999999999999, which #to_i would silently turn into 114 cents.
      amount_cents = (Float(amount) * 100).round
      yield(:amount => amount_cents)
    end
  end
end
```

By failing early rather than allowing misunderstood inputs to
contaminate the system, it reduces the need for type-checking and
coercion in other methods. And not only does this code document your
assumptions now, it also sets up an early-warning system should the
third-party API ever change unexpectedly in the future.

### Represent special cases with objects

The most common causes of code that tells a confusing story are
special cases. Let's look at an example of a special case, in the
context of our budgeting application.

You've implemented transaction import and it's working great. Except
for one little problem: users have been reporting bugs about the
reported balances being off. And not just the balances; in fact, all
of the reports seem to have incorrect numbers for some accounts.

You do some investigation into the system logs, and eventually
discover the culprit. It turns out that some banks, when they receive
an authorization for a credit card charge, immediately report it as a
pending transaction in the transaction list. The data looks something
like this:

```ruby
{"amount" => "55.08", "type" => "pending", "id" => "98765"}
```

Then, when the charge is completed or "captured", another transaction
is recorded:

```ruby
{"amount" => "55.08", "type" => "charge", "id" => "98765"}
```

> **Aside:** if you've ever written code to deal with actual banking APIs,
> you've probably figured out by now that I have not. I'm making this up for the
> sake of example. I expect real banking APIs are just as idiosyncratic, though,
> in their own ways.

The result of these "double entries" is that your calculations get
thrown off. Your application has routines for summing transactions,
averaging them, breaking them down by month and quarter, and many
more. And every one of these calculations uses the amount field to
arrive at its results.

You briefly consider simply throwing out pending transactions. But
after a quick consultation with your team you realize this would only
introduce more problems. There is sanity-checking code in place which
checks that the bank servers and the local cache have the same
transaction count and contain the same transaction IDs. And not only
that, you might actually want to use the pending transaction
information for upcoming features.

Your second option is to handle the special case… *specially*,
everywhere that the amount field is referenced. For example:

```ruby
# Adds the amount of every cached transaction to the starting balance,
# leaving the running balance untouched for transactions whose type is
# "pending".
def account_balance
  cached_transactions.reduce(starting_balance) do |balance, transaction|
    if transaction.type == "pending"
      balance
    else
      balance + transaction.amount
    end
  end
end
```

You'll have to carefully audit the code base, adding conditionals to
every use of amount. Not only that, you'll have to make sure anyone
else who works on this code understands the special case.

That doesn't seem like a very attractive option. Thankfully, there is
a third way. And the code above actually gives you the hint you needed
to discover it.

Let's look at that conditional again:

```ruby
if transaction.type == "pending"
```

This code is branching on the type of a value. This is a huge clue. In
an object-oriented language, anytime we branch on an object's type,
we're doing work that the language could be doing for us.

You realize that this special case calls for a special type of object
to represent it.

You decide to try this approach out. You find the code where
transaction objects are being instantiated:

```ruby
# Note: we expect that transaction attributes have already been
# converted to use Symbol keys at this point.
def cache_transaction(attributes)
  cached_transactions << Transaction.new(attributes)
end
```

You change it to instantiate a different kind of object for pending
transactions. Because you want to quickly spike this approach, you use
an `OpenStruct` to create a rough-and-ready ad-hoc object:

```ruby
# Builds an object for the given attributes and appends it to
# cached_transactions.
def cache_transaction(attributes)
  transaction = 
    case attributes[:type]
    when "pending"
      # Ad-hoc stand-in for a pending transaction: it reports an amount of
      # 0 and keeps the original amount under pending_amount.
      pending_attributes = {
        :amount         => 0,
        :pending_amount => attributes[:amount]
      }
      OpenStruct.new(attributes.merge(pending_attributes))
    else
      Transaction.new(attributes)
    end
  cached_transactions << transaction
end
```

This switches on type as well, but it only does it once. After that,
the transaction can be used as-is in all of your existing algorithms.

You run some tests, and discover this fixes the problem! You consider
simply leaving the code as it is, since it's working now. But on
reflection you decide that the concept of a pending transaction would
be best represented by a proper class. That way you have a place to
put documentation about this special case, as well as any more special
logic you realize you need down the road.

```ruby
# A credit-card transaction that the bank has reported as pending.
class PendingTransaction
  attr_reader :id, :pending_amount

  # Requires :id and :amount in +attributes+; the amount is stored as the
  # pending amount rather than as the amount used in calculations.
  def initialize(attributes)
    @id, @pending_amount = attributes.fetch(:id), attributes.fetch(:amount)
  end

  # Always zero. A pending transaction duplicates the finished transaction
  # that follows it, so counting both would throw off calculations and
  # reports. The real figure is available from #pending_amount.
  def amount
    0
  end
end
```

You then rewrite `#cache_transaction` to use this new class.

```ruby
# Wraps the attributes in the class that matches their :type and appends
# the resulting object to cached_transactions.
def cache_transaction(attributes)
  transaction_class =
    "pending" == attributes[:type] ? PendingTransaction : Transaction
  cached_transactions << transaction_class.new(attributes)
end
```

This code solves the immediate problem of a special type of
transaction, without duplicating logic for that special case all
throughout the codebase. But not only that, it is *exemplary*: it sets
a good example for code that follows. When, inevitably, another
special case transaction type turns up, whoever is tasked with dealing
with it will see this class and be guided towards representing the new
case as a distinct type of object.

### Conclusion

Ruby is a language which values expressiveness over just about
everything else. It is optimized to help us programmers say exactly
what we mean, without any extraneous fluff, to both the computer and
to future readers of our code. This is what makes it so much fun to
code in.

When we allow our methods to become cluttered up with ifs and maybes
and provisos and digressions, we let go of that expressiveness. We
start to lose the clear, confident narrative voice. We force the
future maintainers of the code to navigate through a twisty path full
of logical forks in the road in order to understand the purpose of a
method. Reading and updating the code stops being fun.

My challenge to you is this: when you are writing a new method, keep a
clear idea in mind of the story you are trying to tell. When detours
and diversions start to show up along the way, figure out what you
need to do to restore the narrative, and do it. You might get rid of
repetitive data integrity checks by introducing preconditions in the
initializer of a method. Maybe you can surround an external API in
assertions that document your beliefs about it, rather than trying to
handle anything it throws at you. Or perhaps you can eliminate a
family of often-repeated conditionals by representing a special case
as a class in its own right.

However you do it, keep your focus on telling a straightforward
tale. Not only will the future readers of your code thank you for it,
but I think you'll find that it makes your code more robust and easier
to maintain as well.


================================================
FILE: articles/v4/008-implementing-active-record.md
================================================
> (ORM) is one of the most complex things you could ever touch, and we choose it
> over and over again without thinking at all because everybody is doing it. It
>  is really complex! You waste an inordinate amount of your time on it, and
>  you need to look at it. -- [Rich Hickey, RailsConf 2012 (video)](http://www.youtube.com/watch?v=rI8tNMsozo0#t=1289s)

Depending on the kind of work you do, the claim that object-relational mapping
is _"one of the most complex things you could ever touch"_ is just 
as likely to be shocking as it is to be blindingly obvious. Because 
ActiveRecord (and other Ruby ORMs) provide highly abstracted ways of solving
common problems, it is easy to ignore the underlying complexity involved in even
the most simple things that we use ORM for. But just as there is a huge
difference between driving a car and repairing one, the cost of 
understanding ORM is much higher than simply making use of it.

In this two-part article, I will walk you through a minimal 
implementation of the [Active
Record](http://en.wikipedia.org/wiki/Active_record) pattern so that you can more 
easily understand what we take for granted when we use this flavor 
of ORM in our projects.

### Is the Active Record pattern inherently complex?

Whenever we talk about an Active Record in Ruby, it is extremely common for us
to immediately tie our thoughts to the Rails implementation of this pattern,
even though the concept itself was around before Rails was invented. If we 
accept the Rails-centric view of our world, the question of whether
ActiveRecord is a complex piece of software is trivial to answer; we only need
to look at the `ActiveRecord::Base` object to see that it has all of the 
following complecting characteristics:

* Hundreds of instance methods
* Hundreds of class methods
* Over a dozen instance variables
* Over a dozen class instance variables
* Several class variables ([a construct that's inherently complex!](http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html))
* A 40 level deep lookup path for class methods
* A 46 level deep lookup path for instance methods
* Dozens of kinds of method_missing hacks
* No encapsulation whatsoever between mixed in modules

But if you look back at how Martin Fowler defined the concept of an Active
Record in his 2003 book "Patterns of Enterprise Application Architecture", you
will find that the pattern does not necessarily require such a massively complex
implementation. In fact, Fowler's definition of an Active Record included any
object that could do most or all of the following things:

* Construct an instance of the Active Record from a SQL result set row
* Construct a new instance for later insertion into the table
* Use static finder methods to wrap commonly used SQL queries and return
  Active Record objects
* Update the database and insert data into the Active Record
* Get and set fields
* Implement some pieces of business logic

Clearly, the Rails-based ActiveRecord library does all of these things, but it
also does a lot more. As a result, it is easy to conflate the
coincidental complexity of this very popular implementation with the inherent 
complexity of its underlying pattern. This is a major source of
confounding in many discussions about software design for Rails developers, and
is something I want to avoid in this article.

With that problem in mind, I built a minimal implementation of the Active Record pattern called
[BrokenRecord](https://github.com/elm-city-craftworks/broken_record) which will
help you understand the fundamental design challenges involved in implementing this
particular flavor of ORM. As long as you keep in mind that BrokenRecord exists
primarily to facilitate thought experiments and is not meant to be used in
production code, it should provide an easy way for you to explore a number
of questions about ORM in general, and the Active Record pattern in particular.

### The ingredients for implementing an Active Record object

Now that you know what an Active Record is in its most generic form, how would you
go about implementing it? To answer that question, it may help to reflect upon
an example of how Active Record objects are actually used. The following code
is a good place to start, because it illustrates some of the most basic 
features you can expect from an Active Record object.

```ruby
## Create an article with a few positive comments.

article1 = Article.create(:title => "A great article",
                          :body  => "Short but sweet!")


Comment.create(:body => "Supportive comment!", :article_id => article1.id)
Comment.create(:body => "Friendly comment!",   :article_id => article1.id)

## Create an article with a few negative comments.

article2 = Article.create(:title => "A not so great article",
                          :body  => "Just as short")

Comment.create(:body => "Angry comment!",      :article_id => article2.id)
Comment.create(:body => "Frustrated comment!", :article_id => article2.id)
Comment.create(:body => "Irritated comment!",  :article_id => article2.id)

## Display all the articles and their comments 

Article.all.each do |article|
  puts %{
    TITLE: #{article.title}
    BODY: #{article.body}
    COMMENTS:\n#{article.comments.map { |e| "    - #{e.body}" }.join("\n")}
  }
end
```

While this example omits a bit of setup code, it is not hard to see that it
produces the following output:

```
    TITLE: A great article
    BODY: Short but sweet!
    COMMENTS:
    - Supportive comment!
    - Friendly comment!


    TITLE: A not so great article
    BODY: Just as short
    COMMENTS:
    - Angry comment!
    - Frustrated comment!
    - Irritated comment!  
```

Despite its simple output, there is a lot going on in this little 
code sample. To gain a better sense of what is happening under
the hood, take a look at how the `Article` and `Comment` objects
are defined:

```ruby
class Article
  include BrokenRecord::Mapping
  
  map_to_table :articles

  has_many :comments, :key   => :article_id,
                      :class => "Comment"
end

class Comment
  include BrokenRecord::Mapping

  map_to_table :comments

  belongs_to :article, :key   => :article_id,
                       :class => "Article"
end
```

Because `BrokenRecord::Mapping` does not implement the naming 
shortcuts that `ActiveRecord::Base` uses, the connection between 
these objects and the underlying database schema is much more 
explicit. If you take a look at how the `articles` and `comments` 
tables are defined, it should be straightforward to understand how 
this all comes together:

```sql
 -- One row per article.
 create table articles ( 
   id     INTEGER PRIMARY KEY,
   title  TEXT,
   body   TEXT
 );

 -- One row per comment; article_id points at the article it belongs to.
 create table comments (
   id          INTEGER PRIMARY KEY,
   body        TEXT,
   article_id  INTEGER,
   FOREIGN KEY(article_id) REFERENCES articles(id)
 );
```

If you haven't been paying close attention to what kinds of things you would
need to build in order to make this code work, go ahead and quickly re-read this
section with that in mind. Once you've done that, examine the following grocery 
list of Active Record ingredients and see if they match your own:

1. Storage and retrieval of record data in an SQL database. 
  (e.g. `Article.create` and `Article.all`)
2. Dynamic generation of accessors for record data. (e.g. `article.body`)
3. Dynamic generation of associations methods (e.g. `article.comments`),
  including the ability to dynamically look up the associated class.
  (e.g. `:class => "Comment"`)
4. The ability to wrap all these features up into a single module mix-in.

This list easily demonstrates that a fair amount of complicated code is needed 
to support the most basic uses of Active Record objects, even when the
pattern is stripped down to its bare essentials. But it is one thing
to have a rough sense that a problem is complex, and a different thing
entirely to familiarize yourself with its nuances. The former insight leads you to
appreciate your magical tools; the latter helps you master them.

To help you dig deeper, I will guide you through the code that handles each
of these responsibilities in `BrokenRecord`, explaining how it all works along
the way. We will start by exploring some low level constructs that help
simplify the implementation of Active Record objects, and then in [part 2](http://practicingruby.com/articles/63) 
we will look at how the whole system comes together.

### Abstracting away the database

Using the Active Record pattern introduces tight coupling between classes
containing bits of domain logic and the underlying persistence layer. However,
this does not mean that an Active Record ought to directly tie itself to 
a low-level database adapter. With that in mind, introducing a simple
object to handle basic table manipulations and queries is a good way
to reduce the brittleness of this tightly coupled design.

The following example shows how `BrokenRecord::Table` can be used directly to
solve the same problem that was shown earlier. As you read through it, try to
imagine how the `BrokenRecord::Mapping` module might be implemented using this
object as a foundation.

```ruby
## create a couple table objects

articles = BrokenRecord::Table.new(:name => "articles",
                                   :db   => BrokenRecord.database)

comments = BrokenRecord::Table.new(:name => "comments",
                                   :db   => BrokenRecord.database)

## create an article with some positive comments

a1 = articles.insert(:title => "A great article", 
                     :body  => "Short but sweet")

comments.insert(:body => "Supportive comment!", :article_id => a1)
comments.insert(:body => "Friendly comment!",   :article_id => a1)

## create an article with some negative comments

a2 = articles.insert(:title => "A not so great article", 
                     :body  => "Just as short")

comments.insert(:body => "Angry comment!",      :article_id => a2)
comments.insert(:body => "Frustrated comment!", :article_id => a2)
comments.insert(:body => "Irritated comment!",  :article_id => a2)

## Display the articles and their comments

articles.all.each do |article|
  responses = comments.where(:article_id => article[:id])

  puts %{
    TITLE: #{article[:title]}
    BODY: #{article[:body]}
    COMMENTS:\n#{responses.map { |e| "    - #{e[:body]}" }.join("\n") }
  }
end
```

Despite the superficial similarity between the features provided by
the `BrokenRecord::Mapping` mixin and the `BrokenRecord::Table` class,
there are several key differences that set them apart from one another:

1) `Mapping` assumes that `BrokenRecord.database` holds a
reference to an appropriate database adapter, but `Table` requires
the database adapter to be injected. This means that unlike `Mapping`, the
`Table` class has no dependencies on global state. 

2) Most of the methods in `Mapping` return instances of whatever
object it gets mixed into, but `Table` always returns primitive
values such as arrays, hashes, and integers. This means that
`Mapping` needs to make assumptions about the interfaces of other
objects, and `Table` does not.

3) `Mapping` implements a big chunk of its functionality via class methods, 
but `Table` does not rely on any special
class-level behavior. This means that `Table` can be easily tested
without generating anonymous classes or doing awkward cleanup tasks.

The `Mapping` mix-in is convenient to use because it can introduce 
persistence into any class, but it bakes in a few assumptions that you
can't easily change. By contrast, the `Table` object expects you to wire more
things up by hand, but is conceptually simple and very flexible. This is exactly
the kind of tension to expect between higher and lower levels of abstraction,
and is not necessarily a sign of a design problem.

If these two components were merged into a single entity, the 
conflict between their design priorities would quickly lead 
to creating an object with a split-personality. Whenever that happens, 
complexity goes through the roof, and so does the cost
of change. By allowing `Mapping` to delegate much of its functionality to 
a `Table` object, it is possible to sidestep these concerns and gain 
the best of both worlds.

### Encapsulating record data

One of the defining characteristics of an Active Record is that ordinary
accessors can be used to retrieve and manipulate its data. As a
result, basic operations on Active Record objects end up looking like 
plain old Ruby code, such as in the following example:

```ruby
Article.all.each do |article|
  puts %{
    TITLE: #{article.title}
    BODY: #{article.body}
    COMMENTS:\n#{article.comments.map { |e| "    - #{e.body}" }.join("\n")}
  }
end
```

The interesting part about getters and setters for Active Record objects 
is that they need to be dynamically generated. To refresh your memory, take a
second look at the class definition for `Article`, and note that it contains
no explicit definitions for the `Article#title` and `Article#body` methods.

```ruby
class Article
  include BrokenRecord::Mapping
  
  map_to_table :articles

  has_many :comments, :key   => :article_id,
                      :class => "Comment"
end
```

In the above code, `map_to_table` ties the `Article` class to a database 
table, and the columns in that table determine what accessors need to 
be defined. Through a low-level call to `BrokenRecord::Table`, it 
is possible to get back an array of column names, as shown below:

```ruby
  table.columns.keys #=> [:id, :title, :body]
```

If you assume that `Article` will not store field values directly, but instead
delegate to some sort of value object, Ruby's built in `Struct` object might
come to mind as a way to solve this problem. After all, it does make 
dynamically generating a value object with accessors quite easy:

```ruby
  article_container = Struct.new(:id, :title, :body)

  article = article_container.new
  article.title = "A fancy article"
  article.body  = "This is so full of class, it's silly"

  # ... 
```

Using a `Struct` for this purpose is a fairly standard idiom, and it is not
necessarily a bad idea. But despite how simple they appear to be on the surface,
the lesser known features of `Struct` objects make them very complex. In
addition to accessors, using a `Struct` also gives you all of the 
following functionality:

```ruby
  # array-like indexing
  article[1] #=> "A fancy article"

  # hash-like indexing with both symbols and strings
  article[:title] == article[1]       #=> true
  article[:title] == article["title"] #=> true 

  # Enumerability
  article.count       #=> 3
  article.map(&:nil?) #=> [true, false, false]

  # Pair-wise iteration
  article.each_pair { |k,v| p [k,v] }
  
  # Customized inspect output
  p article #=> #<struct id=nil, title="A fancy article", 
                # body="This is so full of class, it's silly">
```

While this broad interface makes `Struct` very useful for certain data
processing tasks, they are much more often used in scenarios in which a simple
object with dynamic accessors would be a much better fit. The
`BrokenRecord::FieldSet` class implements such an object while 
maintaining a minimal API: 

```ruby
module BrokenRecord
  # A minimal value object that exposes one getter/setter pair per
  # attribute and converts to and from plain hashes.
  class FieldSet
    # params[:attributes] - the field names to expose (required)
    # params[:values]     - optional hash of initial values keyed by field
    #                       name; it is deep-copied before use
    def initialize(params)
      field_names = params.fetch(:attributes)
      initial     = deep_copy(params.fetch(:values, {}))

      store = {}
      field_names.each { |field| store[field] = initial[field] }
      self.data = store

      build_accessors(field_names)
    end

    # Returns an independent copy of the field data as a hash.
    def to_hash
      deep_copy(data)
    end

    private

    attr_accessor :data

    # Copies an object by round-tripping it through Marshal.
    def deep_copy(object)
      Marshal.load(Marshal.dump(object))
    end

    # Defines a reader and a writer for each attribute directly on this
    # instance, both backed by the data hash.
    def build_accessors(attributes)
      attributes.each do |field|
        define_singleton_method(field) { data[field] }
        define_singleton_method("#{field}=") { |value| data[field] = value }
      end
    end
  end
end
```

 The most important thing to note about this code is that
 `BrokenRecord::FieldSet` makes it just as easy to create 
 a dynamic value object as `Struct` does:

```ruby
article = BrokenRecord::FieldSet.new(:attributes => [:id, :title, :body])
article.title = "A fancy article"
article.body  = "This is so full of class, its silly"

# ...
```

The similarity ends there, mostly because `BrokenRecord::FieldSet` does not 
implement most of the features that `Struct` provides. Another important difference
is that `BrokenRecord::FieldSet` does not rely on an anonymous intermediate class to
implement its functionality. This helps discourage the use of class inheritance
for code reuse, which in turn reduces overall system complexity.

In addition to these simplifications, `BrokenRecord::FieldSet` also attempts to 
adapt itself a bit better to its own problem domain. Because `FieldSet`
objects need to be used in conjunction with `Table` objects, they need to be 
more hash-friendly than `Struct` objects are. In particular, it must be easy 
to set the values of the `FieldSet` object using a hash, and it must be easy to 
convert a `FieldSet` back into a hash. The following example demonstrates 
that both of those requirements are handled gracefully:

```ruby
article_data = { :id    => 1,
                 :title => "A fancy article",
                 :body  => "This is so full of class, it's silly" }

article = BrokenRecord::FieldSet.new(:attributes => [:id, :title, :body],
                                     :values       => article_data)

p article.title #=> "A fancy article"

p article.to_hash == article_data #=> true

article.title = "A less fancy title"

p article.to_hash == article_data #=> false
p article.to_hash[:title]         #=> "A less fancy title"
```

While it may be a bit overkill to roll your own object for the sole purpose of
removing features from an existing well supported object, the fact that
`BrokenRecord::FieldSet` also introduces a few new features of its own makes it more
reasonable to implement things this way. More could definitely be said about the
trade-offs involved in making this kind of design decision, but they are very 
context dependent, and that makes them a bit tricky to generalize.

### Reflections

The objects described in this article may seem a bit austere,
but they are easy to reason about once you gain some familiarity with them. In
the [second part of this article (Issue
4.10)](http://practicingruby.com/articles/63), you will be able to see these
objects in the context in which they are actually used, which will help you
understand them further.

The main theory I am trying to test out here is that I believe simple low level 
constructs tend to make it easier to build simple higher level constructs.
However, there is a very real tension between conceptual simplicity and
practical ease-of-use, and that can lead to some complicated design decisions.

What do you think about these ideas? Are the techniques that I've shown so far more
confusing than they are enlightening? Do you have a better idea for how to
approach this problem? No matter what is on your mind, if you have thoughts on
this topic, I want to hear from you!

> **BONUS CONTENT:** If you're curious about what it looks like for me to put the "finishing touches" on a Practicing Ruby article, see [this youtube video](http://www.youtube.com/watch?v=bojXlV1mFNY). Be warned however, I am barely capable of using a computer, and so it's likely to be painful to watch me work.


================================================
FILE: articles/v4/009-the-hidden-costs-of-inheritance.md
================================================
As a Ruby programmer, you almost certainly make use of inheritance-based object
modeling on a daily basis. In fact, extending base classes and mixing
modules into your objects may be so common for you that you rarely 
need to think about the mechanics involved in doing so. If you are like 
most Ruby programmers, your readiness to apply this complex design paradigm 
throughout your projects is both a blessing and a curse.

On the one hand, your ability to make good use of inheritance-based modeling
without thinking about its complexity is a sign that it works well as an
abstraction. But on the other hand, having this familiar tool constantly within
reach makes it harder to recognize alternative approaches that may lead to
greater simplicity in certain contexts. Because no one tool is a golden hammer,
it is a good idea to understand the limitations of your preferred modeling
techniques as well as their virtues.

In this article, I will guide you through three properties of
inheritance-based modeling that can lead to design complications unless they are
given careful consideration. These are meant to be starting points for
conversation more-so than tutorials on what to do and what not to do, so please
attempt some of the homework exercises I've included at the bottom of the
article!

### PROBLEM 1: There is no encapsulation along ancestry chains

Inheritance-based modeling is most commonly used for behavior sharing, but 
what it actually provides is implementation sharing. Among other things,
this means that no matter how many ancestors an object has, all of its 
methods and state end up getting defined in a single namespace. If you 
aren't careful, this lack of encapsulation between objects in an 
inheritance relationship can easily bite you.

To test your understanding of this problem, see if you can spot the bug in the
following example: 

```ruby
require "prawn"

class StyledDocument < Prawn::Document
  def style(params)
    @font_name = params[:font]
    @font_size = params[:size]
  end

  def styled_text(content)
    font(@font_name) do
      text(content, :size => @font_size)
    end
  end
end

StyledDocument.generate("example.pdf") do 
  text "This is the default font size and face"

  style(:font => "Courier", :size => 20)

  styled_text "This line should be in size 20 Courier"

  text "This line should be in the default font size and face"
end
```

This example runs without raising any sort of explicit error, but produces
the following incorrect output:

![](http://i.imgur.com/xjOpU.png)

There aren't a whole lot of things that can go wrong in this example,
and so you have probably figured out the source of the problem by now:
`StyledDocument` and `Prawn::Document` each define `@font_size`, but 
they each use it for a completely different purpose. As a result, calling 
`StyledDocument#style` triggers a side effect that leads to this
subtle defect.

To verify that a naming collision is to blame for this problem, you can 
try renaming the `@font_size` variable in `StyledDocument` to 
something else, such as `@styled_font_size`. Making that tiny 
change will cause the example to produce the correct output, 
as shown below:

![](http://i.imgur.com/1O23U.png)

However, this is only a superficial fix, and does not address the root problem.
The real issue is that without true subobjects with isolated state, the chance
of clashing with a variable used by an ancestor increases as your
ancestry chain grows. If you look at the mixins that `ActiveRecord::Base` 
depends on, you'll find examples of a 
[scary lack of encapsulation](https://github.com/rails/rails/blob/master/activerecord/lib/active_record/transactions.rb#L327-345)
that will make you wonder how things don't break more often.

To make matters worse, the lack of encapsulation between objects in an
inheritance relationship also means that methods can clash in the same way that
variables can. A lack of true private methods in Ruby complicates the problem
even further, because there simply isn't a way to write a method in a parent
object that a child object can't clash with or override. One of the homework
questions for this article addresses this issue, but it is worth thinking 
about for a moment before you read on.

### PROBLEM 2: Interfaces tend to grow rapidly under inheritance

I am going to attempt a proof without words for this particular problem, and
leave it up to you to figure out *why* this can be a source of maintenance
headaches, but please share your thoughts in the comments:

```ruby
>> (ActiveRecord::Base.instance_methods | 
    ActiveRecord::Base.private_instance_methods)
=> [:logger, :configurations, :default_timezone, :schema_format,
:timestamped_migrations, :init_with, :initialize_dup, :encode_with, :==, :eql?,
:hash, :freeze, :frozen?, :<=>, :readonly?, :readonly!, :inspect, :to_yaml,
:yaml_initialize, :_attr_readonly, :_attr_readonly?, :primary_key_prefix_type,
:table_name_prefix, :table_name_prefix?, :table_name_suffix,
:table_name_suffix?, :pluralize_table_names, :pluralize_table_names?,
:store_full_sti_class, :store_full_sti_class?, :store_full_sti_class=,
:default_scopes, :default_scopes?, :_accessible_attributes,
:_accessible_attributes?, :_accessible_attributes=, :_protected_attributes,
:_protected_attributes?, :_protected_attributes=, :_active_authorizer,
:_active_authorizer?, :_active_authorizer=, :_mass_assignment_sanitizer,
:_mass_assignment_sanitizer?, :_mass_assignment_sanitizer=, :validation_context,
:validation_context=, :_validate_callbacks, :_validate_callbacks?,
:_validate_callbacks=, :_validators, :_validators?, :_validators=,
:lock_optimistically, :attribute_method_matchers, :attribute_method_matchers?,
:attribute_types_cached_by_default, :time_zone_aware_attributes,
:skip_time_zone_conversion_for_attributes,
:skip_time_zone_conversion_for_attributes?, :partial_updates, :partial_updates?,
:partial_updates=, :serialized_attributes, :serialized_attributes?,
:serialized_attributes=, :[], :[]=, :record_timestamps, :record_timestamps?,
:record_timestamps=, :_validation_callbacks, :_validation_callbacks?,
:_validation_callbacks=, :_initialize_callbacks, :_initialize_callbacks?,
:_initialize_callbacks=, :_find_callbacks, :_find_callbacks?, :_find_callbacks=,
:_touch_callbacks, :_touch_callbacks?, :_touch_callbacks=, :_save_callbacks,
:_save_callbacks?, :_save_callbacks=, :_create_callbacks, :_create_callbacks?,
:_create_callbacks=, :_update_callbacks, :_update_callbacks?,
:_update_callbacks=, :_destroy_callbacks, :_destroy_callbacks?,
:_destroy_callbacks=, :auto_explain_threshold_in_seconds,
:auto_explain_threshold_in_seconds?, :nested_attributes_options,
:nested_attributes_options?, :include_root_in_json, :include_root_in_json?,
:include_root_in_json=, :reflections, :reflections?, :reflections=,
:_commit_callbacks, :_commit_callbacks?, :_commit_callbacks=,
:_rollback_callbacks, :_rollback_callbacks?, :_rollback_callbacks=,
:connection_handler, :connection_handler?, :connection,
:clear_aggregation_cache, :transaction, :destroy, :save, :save!,
:rollback_active_record_state!, :committed!, :rolledback!, :add_to_transaction,
:with_transaction_returning_status, :remember_transaction_record_state,
:clear_transaction_record_state, :restore_transaction_record_state,
:transaction_record_state, :transaction_include_action?, :serializable_hash,
:to_xml, :from_xml, :as_json, :from_json, :read_attribute_for_serialization,
:reload, :mark_for_destruction, :marked_for_destruction?,
:changed_for_autosave?, :_destroy, :reinit_with, :clear_association_cache,
:association_cache, :association, :run_validations!, :touch, :_attribute,
:type_cast_attribute_for_write, :read_attribute_before_type_cast, :changed?,
:changed, :changes, :previous_changes, :changed_attributes, :to_key, :id, :id=,
:id?, :query_attribute, :attributes_before_type_cast, :raw_write_attribute,
:read_attribute, :method_missing, :attribute_missing, :respond_to?,
:has_attribute?, :attribute_names, :attributes, :attribute_for_inspect,
:attribute_present?, :column_for_attribute, :clone_attributes,
:clone_attribute_value, :arel_attributes_values, :attribute_method?,
:respond_to_without_attributes?, :locking_enabled?, :lock!, :with_lock, :valid?,
:perform_validations, :validates_acceptance_of, :validates_confirmation_of,
:validates_exclusion_of, :validates_format_of, :validates_inclusion_of,
:validates_length_of, :validates_size_of, :validates_numericality_of,
:validates_presence_of, :errors, :invalid?, :read_attribute_for_validation,
:validates_with, :run_callbacks, :to_model, :to_param, :to_partial_path,
:attributes=, :assign_attributes, :mass_assignment_options,
:mass_assignment_role, :sanitize_for_mass_assignment,
:mass_assignment_authorizer, :cache_key, :quoted_id,
:populate_with_current_scope_attributes, :new_record?, :destroyed?, :persisted?,
:delete, :becomes, :update_attribute, :update_column, :update_attributes,
:update_attributes!, :increment, :increment!, :decrement, :decrement!, :toggle,
:toggle!, :psych_to_yaml, :to_yaml_properties, :in?, :blank?, :present?,
:presence, :acts_like?, :try, :duplicable?, :to_json, :instance_values,
:instance_variable_names, :require_or_load, :require_dependency,
:require_association, :load_dependency, :load, :require, :unloadable, :nil?,
:===, :=~, :!~, :class, :singleton_class, :clone, :dup, :initialize_clone,
:taint, :tainted?, :untaint, :untrust, :untrusted?, :trust, :to_s, :methods,
:singleton_methods, :protected_methods, :private_methods, :public_methods,
:instance_variables, :instance_variable_get, :instance_variable_set,
:instance_variable_defined?, :instance_of?, :kind_of?, :is_a?, :tap, :send,
:public_send, :respond_to_missing?, :extend, :display, :method, :public_method,
:define_singleton_method, :object_id, :to_enum, :enum_for, :psych_y,
:class_eval, :silence_warnings, :enable_warnings, :with_warnings,
:silence_stderr, :silence_stream, :suppress, :capture, :silence, :quietly,
:equal?, :!, :!=, :instance_eval, :instance_exec, :__send__, :__id__,
:initialize, :to_ary, :_run_validate_callbacks, :_run_validation_callbacks,
:_run_initialize_callbacks, :_run_find_callbacks, :_run_touch_callbacks,
:_run_save_callbacks, :_run_create_callbacks, :_run_update_callbacks,
:_run_destroy_callbacks, :_run_commit_callbacks, :_run_rollback_callbacks,
:serializable_add_includes, :associated_records_to_validate_or_save,
:nested_records_changed_for_autosave?, :validate_single_association,
:validate_collection_association, :association_valid?,
:before_save_collection_association, :save_collection_association,
:save_has_one_association, :save_belongs_to_association,
:assign_nested_attributes_for_one_to_one_association,
:assign_nested_attributes_for_collection_association,
:assign_to_or_mark_for_destruction, :has_destroy_flag?, :reject_new_record?,
:call_reject_if, :raise_nested_attributes_record_not_found, :unassignable_keys,
:association_instance_get, :association_instance_set, :create_or_update,
:create, :update, :notify_observers, :should_record_timestamps?,
:timestamp_attributes_for_create_in_model,
:timestamp_attributes_for_update_in_model, :all_timestamp_attributes_in_model,
:timestamp_attributes_for_update, :timestamp_attributes_for_create,
:all_timestamp_attributes, :current_time_from_proper_timezone,
:clear_timestamp_attributes, :write_attribute, :field_changed?,
:clone_with_time_zone_conversion_attribute?, :attribute_changed?,
:attribute_change, :attribute_was, :attribute_will_change!, :reset_attribute!,
:attribute?, :attribute_before_type_cast, :attribute=,
:convert_number_column_value, :attribute, :match_attribute_method?,
:missing_attribute, :increment_lock, :_merge_attributes, :halted_callback_hook,
:assign_multiparameter_attributes, :instantiate_time_object,
:execute_callstack_for_multiparameter_attributes, :read_value_from_parameter,
:read_time_parameter_value, :read_date_parameter_value,
:read_other_parameter_value, :extract_max_param_for_multiparameter_attributes,
:extract_callstack_for_multiparameter_attributes, :type_cast_attribute_value,
:find_parameter_position, :quote_value, :ensure_proper_type,
:destroy_associations, :default_src_encoding, :irb_binding, :Digest,
:initialize_copy, :remove_instance_variable, :sprintf, :format, :Integer,
:Float, :String, :Array, :warn, :raise, :fail, :global_variables, :__method__,
:__callee__, :eval, :local_variables, :iterator?, :block_given?, :catch, :throw,
:loop, :caller, :trace_var, :untrace_var, :at_exit, :syscall, :open, :printf,
:print, :putc, :puts, :gets, :readline, :select, :readlines, :`, :p, :test,
:srand, :rand, :trap, :exec, :fork, :exit!, :system, :spawn, :sleep, :exit,
:abort, :require_relative, :autoload, :autoload?, :proc, :lambda, :binding,
:set_trace_func, :Rational, :Complex, :gem, :gem_original_require, :BigDecimal,
:y, :Pathname, :j, :jj, :JSON, :singleton_method_added,
:singleton_method_removed, :singleton_method_undefined]
```

There is a specific issue I have with interface explosion, and it isn't so much
to do with code organization as it is with state management. Can you 
guess what my concern is?

### PROBLEM 3: Balancing reuse and customization can be tricky

Some ancestors provide methods that are designed to be replaced by 
their descendants. When executed well, this pattern provides a convenient
balance between code reuse and customization. However, because it is 
impossible to account for all possible customizations that descendants 
of a base object will want to make, this approach has its
limitations.

This design problem is best explained by example, and you can find a great 
one in Ruby itself. Start by considering the following trivial code, paying
particular attention to its output:

```ruby
class Person
  def initialize(name, email)
    @name  = name
    @email = email
  end
end

person = Person.new("Gregory Brown", "gregory@practicingruby.com")

p person    #=~
#<Person:0x0000010108bbf8 @name="Gregory Brown", 
#                         @email="gregory@practicingruby.com">

puts person #=~
#<Person:0x0000010108bbf8>     
```

Under the hood, `p` calls `person.inspect`, and `puts` calls
`person.to_s`. What you see above is output from the default implementation 
of each of those methods. Arguably, `Object#inspect` 
provides useful debugging output, but `Object#to_s` is
more of a template method that needs to be overridden in order to be useful. The
following code shows how easy it is to customize things by simply adding your own `to_s`
definition:

```ruby
class Person
  def initialize(name, email)
    @name  = name
    @email = email
  end

  def to_s
    "#{@name} <#{@email}>"
  end
end

person = Person.new("Gregory Brown", "gregory@practicingruby.com")

puts person #=~ Gregory Brown <gregory@practicingruby.com>  
```

On the surface, there is nothing wrong with this code: this is exactly what a
template-method based extension mechanism should look like. However, due to the
weird way that `Object#inspect` works in Ruby 1.9, defining your own `to_s`
implementation has some unpleasant side effects that are likely to surprise you:

```ruby
p person #=~ Gregory Brown <gregory@practicingruby.com>  
```

If you look at the [definition of
Object#inspect](https://github.com/ruby/ruby/blob/trunk/object.c#L486-511),
you'll find that this behavior is by design. In a nutshell, the method is set
up to provide its default output if `to_s` has not been overridden, but simply
delegate to `to_s` if it has been. This is problematic, because `to_s` is meant
to be used for humanized output such as what you saw in the previous
example, not debugging output.

The unfortunate consequence of this problem is that if you define `to_s` in your
objects, you must also define a meaningful `inspect`, and if you want to 
reproduce the same behavior as `Object#inspect`, you need to implement 
it yourself. While this is mostly a problem of brittle code and it is not
specifically related to inheritance, the problem is compounded by
inheritance-based modeling. For example, suppose the `Person` class was defined
as shown above, and you decided to subclass it:

```ruby
class Employee < Person 
  def initialize(name, email, role)
    super(name, email)
    @role = role
  end
end
```

If `Person` does not define its own `inspect` method, `Employee` will inherit the
same problem. On the other hand, if `Person` does implement `inspect`, it needs
to take care to implement it in a way that's suitably general to account for
what its descendants might find useful. This invites the same design challenges
that caused this problem in the first place, which means that `Employee` may end
up cleaning up after its parent object in a similar way. Unfortunately,
brittleness tends to cascade downwards throughout ancestry chains.

### Homework exercises 

This article is on the short side, and it also leaves out a lot of the story
from each of these points. I did this intentionally to encourage you to
participate in an active discussion on this topic. To get the most out of this
article, please complete at least one of the following homework exercises:

1) Show a realistic example of an accidental method naming collision, in a
similar spirit to the state-based example shown in Problem #1. For bonus
points, choose an example that involves private methods.

2) Post a comment in response to the "interface explosion" example shown 
in Problem #2. You can either try to guess what my main concern about 
it is, or share your own concerns. If instead you feel that there is 
nothing wrong with this kind of design, explain why you think that.

3) Come up with another downside of inheritance-based modeling, and provide an
example of it. If you have trouble coming up with your own, you may want to look
into issues that can arise from overriding methods, or perhaps explore what
happens when you mix traditional inheritance-based modeling with
`method_missing`.

4) Share an example of a library or project which is difficult to work with
because of the way it uses inheritance-based modeling, or describe problems you've run
into with your own projects due to inheritance.

5) Share the conventions and guidelines you follow to avoid the problems
described in this article, as well as other problems you've encountered with
inheritance-based modeling.

Looking forward to seeing your responses! Don't worry about getting the *right*
answers; discussion threads here on Practicing Ruby are about learning, not
necessarily showing off what you already know.


================================================
FILE: articles/v4/010-implementing-active-record.md
================================================
> This two part article explores the challenges involved in
> building a minimal implementation of the Active Record pattern. 
> [Part 1 (Issue 4.8)](http://practicingruby.com/articles/60) provides
> some basic background information about the problem and
> walks through some of the low level structures that are
> needed to build an ORM. Part 2 (this issue) builds on top of
> those structures to construct a complete Active Record
> object.

### Building object-oriented mixins

One thing that makes the Active Record pattern challenging to implement is that
it involves shoehorning a bunch of persistence-related functionality into model
objects. In the case of Rails, models inherit from `ActiveRecord::Base` which
has dozens of modules mixed into it. This inheritance-based approach is the
common way of doing complex behavior sharing in Ruby, but comes at a [high
maintenance cost](http://practicingruby.com/articles/62). This is one of the
main design challenges that
[BrokenRecord](https://github.com/elm-city-craftworks/broken_record) attempts to solve.

Because this is a tricky problem, it helps to explore these ideas by
solving an easier problem first. For example, suppose that you have the following trivial
`Stack` object and you want to extend it with some `Enumerable`-like
functionality without mixing `Enumerable` directly into the `Stack` object:

```ruby
# A bare-bones last-in, first-out container. The backing Array stays hidden
# behind a private reader, so callers only see push/pop/size/each.
class Stack
  def initialize
    @data = []
  end

  # Adds obj to the top of the stack.
  def push(obj)
    data << obj
  end

  # Removes and returns the top element (nil when the stack is empty).
  def pop
    data.pop
  end

  # Number of elements currently held.
  def size
    data.length
  end

  # Yields every element to the given block, most recently pushed first.
  def each
    data.reverse_each { |item| yield item }
  end

  private

  def data
    @data
  end
end
```

You could use an `Enumerator` for this purpose, as shown in the following
example:

```ruby
stack = Stack.new

stack.push(10)
stack.push(20)
stack.push(30)

enum  = Enumerator.new { |y| stack.each { |e| y.yield(e) } }
p enum.map { |x| "Has element: #{x}" } #=~
# ["Has element: 30", "Has element: 20", "Has element: 10"]    
```

This is a very clean design, but it makes it so that you have to interact with
both a `Stack` object and an `Enumerator`, which feels a bit tedious. With a
little effort, the two could be unified under a single interface while keeping
their variables and internal method calls separated:

```ruby
# Presents a Stack and an Enumerator over it as a single object by
# forwarding any message that either one of them understands.
class EnumerableStack
  def initialize
    @stack = Stack.new
    @enum  = Enumerator.new do |yielder|
      @stack.each { |item| yielder.yield(item) }
    end
  end

  # Returns the first delegate (stack first, then enumerator) that responds
  # to m, or nil when neither does.
  def respond_to_missing?(m, *a)
    [@stack, @enum].detect { |delegate| delegate.respond_to?(m) }
  end

  # Forwards m to whichever delegate can handle it; otherwise falls back to
  # the default method_missing behavior.
  def method_missing(m, *a, &b)
    if (target = respond_to_missing?(m))
      target.send(m, *a, &b)
    else
      super
    end
  end
end
```

From the external perspective, `EnumerableStack` still looks and 
feels like an ordinary `Enumerable` object:

```ruby
stack = EnumerableStack.new

stack.push(10)
stack.push(20)
stack.push(30)

p stack.map { |x| "Has element: #{x}" } #=~
# ["Has element: 30", "Has element: 20", "Has element: 10"]  
```

Unfortunately, it is painful to implement objects this way. If you
applied this kind of technique throughout a codebase without introducing some
sort of abstraction, you would end up having to write a ton of very boring
`respond_to_missing?` and `method_missing` calls. It would be better to have
an object that knows how to delegate methods automatically, such as 
the `Composite` object in the following example:

```ruby
class EnumerableStack
  def initialize
    stack = Stack.new
    enum  = Enumerator.new { |y| stack.each { |e| y.yield(e) } }      
    
    @composite = Composite.new
    @composite << stack << enum
  end

  def respond_to_missing?(m, *a)
    @composite.receives?(m)
  end

  def method_missing(m, *a, &b)
    @composite.dispatch(m, *a, &b)
  end
end
```

The neat thing about this approach is that the `EnumerableStack`
object now only needs to keep track of a single variable, even though it is
delegating to multiple objects. This makes it safe to extract some
of the functionality into a mix-in without the code becoming too brittle:

```ruby
class EnumerableStack
  include Composable

  def initialize
    stack = Stack.new
    enum  = Enumerator.new { |y| stack.each { |e| y.yield(e) } }      

    # features is a simple attribute containing a Composite object
    features << stack << enum
  end
end
```

The end result looks pretty clean, but using the `Composable` 
mixin to solve this particular problem is massive overkill. 
Mixing the `Enumerable` module directly into the `Stack` object
is not that hard to do, and is unlikely to have any adverse
consequences. Still, seeing how `Composable` can be used to
replace one of the most common applications of mixins makes
it much easier to understand how this technique can be 
applied in more complex scenarios. The good news is 
that as long as you have a rough idea of how `Composable` 
works in this context, you will have no trouble understanding
how it is used in BrokenRecord.

To test whether or not you understand the basic pattern, take a look at the
following code and see if you can figure out how it works. Don't worry about
the exact implementation details, just compare the following code to the other
examples in this section and think about what the purpose of this module is:

```ruby
module BrokenRecord
  module Mapping
    include Composable

    def initialize(params)
      features << Record.new(params)
    end

    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      include Composable

      def map_to_table(table_name)
        features << Relation.new(:name         => table_name,
                                 :db           => BrokenRecord.database,
                                 :record_class => self)
      end
    end
  end
end
```

If you guessed that mixing `BrokenRecord::Mapping` into a class will cause any
unhandled messages to be delegated to `BrokenRecord::Relation` at the class 
level and to `BrokenRecord::Record` at the instance level, then you guessed
correctly! If you're still stuck, it might help to recall how this mixin 
is used:

```ruby
class Article
  include BrokenRecord::Mapping

  map_to_table :articles
end

article = Article.create(:title => "Great article", :body => "Wonderful!")
p article.title.upcase #=> "GREAT ARTICLE"
```

If you consider that the definition of `BrokenRecord::Mapping` above is its
complete implementation, it becomes clear that the methods being called in this
example need to come from somewhere. Now, it should be easier to see that
`Relation` and `Record` are where those methods come from.

You really don't need to know the exact details of how 
the `Composable` module works, because it is based entirely on the 
ideas already discussed in this article. However, if `Composable` still feels a
bit too magical, go ahead and [study its
implementation](https://github.com/elm-city-craftworks/broken_record/blob/master/lib/broken_record/composable.rb)
before reading on. For bonus points, pull the code down and try to 
recreate the `EnumerableStack` example on your own machine.

Once you feel that you have a good grasp on how `Composable` works, you can
continue on to see how it can be used to implement an Active Record object.

### Implementing basic CRUD operations

The complex relationships that Active Record objects depend upon make them a bit
challenging to understand and analyze. But like any complicated system,
you can gain some foundational knowledge by starting with a very simple example as an
entry point and digging deeper from there.

In the case of BrokenRecord, a good place to start is with a somewhat trivial
model definition:

```ruby
class Article
  include BrokenRecord::Mapping

  map_to_table :articles

  def published?
    status == "published"
  end
end
```

You found out earlier when you looked at `BrokenRecord::Mapping` that it exists
primarily to extend classes with functionality provided by
`BrokenRecord::Relation` at the class level, and `BrokenRecord::Record` at the
instance level. Because `BrokenRecord::Mapping` provides a fairly complicated 
`initialize` method, it is safe to assume that `Article` objects should be
created by factory methods rather than instantiated directly. The following 
code demonstrates how that works:


```ruby
Article.create(:title  => "A great article",
               :body   => "The rain in Spain...",
               :status => "draft")

Article.create(:title  => "A mediocre article",
               :body   => "Falls mainly in the plains",
               :status => "published")

Article.create(:title  => "A bad article",
               :body   => "Is really bad!",
               :status => "published")

Article.all.each do |article|
  if article.published?
    puts "PUBLISHED: #{article.title}"
  else
    puts "UPCOMING: #{article.title}"
  end
end
```

If you ignore what is going on inside the `each` block for the moment, it is
easy to spot two factory methods being used in the previous example:
`Article.create` and `Article.all`. To track down where these methods are coming
from, you need to take a look at `BrokenRecord::Relation`, because that is where
class-level method calls on `Article` are forwarded to if `Article` does not
handle them itself. But before you do that, keep in mind that this is how
that object gets created in the first place:

```ruby
  def map_to_table(table_name)
    features << Relation.new(:name         => table_name,
                             :db           => BrokenRecord.database,
                             :record_class => self)
  end
```

If you note that `map_to_table :articles` is called within the `Article`
class, you can visualize the call to `Relation.new` in the previous example as
being essentially the same as what you see below:

```ruby
features << Relation.new(:name          => :articles,
                         :db            => BrokenRecord.database,
                         :record_class  => Article)
```

Armed with this knowledge, it should be easier to make sense of the
`BrokenRecord::Relation` class, which is shown in its entirety below. Pay
particular attention to the `initialize` method, and just skim the rest of the
method definitions; it isn't important to fully understand them until later.

```ruby
module BrokenRecord
  # Glues a Table to a user-supplied record class, and adds CRUD and
  # Associations objects to its composed features.
  class Relation
    include Composable

    # params is read with fetch for :name, :db, and :record_class, so
    # (assuming a Hash) a missing key raises instead of defaulting to nil.
    def initialize(params)
      self.table = Table.new(:name => params.fetch(:name),
                             :db   => params.fetch(:db))

      self.record_class = params.fetch(:record_class)

      # Both feature objects receive this relation itself.
      features << CRUD.new(self) << Associations.new(self)
    end

    attr_reader :table

    # Returns the keys of table.columns.
    def attributes
      table.columns.keys
    end

    # Instantiates record_class for a single set of values, handing it this
    # relation along with the value stored under the table's primary key.
    def new_record(values)
      record_class.new(:relation => self,
                       :values   => values,
                       :key      => values[table.primary_key])
    end

    # Defines an instance method called name on record_class, using block
    # as its body.
    def define_record_method(name, &block)
      record_class.send(:define_method, name, &block)
    end

    private

    # The writers are private; initialize assigns through self.table= and
    # self.record_class=.
    attr_reader :record_class
    attr_writer :table, :record_class
  end
end
```

The main thing to notice about `BrokenRecord::Relation` is that its main purpose
is to glue together a `BrokenRecord::Table` object with a user-defined record
class, such as the `Article` class we've been working with in this example. The
rest of its functionality is provided by the `Relation::CRUD` and
`Relation::Associations` objects via composition. Because `Article.all` and
`Article.create` are both easily identifiable as CRUD operations, the `Relation::CRUD` 
object is the next stop on your tour:

```ruby
module BrokenRecord
  class Relation
    # CRUD operations for a single relation, carried out against
    # relation.table.
    class CRUD
      def initialize(relation)
        self.relation = relation
      end

      # Builds one record (via relation.new_record) for every entry
      # returned by table.all.
      def all
        table.all.map { |values| relation.new_record(values) }
      end

      # Inserts values, then looks the row back up using whatever
      # table.insert returned (presumably the new primary key -- confirm
      # against Table#insert).
      def create(values)
        id = table.insert(values)    
      
        find(id)
      end

      # Returns a record for the first row whose primary key matches id,
      # or nil when the lookup comes back empty.
      def find(id)
        values = table.where(table.primary_key => id).first

        return nil unless values

        relation.new_record(values)
      end

      # ... other irrelevant CRUD operations omitted
      
      private

      attr_accessor :relation

      # Shorthand for the relation's table.
      def table
        relation.table
      end
    end
  end
end
```

At this point, you should have noticed that both `create()` and
`all()` are defined by `Relation::CRUD`, and it is ultimately these 
methods that get called whenever you call `Article.create` 
and `Article.all`. Whether you trace `Relation::CRUD#create` or `Relation::CRUD#all`, you'll find
that they both interact with the `Table` object provided by `Relation`, and that
they both call `Relation#new_record`, and they don't do much more than that. 
To keep things simple, we'll follow the path that `Relation::CRUD#all` takes:

```ruby
def all
  table.all.map { |values| relation.new_record(values) }
end
```

This method calls `BrokenRecord::Table#all`, which as you saw in 
[Issue 4.8](http://practicingruby.com/articles/60) returns an
array of hashes representing the results returned from the 
database when a trivial `select * from articles` query is issued. 
For this particular data set, the following results get 
returned:

```ruby
[
 { :id     => 1, 
   :title  => "A great article", 
   :body   => "The rain in Spain...", 
   :status => "draft" }, 

 { :id     => 2, 
   :title  => "A mediocre article", 
   :body   => "Falls mainly in the plains", 
   :status => "published"}, 

 { :id     => 3, 
   :title  => "A bad article", 
   :body   => "Is really bad!", 
   :status => "published" }
]
```

Taking a second look at the `Relation::CRUD#all` method, it is easy to
see that this is being transformed by a simple `map` call which passes each of
these hashes to `Relation#new_record`. I had asked you to skim over that
method earlier, but now would be a good time to take a second look at its
definition:

```ruby
module BrokenRecord
  class Relation
    def new_record(values)
      record_class.new(:relation => self,
                       :values   => values,
                       :key      => values[table.primary_key])
    end
  end
end
```

If you recall that in this context `record_class` is a reference to `Article`,
it becomes easy to visualize this call as something similar to what is 
shown below:

```ruby
values = { :id     => 1, 
           :title  => "A great article", 
           :body   => "The rain in Spain...", 
           :status => "draft" }

Article.new(:relation => some_relation_obj,
            :values   => values,
            :key      => 1)
```

As you discovered before, `Article` does not provide its own `initialize`
method, and instead inherits the definition provided by 
`BrokenRecord::Mapping#initialize`:

```ruby
module Mapping
  include Composable

  def initialize(params)
    features << Record.new(params)
  end
end
```

If you put all the pieces together, you will find that calls to
`Article.all` or `Article.create` return instances of `Article`, but
those instances are imbued with functionality provided by a `Record`
object, which in turn holds a reference to a `Relation` object 
that ties everything back to the database. By now you're probably feeling like
the Active Record pattern is a bit of a 
[Rube Goldberg machine](http://www.youtube.com/watch?v=qybUFnY7Y8w), and that
isn't far from the truth. Don't worry though, the next section should help 
tie everything together for you.

### Implementing the Active Record object itself

Earlier, I had asked you to ignore what was going on in the `each` block of the
original example that kicked off this exploration, because I wanted to show you
how `Article` instances get created before discussing how they work. Now that you
have worked through that process, you can drop down to the instance level to
complete the journey. Using the same code reading strategy as what you used
before, you can start with the `Article#published?` and `Article#title` 
calls in the following example and see where they take you:

```ruby
Article.all.each do |article|
  if article.published?
    puts "PUBLISHED: #{article.title}"
  else
    puts "UPCOMING: #{article.title}"
  end
end
```

A second look at the `Article` class definition reveals that it implements
the `published?` method but does not implement the `title` method; the latter call gets 
passed along to `BrokenRecord::Record` automatically. Similarly, the internal call 
to `status` gets delegated as well:

```ruby
class Article
  include BrokenRecord::Mapping

  map_to_table :articles

  def published?
    status == "published"
  end
end
```

To understand what happens next, take a look at how the `BrokenRecord::Record` class works:

```ruby
module BrokenRecord
  # A single record: it keeps a key and a relation for itself, and composes
  # in a FieldSet built from the values it was given.
  class Record
    include Composable

    # params - options for the new record (presumably a Hash; it is only
    #          read through #fetch here):
    #          :key      - optional, defaults to nil
    #          :relation - required, fetched without a default
    #          :values   - optional, defaults to an empty Hash
    def initialize(params)
      self.key      = params.fetch(:key, nil)
      self.relation = params.fetch(:relation)

      # NOTE: FieldSet (formerly called Row) is a simple Struct-like object
      features << FieldSet.new(:values     => params.fetch(:values, {}),
                               :attributes => relation.attributes)
    end

    # ... irrelevant functionality omitted ...

    private

    # Both accessors are private: they are only used from inside Record.
    attr_accessor :relation, :key
  end
end
```

By now you should be able to quickly identify `BrokenRecord::FieldSet` as the object that
receives any calls that `Record` does not answer itself. The good
news is that you already know how `FieldSet` works, because it was discussed in
detail in [Issue 4.8](http://practicingruby.com/articles/60). But if you need a
refresher, check out the following code:

```ruby
values = { :id     => 1, 
           :title  => "A great article", 
           :body   => "The rain in Spain...", 
           :status => "draft" }

fieldset = BrokenRecord::FieldSet.new(:values     => values,
                                      :attributes => values.keys)

p fieldset.title  #=> "A great article"
p fieldset.status #=> "draft"
```

If you read back through the last few examples, you should be able to see how
the data provided by `Relation` gets shoehorned into one of these `FieldSet`
objects, and from there it becomes obvious how the `Article#title` and `Article#status`
messages are handled.

If `FieldSet` is doing all the heavy lifting, you may be wondering why the
`Record` class needs to exist at all. Those details were omitted from the
original example, so it is definitely a reasonable question to ask. To find your
answer, consider the following example of updating database records:

```ruby
articles = Article.where(:status => "draft")

articles.each do |article|
  article.status = "published"
  article.save
end
```

In the example you worked through earlier, data was being read and not written,
and so it was hard to see how `Record` offered anything more than a layer of
indirection on top of `FieldSet`. However, the example shown above changes that
perspective significantly by giving a clear reason for `Record` to hold a
reference to a `Relation` object. While `Article#status=` is provided by
`FieldSet`, the `Article#save` method is provided by `Record`, and is defined as
follows:

```ruby
module BrokenRecord
  class Record
    
    # ... other details omitted ...

    def save
      if key
        relation.update(key, to_hash)
      else
        relation.create(to_hash)
      end
    end 
  end
end
```

From this method (and others like it), it becomes clear that `Record` is
essentially a persistent `FieldSet` object, which forms the essence of what an
Active Record object is in its most basic form.

### EXERCISE: Implementing minimal association support 

The process of working through the low level foundations built up in [Issue
4.8](http://practicingruby.com/articles/60) combined with this article's 
extensive walkthrough of how BrokenRecord implements some basic CRUD 
functionality probably gave you enough learning moments to make you want to 
quit while you're ahead. That said, if you are looking to dig a little deeper, I'd recommend
trying to work your way through BrokenRecord's implementation of associations
and see if you can make sense of it. The following example should serve as a
good starting point:

```ruby
class Article
  include BrokenRecord::Mapping

  map_to_table :articles

  has_many :comments, :key   => :article_id, 
                      :class => "Comment"

  def published?
    status == "published"
  end
end

class Comment
  include BrokenRecord::Mapping

  map_to_table :comments

  belongs_to :article, :key   => :article_id,
                       :class => "Article"
end


Article.create(:title  => "A great articles",
               :body   => "The Rain in Spain",
               :status => "draft")


Comment.create(:body => "A first comment",  :article_id => 1)
Comment.create(:body => "A second comment", :article_id => 1)


article = Article.find(1)

puts "#{article.title} -- #{article.comments.count} comments"
puts article.comments.map { |e| "  * #{e.body}" }.join("\n")
```

Because not all the features used by this example are covered in this article,
you will definitely need to directly reference the [full source of
BrokenRecord](https://github.com/elm-city-craftworks/broken_record) to complete
this exercise. But don't worry, by now you should be familiar with most of its
code, and that will help you find your way around. If you attempt this 
exercise, please let me know your thoughts and questions about it!

### Reflections

Object-oriented mixins seem very promising to me, but also full of open 
questions and potential pitfalls. While they seem to work well in this toy 
implementation of Active Record, they may end up creating as many problems as
they solve. In particular, it remains to be seen how this kind of modeling would
impact performance, debugging, and introspection of Ruby objects. Still, the
pattern does a good enough job of handling a very complex architectural
pattern to hint that some further experimentation may be worthwhile.

Going back to the original question I had hoped to answer in the first part of
this article about whether or not the Active Record pattern is inherently complex, I
suppose we have found out that there isn't an easy answer to that question. My
BrokenRecord implementation is conceptually simpler than the Rails-based
ActiveRecord, but only implements a tiny amount of functionality. I think that
the closest thing to a conclusion I can come to here is that the traditional
methods we use for object modeling in Ruby are certainly complex, and so any
system which attempts to implement large-scale architectural patterns in Ruby
will inherit that complexity unless it deviates from convention.

That all having been said, reducing complexity is about more than just
preferring composition over inheritance and reducing the amount of magic in our
code. The much deeper question that we can ask ourselves is whether these very
complicated systems we build are really necessary, or if they are a
consequence of piling [abstractions on top of abstractions](http://timelessrepo.com/abstraction-creep) 
in order to fix some fundamental low-level problem.

While this article was a fun little exploration into the depths of a
complex modeling problem in Ruby, I think its real point is to get us to
question our own tolerance for complexity at all levels of what we do. If you
have thoughts to share about that, I would love to hear them.


================================================
FILE: articles/v4/011-responsibility-vs-data-driven.md
================================================
_This article was contributed by Greg Moeck. Greg is a software 
craftsman who has been working with Ruby since 2004. When this
article was published, he was working on mobile javascript development
at Facebook._

Given that Ruby is an object oriented programming language, all Ruby
programs are going to be composed of many objects. However, techniques 
for breaking the functionality of programs into objects can
vary from programmer to programmer. In this article I'm going to walk
through two common approaches to driving design at a high level:
**data-centric design** and **responsibility-centric design**. I will
briefly sketch the key ideas of each of the design methodologies,
illustrating how one might structure parts of a simple e-commerce
application using each of the methods. I'll then follow up with some 
advice about where I've found the different approaches to be particularly
helpful or unhelpful.

### Data-centric design

In a data-centric design, the system is generally separated into objects
based upon the data that they encapsulate. For example, in an
e-commerce application you are likely to find objects that represent
products, invoices, payments, and users. These objects provide 
methods which operate on that data, either returning its values, 
or mutating its state. A `Product` object might provide a method to 
determine how many of a given product are currently in stock, or possibly
a method to add that product to the current shopping cart.

Names for data-centric objects are often nouns, because
they frequently correspond to real-world objects. This real-worldliness 
is generally also true of the methods that these objects provide. 
The methods either represent accessors to the object's data, 
relationships between objects, or actions that could be taken on 
the object. The following ActiveRecord object serves as a good example
of this style of design:

```ruby
# A data-centric model: relationships, data accessors, and actions all live
# on the object that owns the data.
class Product < ActiveRecord::Base
  #relationships between objects
  has_many :categories

  #accessing the object's data: on sale whenever a sale price is set
  def on_sale?
    not(sale_price.nil?)
  end

  #action to take on the product: decrement the remaining item count,
  #save the change (save! is used), then append this product to the
  #cart's items
  def add_to_cart(cart)
    self.remaining_items -= 1
    save!
    cart.items << self
  end
end
```

Following along these lines, inheritance is generally used as a principle
of classification, establishing a subtype relationship
between the parent and the child. If B inherits from A, that is a
statement that B is a type of A. This is generally described as an **is-a**
relationship. For example, the classes `LaborCharge` and `ProductCharge`
might both inherit from a `LineItem` base class which implements the
features they have in common. The key thing to note about these classes is that 
they share at least some data attributes and the behavior around those
attributes, even if some of that behavior might end up being overridden.

However, not everything can have a counterpart in the real world. There
still needs to be some communication model that is created to describe
the global or system level view of the interactions between objects.
These **controllers** will fetch data from different parts of the system, 
and pipe it into actions in another part.
Since these objects generally are very difficult to classify in a
hierarchical way, it is a good idea to keep them as thin as
possible, pushing as much logic into the actual domain model as you
possibly can.

For those familiar with standard Rails architectures, you should see a
lot of commonalities with the above description. Rails model objects are
inherently structured this way because the ActiveRecord pattern tightly
couples your domain objects to the way in which their data is persisted.
And so all ActiveRecord objects are about some "encapsulated" data, and
operations that can be done on that data. Rails controllers provide the
global knowledge of control, interacting with those models to then
accomplish some tasks.

### Responsibility-centric design

In a responsibility-centric design, systems are divided by the
collection of behaviors that they must implement. The goal of this division is
to formulate the description of the behavior of the system in terms of
multiple interacting processes, with each process playing a
separate **role**. For example, in an e-commerce application with a
responsibility-centric design, you would be likely to find objects
such as a payment processor, an inventory tracker, and a user
authenticator.

The relationships between objects become very similar to the
client/server model. A **client** object will make requests of the server
to perform some service, and a **server** object will provide a public API
for the set of services that it can perform. This relationship is
described by a **contract** - that is a list of requests that can be made
of the server by the client. Both objects must fulfill this contract,
in that the client can only make the requests specified by the API, and
the server must respond by fulfilling those requests when told.

As an example, a responsibility-centric order processing service might look like
what you see below:

```ruby
class StandardOrderProcessor
  def initialize(payment_processor, shipment_scheduler)
    @payment_processor = payment_processor
    @shipment_scheduler = shipment_scheduler
  end

  def process_order(order)
    @payment_processor.debit_account(order.payment_method, order.amount)
    @shipment_scheduler.schedule_delivery(order.delivery_address,
                                          order.items)
  end
end
```

The goal of describing relationships between objects in this way is that
it forces the API for the server object to describe *what* it does for
the client rather than *how* it accomplishes it. By its very nature 
the implementation of the server must be encapsulated, and
locked away from the client. This means that the client object can only
be coupled to the public API of its server objects, which allows developers
to freely change server internals as long as the client still has an
object to talk to that fulfills the contract. 

The practical benefit of this kind of design is that it makes certain kinds of
changes very easy. For example, the following code could be used as a drop-in
replacement for the `StandardOrderProcessor`, because it implements the same
contract:

```ruby
class OrderValidationProcessor
  def initialize(order_processor, error_handler)
    @order_processor = order_processor
    @error_handler = error_handler
  end

  def process_order(order)
    if is_valid_order(order)
      @order_processor.process_order(order)
    else
      @error_handler.invalid_order(order)
    end
  end

  private
  def is_valid_order(order)
    #does some checking for if the order is valid
  end
end
```

The client does not know which sort of
order processor it is talking to; it just knows how to request
that an order gets processed. Validations are skipped when the client is
provided with a `StandardOrderProcessor`, and they are run when it is 
provided with an `OrderValidationProcessor`, but the client does not
know or care about these details. This allows for substantial changes
in order processing behavior without requiring any modifications to 
the client object.

To make them easier to work with, these kinds of service objects would 
generally be composed with a factory that might look something like
what you see below:

```ruby
class OrderProcessor
  # ...

  def with_validation
    OrderValidationProcessor.new(without_validation,
                                 error_handler)
  end

  def without_validation
    StandardOrderProcessor.new(payment_processor, shipment_scheduler)
  end

  # ...
end
```


The notions of client and server are related to what side of a contract 
each object is on, which means that individual objects frequently 
play both roles. For example, a payment processor object may
consume the services of a credit card processor, while providing
services for an order processor. From the perspective of the credit card
processor, the payment processor is a client, but just the opposite is
true for the ordering system. A key feature of this kind of design
is that objects are coupled to an interface rather than an 
implementation, which makes the relationships between objects much
more dynamic than what you can expect from a data-centric design.

As you've probably already noticed, because these kinds of objects represent the
behavior of the system rather than the data, the objects are not
generally named after real-world entities. The roles that an object
plays often represent real-world processes, and the implementations of
these roles are often named after *how* they implement the desired role.
For example, within our system there might be two objects which
can play the role of a shipment scheduler: a `FedexDeliveryScheduler` and
`UPSDeliveryScheduler`. Despite the specificity of their names, the client 
consuming these objects would not know which of the two it was talking to as
long as they implemented a common interface for scheduling deliveries. A natural
consequence of role-based modeling is that method names become more important
while class names become less important, and this example is no exception.

Another core concept of responsibility-centric designs is that data 
tends to flow through the system rather than being centrally managed within the 
system. As a result, data typically takes the form of immutable 
value objects. For example, in the above order processors, the processes
were being passed an order object, which contained the data for a given
order. The objects within the system are not mutating or persisting this
data directly, but passing values around instead. With that in mind,
an object responsible for tracking the current order might look like
what you see below:

```ruby
# Tracks the order that is currently being assembled. Each selection swaps
# in a brand new Order value rather than mutating the existing one, so any
# reference handed out earlier never changes underneath its holder.
class CurrentOrderTracker
  # The most recent Order value.
  attr_reader :order

  def initialize
    @order = Order.new
  end

  # Replaces the current order with a new one that also contains +item+.
  def item_selected(item)
    @order = order.add_item(item)
  end

  # Immutable value object holding the items of an order.
  class Order
    # Read-only on purpose: a writer would break the immutability guarantee.
    attr_reader :items

    # items - optional Array of items; nil (or no argument) means "empty".
    #         The array is copied and frozen so that later changes to the
    #         caller's array cannot leak into this value.
    def initialize(items = nil)
      @items = (items || []).dup.freeze
    end

    # Returns a new Order with +item+ appended; the receiver is unchanged.
    def add_item(item)
      # Wrap the item so that Array#+ appends it as a single element.
      Order.new(@items + [item])
    end
  end
end
```
Because any reference to one of these values is guaranteed to be
immutable, any process can read from it at any time without worrying
that it might have been modified by another process. This is not to 
say however that this data is never persisted. When it is
necessary to persist this data, an object playing the role of a
persister must be created, and it must receive messages containing these
values just like any other part of the system. In this way, the
persistence logic generally lives on the boundaries of the system rather
than in the center. Such an object might look something like this:

```ruby
# Plays the "persister" role: receives order values and writes them to the
# database through an ActiveRecord model.
class SQLOrderPersister
  #assuming that AROrder is an active record object
  #
  # Updates the stored row that matches order.id when one exists, and
  # creates a new row from the order's attributes otherwise.
  def persist_order(order)
    # find_by_id returns nil for a missing row (find would raise instead),
    # and the result is kept in its own variable so that the incoming
    # order value is not overwritten.
    record = AROrder.find_by_id(order.id)
    if record
      record.update_attributes(order.attributes)
    else
      AROrder.new(order.attributes).save
    end
  end
end
```

The last thing to note is that in this sort of system using inheritance
as a form of classification doesn't really make much sense. Historically
inheritance has taken the form of "plays the same role as" instead 
of **is-a**. Objects which play the same role have historically inherited
from a common abstract base class which merely implements the role's
public API, and forces any class that inherits from it to do the same.
This relationship expresses that an object implements a certain contract, 
rather than categorically claiming what the object is.

In Ruby, using inheritance for this sort of relationship isn't 
strictly necessary. Due to duck typing, if something quacks 
like a duck (that is if it implements the same API as a duck), it 
is a duck, and there is no need to have the objects inherit
from a common base class. That being said, it can still be nice to
explicitly name these roles, and an abstract base class can often be
used to do that.

### Comparing and contrasting the two design styles

As with almost any engineering choice, it isn't possible to say that either 
of these two approaches is always superior or inferior. That said, 
we can still walk through some strengths and weaknesses of each approach.

**Strengths of data-centric design:**

1) Because the code is broken into parts around real world entities,
  these entities are easy to find and tweak. All the code relative to a
  certain set of data lives together.

2) Because it has a global flow control, and because it is
  centered around data (which people generally understand),
  it is relatively easy for programmers experienced with traditional
  procedural languages to adapt their previous experience into this
  style.

3) It is very easy to model things like create/read/update/destroy
  because the data is found in a single model for all real world
  objects.

4) For systems with many data types and a small amount of behavior, this
  approach evenly distributes the location of the code.

**Weaknesses of data-centric design:**

1) Because the structure of an object is a part of its definition,
   encapsulation is generally harder to achieve.

2) Because the system is split according to data, behavior is often hard
  to track down. Similar operations often span across multiple data
  types, and as such end up spread out across the entirety of the system.

3) The cohesion of behavior within an object is often low since every
  object has to have all actions that could be taken upon it, and those
  actions often have very little to do with one another.

4) In practice it often leads to coupling to the structure of the object
  as one needs to violate the Law of Demeter to traverse the
  relationships of the objects. For example, think of how often in Rails
  you see something like the following code:

```ruby
@post.comments.each do |comment|
  if comment.author.something
    ...
  end
end
```

**Strengths of responsibility-centric design:**

1) Objects tend to be highly cohesive around their behavior, because
roles are defined by behavior, not data.

2) Coupling to an interface rather than an implementation makes
it easier to change behavior via composition.

3) As more behaviors are introduced into the system, the number of 
objects increases rather than the lines of code within model objects.


**Weaknesses of responsibility-centric design:**

1) It is often difficult to drop into the code and make simple changes as
  even the simplest change necessitates understanding the architecture
  of at least the module. This means that the on-ramping time for the
  team is generally fairly high.

2) Since there is generally no global control, it is often difficult for
  someone to grasp where things are happening. As Kent Beck and Ward
  Cunningham have said, "The most difficult problem in teaching object-
  oriented programming is getting the learner to give up the global
  knowledge of control that is possible with procedural programs,
  and rely on the local knowledge of objects to accomplish their
  tasks."

3) Data is not as readily available since the decomposition of the
  application is along behavioral lines. The data can often be
  scattered throughout the system, which means changing the data
  structure is more expensive than changing the behavior.

### Choosing the right design

Rails has proven how the data centric approach can lead to quickly
building an application that can create, read, update and destroy data.
And for applications whose domain complexity lies primarily in data types,
and the actions that can be taken on those data types, the pattern works
extremely well. Adding or updating data types is fast and easy since the
system is cohesive around its data.

However as some large legacy Rails codebases show, when the complexity
of the domain lies primarily in the behaviors or rules of the domain
then organizing around data leads to a lot of jumbled code. The models
end up needing to have many methods on them in order to process all of
the potential actions that can be taken on them, and many of these
actions end up being similar across data types. As such the cohesion of
the system suffers, and extending or modifying the behavior becomes more and
more difficult over time.

The opposite of course is true as well in my experience. In a system
whose domain complexity lies primarily in its behavior, decomposing the
system around those behaviors makes extending or modifying the behavior
of the system over time much faster and easier. However the cost
is that extending or modifying the data of the system can become more
and more difficult over time.

As with most design methods, it comes down to an engineering decision,
which often means you have to guess, and evolve over time. There is no
magic system that will be the right way to model things regardless of
the application. There might even be some subsets of an application
that might be better modeled in a data-centric way, whereas other
sections of the system might be better modeled in a behavior-centric way.
The key thing I've found is to be sensitive to the "thrash" smell, where
you notice that things are becoming more and more difficult to extend or
modify, and be open to refactor the design based on the feedback you're
getting from the system.

### Further references

1) Growing Object Oriented Software Guided By Tests, Steve Freeman, Nat Pryce

2) Object-oriented design: a responsibility-driven approach, R. Wirfs-Brock, B. Wilkerson, OOPSLA '89 Conference proceedings on Object-oriented programming systems, languages and applications

3) The object-oriented brewery: a comparison of two object-oriented development methods, Robert C. Sharble, Samuel S. Cohen, ACM SIGSOFT Software Engineering Notes, Volume 18 Issue 2, April 1993

4) Mock Roles, Not Objects, Steve Freeman, Tim Mackinnon, Nat Pryce, Joe Walnes, OOPSLA '04 Companion to the 19th annual ACM SIGPLAN conference on Object-oriented programming systems, languages, and applications

5) A Laboratory For Teaching Object-Oriented Thinking, Kent Beck, Ward Cunningham, OOPSLA '89 Conference proceedings on Object-oriented programming systems, languages and applications


================================================
FILE: articles/v4/012-tdd-lessons-learned.md
================================================
Test-driven development (TDD) is a topic that never gets old among programmers, 
even though we can hardly agree on what its costs and benefits are. While 
there is no shortage of neatly packaged expert viewpoints on this topic,
very few of them are backed up by solid evidence. Formal research
indicates that TDD does tend to produce code with fewer defects, but no other
significant effects have been found. What these results tell us is that the
burden of proof is on us for everything else we believe about TDD, especially 
when it comes to perceived benefits that have nothing to do with 
regression testing.

Thinking about my own relationship to test-driven development, I came to
realize that my own assumptions about its costs and benefits were 
fuzzy at best. I had plenty of opinions on the topic, but found it hard to 
elaborate on them. As these opinions hardened into beliefs, it became 
much more challenging to meaningfully consider ideas about TDD which
differed from my own. My own cynicism was preventing me from making 
a reasoned argument for my way of doing things. 

Knowing that stubbornness is the enemy of progress, I decided to take 
a fresh look at my use of TDD and how it affects my work. For a period
of 90 days, I challenged myself to practice formal TDD as often as 
possible, and to learn as much about 
it as I could during that time. While the [original plans for my self
study](http://practicingruby.com/articles/28) were much more
rigorous than what I ended up doing, I did maintain a fairly
disciplined TDD workflow throughout the three month period, and that
taught me a few lessons worth sharing.

To make it easier for us to discuss what I've learned, I've decided to 
break the lessons out into individual mini-articles, each with their own comments
thread. Please follow the links below to read them:

* [LESSON 1: End-to-end testing is essential](http://practicingruby.com/articles/66)
* [LESSON 2: Refactoring is not redesign](http://practicingruby.com/articles/67)
* [LESSON 3: Mock objects deeply influence design](http://practicingruby.com/articles/68)
* [LESSON 4: Spiking is not cowboy coding](http://practicingruby.com/articles/69)

If you feel there are other lessons that you have learned from your own work
with TDD, or you have general questions about how my self-study went, feel free
to share them here.


================================================
FILE: articles/v4/012.1-tdd-lessons-learned-lesson-1.md
================================================
> **NOTE:** This is one of [four lessons
> learned](http://practicingruby.com/articles/65) from my 90 day [self-study on
> test-driven development](http://practicingruby.com/articles/28). 
> If this topic interests you, be sure to check out the other lessons!

Perhaps the most significant thing I have noticed about my own TDD habits 
is that I frequently defer end-to-end testing or skip it entirely, and that 
always comes at a huge cost. Now that I have had a chance to watch 
myself get caught in that trap several times, I have a better understanding
of what triggers it.

Most of the time when I work on application development, I start out by 
attempting to treat its delivery mechanism as an implementation detail. 
Thinking this way makes me feel that testing code through the UI 
isn't especially important, provided that I test-drive my domain objects 
and keep their surface as narrow as possible. My first iteration on the
Blind game provides a good example of how I tend to apply this strategy.

My first complete feature in the game was a simple proof of concept: 
I dropped the player into the center of a map, and then allowed them to
move around using the WASD keys on their keyboard. When the player 
reached the edge of the map, the game would play a beeping sound
and then terminate itself. You can check out the [full source for 
this feature](https://github.com/elm-city-craftworks/blind/compare/1f6a...4345)
to see its implementation details, but the important thing to note
is that its delivery mechanism was tiny and almost completely logic-free:

```ruby
Ray.game "Blind" do
  register { add_hook :quit, method(:exit!) }

  scene :main do
    self.frames_per_second = 10

    @game = Blind::Game.new
    @game.on_event(:out_of_bounds) do
      beep = sound("#{File.dirname(__FILE__)}/../data/beep.wav")
      beep.play
      sleep beep.duration

      exit!
    end

    always do
      puts @game.player_position

      @game.move_player(0,-1) if holding?(:w)
      @game.move_player(0,1) if holding?(:s)
      @game.move_player(-1,0) if holding?(:a)
      @game.move_player(1,0) if holding?(:d)
    end
  end

  scenes << :main
end
```

Based on this single code example, it is easy to make the case that end-to-end
testing can be deferred until later, or that perhaps it is not needed at all.
Thinking this way is very tempting, because it frees you from having to think
about how to dig down into the delivery mechanism and run your tests through it.
Already burdened by the idea of writing more tests than I usually do, I was
quick to take that bargain and felt like it was a reasonable tradeoff at the
time.

I couldn't have been more wrong. I encountered my first trivial UI bug 
within 24 hours of shipping the first feature. Several dozen patches 
later when I had a playable game, I had already sunk several hours into 
finding and fixing UI defects that I discovered through manual play testing.
The wheels finally came off the wagon when I realized that I could not
even safely rename methods without playing through the entire game and
triggering each of its edge cases. The following example shows one
of the many "oops" changesets that the project accumulated in a very short
period of time:

```diff
sounds[:siren].volume = 
-  ((world.distance(world.center_position) - min) / max.to_f) * 100
+  ((world.distance(world.center_point) - min) / max.to_f) * 100
```

By the time I had finally felt the pain of not having any tests running from
end-to-end, the delivery mechanism was no longer a trivial script that could
be scribbled on the back of a napkin. Over the period of a week or so, it had
grown into a [couple hundred lines of code](https://github.com/elm-city-craftworks/blind/tree/776f3462c2244634ccddc22a5473916d6439872c/lib/blind/ui) 
spread across several significant features. The surface of
the domain model also needed to expand to support these new
features, and so the critical path through the system became difficult to 
keep in mind while working on the codebase. This made it much harder
to introduce a change without accidentally breaking something. 

Fed up with chasing down trivial bugs and spending so much time on manual
testing, I finally decided that I needed to implement a player simulator 
which would allow me to write tests similar to the one shown below:

```ruby
it "should result in a loss on entering deep space" do
  world  = Blind::Worlds.original(0)
  levels = [Blind::Level.new(world, "test")]

  game  = Blind::UI::GamePresenter.new(levels)

  sim   = Blind::UI::Simulator.new(game)

  sim.move(500, 500)
  
  sim.status.must_equal "You drifted off into deep space! YOU LOSE!"
end
```

As predicted, the `Blind::UI::Simulator` object was [not especially easy to
implement](https://github.com/elm-city-craftworks/blind/blob/2fa2d75216077bdafa556be3c560b3f7c205e672/lib/blind/ui/simulator.rb). 
To get it to work, I had to experiment with several undocumented features in the Ray
framework and cobble them together through a messy trial and error process. This
reminded me of previous projects I had worked on where I had to do the same
thing to introduce end-to-end tests in Rails applications, and is quite possibly
one of my least favorite programming tasks; all this work just feels so
tangential to the task at hand.

Still, it is hard to argue with results. After introducing this little simulator
object, the number of trivial errors I introduced into the system rapidly
declined, even though I was still actively changing things. Occasionally, I'd
make a change which broke the simulator in weird and confusing ways, but all the
time spent working on those issues was less than the total time I spent chasing
down dumb mistakes before making this change. 

As I continued on with my study, I experienced similar situations with both a 
Sinatra application and a command line application, and that is when I realized
that you simply can't get away from paying this tax one way or another. If
nothing else, working on acceptance tests first helps balance out the illusion
of progress in the early stages of a project, and makes it easier to sustain
an even pace of development over time.

At the end of my study, I read the first few chapters of [Growing Object
Oriented Software, Guided by Tests](http://www.growing-object-oriented-software.com/), 
and it gave similar advice to what I had found out the hard way. The authors
presented a somewhat more radical idea about how to build application runners, 
suggesting that they should completely hide the implementation details of the 
underlying application and its delivery mechanism. To try out these ideas, 
I built a small [tic-tac-toe game](https://github.com/elm-city-craftworks/ruby-examples/tree/master/tic_tac_toe) 
using Ray, writing my first end-to-end test before writing any other code: 

```ruby
describe "A tic tac toe game" do
  it "alternates between X and O" do
    run_game do |runner|
      runner.message.must_equal("It's your turn, X")
      runner.move(5)
      runner.message.must_equal("It's your turn, O")
      runner.move(3)
      runner.message.must_equal("It's your turn, X")
    end
  end

  def run_game(&block)
    GameRunner.run(&block)
  end
end
```

Because this test does all of its work through the
[GameRunner
object](https://github.com/elm-city-craftworks/ruby-examples/blob/master/tic_tac_toe/test/helpers/game_runner.rb),
it is both easier to read and more maintainable than the tests that I built for
Blind. Furthermore, I feel like it is much easier to write test-first code this
way, as it doesn't require as many decisions to be made up front.

I've been talking about a rather weird domain throughout this article (game
programming in Ray), but I could easily imagine how I might apply what I've
learned to a traditional Rails application. For example, if I were to build a
blog and wanted to write my first test for it, I might start with something like
this:

```ruby
describe "A post" do
  let(:blogger) { SimulatedBlogger.new }

  it "can be created by a logged in blogger" do
    blogger.log_in("user", "password")
    blogger.create_post("Hello World")
  end

  it "cannot be created by a blogger that has not logged in" do
    assert_raises(SimulatedBlogger::AccessDeniedError) do
      blogger.create_post("Hello World")
    end
  end
end
```

I would then move on to implement the `SimulatedBlogger` using something like
capybara or some other web automation tool. On the surface, this at least 
seems like a good idea; in practice it may be more trouble than it's worth for a number 
of reasons.

Since I'm still relatively new to end-to-end testing in general, I am definitely
curious to hear what you think of these ideas. This article summarizes what
I learned from my experiences during my study, but I am not yet confident in my 
own applications of these techniques. If you have an interesting story to share, 
please do so!


================================================
FILE: articles/v4/012.2-tdd-lessons-learned-lesson-2.md
================================================
> **NOTE**: This is one of [four lessons learned](http://practicingruby.com/articles/65) 
from my [90 day self-study on test-driven development](http://practicingruby.com/articles/28). 
If this topic interests you, be sure to check out the other lessons! 

To maintain a productive TDD workflow, you need to understand the difference
between **refactoring** and **redesign**. These two activities are distinct from one
another, but because they are often done in lockstep, it can be 
challenging to mentally separate them. 

The problem I noticed in my own work is that seemingly simple changes
often spiral into much more complex modifications. Whenever that happens,
it is easy to make bad decisions that can cause progress to grind to a halt. 
Having a good way to distinguish between what can be accomplished
via simple refactorings and what requires careful design consideration 
seems to be the key to preventing this problem.

My hope is that by reading what I have learned from my own experiences,
you will be able to avoid some of these obstacles along your own path. 
These lessons are not fun to learn the hard way!

### What is refactoring?

Refactoring in the traditional sense has to do with making [small and safe 
transformations](http://refactoring.com/catalog/index.html) to a codebase 
without altering its external behavior. Because refactorings are designed to be atomic 
and almost trivial, you can apply them whenever you feel that they will make life
 easier for you down the road. For example, it is rarely a bad idea to clean up messy
 code by introducing a couple helper methods:
 
```diff
       def belongs_to(parent, params)
-        mapper.record_class.send(:define_method, parent) do
-          Object.const_get(params[:class]).find(send(params[:key]))
+        define_association(parent) do
+          BrokenRecord.string_to_constant(params[:class])
+                      .find(send(params[:key]))
         end
       end
 
       def has_many(children, params)
         table_primary_key = mapper.primary_key
 
-        mapper.record_class.send(:define_method, children) do
-          Object.const_get(params[:class])
-                .where(params[:key] => send(table_primary_key))
+        define_association(children) do
+          BrokenRecord.string_to_constant(params[:class])
+            .where(params[:key] => send(table_primary_key))
         end
       end
 ```
 
On the surface, this change is very superficial, as a proper refactoring ought
to be. However, it has several immediate advantages worth pointing out:

* The `define_association` helper makes the code reveal its 
intentions much more clearly by hiding some awkward metaprogramming.

* The `BrokenRecord.string_to_constant` method makes it easy
to extend this code so that it handles fully qualified constant names 
(i.e. `SomeProject::Person`), without the need to add a bunch of extra 
noise in multiple places.

* Both helper methods cut down on duplication, eliminating the connascence
of algorithm that was present in the original code.

* Both helper methods reduce the amount of implementation details that
the `belongs_to` and `has_many` methods need to be directly aware of,
making them more adaptive to future changes.

The important thing to notice here is that while making this change opens
a lot of doors for us, and has some immediate tangible benefits, it does 
not introduce any observable functional changes, both from the external
perspective, and from the perspective of the object's collaborators.

### What is redesign?

While the concept of refactoring is easy to define and categorize, the
process of redesigning code is not nearly as straightforward. Rather
than attempting to provide an awkward definition for it, I will
demonstrate what makes redesign different from refactoring by
showing you a real example from my study.

When working on BrokenRecord (my toy implementation of 
the Active Record pattern), I initially designed it so that a 
single object was responsible for running queries against
the database and mapping their results to user-defined 
models. This worked fine as a proof of concept, and the
[code was pretty easy to follow](https://github.com/elm-city-craftworks/broken_record/blob/e5bd9fb676361b97c9c27d46efd812b826eecbf6/lib/broken_record/table.rb).

However, designing things this way led to very high
coupling between the query API and the underlying
database implementation, as you can see in the following
code:

```ruby
module BrokenRecord
  class Table
  
   #...

    def create(params)
      escapes = params.count.times.map { "?" }.join(", ") 
      fields  = params.keys.join(", ")

      BrokenRecord.database.execute(
        "insert into #{@table_name} (#{fields}) values (#{escapes})",
        params.values
      )
    end

    def find(id)
      BrokenRecord.database
                  .execute("select * from #{@table_name} where id = ?", [id])
                  .map { |r| @row_builder.build_row(self, r) }.first
    end
  end
end
```

Even though I had no intentions of making BrokenRecord into a 
library that could be used for practical applications, this design was
fundamentally inconsistent with what it means to be an 
object-relational mapper. The lack of abstraction made any sort
of query optimization impossible, and also prevented the 
possibility of introducing support for multiple database backends.

In addition to these concerns about future extensibility, the current
design made it much harder to test this code, and much harder
to do some common queries without directly hijacking the global 
reference to the underlying database adapter. All these things 
combined meant that a redesign was clearly in order.

Taking a first glance at the implementation of 
[BrokenRecord::Table](https://github.com/elm-city-craftworks/broken_record/blob/e5bd9fb676361b97c9c27d46efd812b826eecbf6/lib/broken_record/table.rb),
it was tempting to think that all that was needed here was to [extract
a class](http://refactoring.com/catalog/extractClass.html) to encapsulate the 
database interactions. But because this object had come into existence as
a result of a [broad-based integration test](https://github.com/elm-city-craftworks/broken_record/blob/e5bd9fb676361b97c9c27d46efd812b826eecbf6/test/integration.rb)
 rather than a series of focused unit tests, I was hesitant to perform an extraction 
 without writing a few more tests first.
 
 Thinking about the problem a little more, I noticed that the changes I wanted
 were deeper than just putting together an internal object to hide
 some implementation details and reduce coupling. The fact that `Table` was
 the best name I could think of for my extracted object even though that was
 the name of the original class was a sign that I was in the process of 
 changing some responsibilities in the system, not just grouping related
 bits of functionality together.
 
### Taking a TDD-friendly approach to redesign

The mistake I've made in the past when it comes to redesigning internal
objects is that I tended to make my changes recursively, often without
introducing new tests as I went. So for example, I might take a helper
object that had gotten too complex and break it into two objects, testing
both objects only indirectly through some higher level test. That kind of
change would often reveal to me that I wanted to extract even more classes
or methods, or possibly even change the protocols between the low-level 
collaborators in the system. 

Sooner or later, I would end up with a complicated web of internal objects 
that were all being tested through a single use case at the high level, 
and so any defects I introduced became very hard to track down. Even though
my tests were preventing external defects from creeping into the system, 
I had negated the design and debugging benefits that come along with doing
TDD more rigorously.

After [discussing this bad habit of mine with Eric Hodel](https://github.com/mendicant/mendicantuniversity.org/wiki/TDD-study-session-%282012-May-03%29)
during one of Mendicant University's study sessions, I came to realize that 
there are some simple ways to sidestep this problem. In particular, I realized 
that I could redesign systems by introducing new components from the bottom 
up, cutting over to the new implementation only when it was ready to be integrated.

Wanting to try out these new ideas in BrokenRecord, I started out by renaming
the `BrokenRecord::Table` object to `BrokenRecord::RecordTable`. I put virtually
no thought into the new name, because what I was really trying to do was free
up the `BrokenRecord::Table` name so that I could completely change the
responsibilities associated with it. This allowed me to experience much of the
same freedom that simply deleting the original class would have given
me, but without the cost of having to work through a bunch of orphaned
references and broken tests in my system.

I drove the new `BrokenRecord::Table` object test first, mostly mirroring the
ideas from the original object but sticking strictly to the interactions with
the database and representing records as simple Hash objects. I also
added a new feature which provided information about the columns
in a given table. You can get a rough idea for how I sketched out that
feature by checking out the following test:

```ruby
  it "must be able to retrieve column information" do
    columns = table.columns

    columns.count.must_equal(3)

    columns[:id][:type].must_equal("integer")
    columns[:title][:type].must_equal("text")
    columns[:body][:type].must_equal("text")
  end
```

The original `BrokenRecord::Table` object was just a first iteration spike,
and so it expected that all model objects explicitly defined what fields 
were in the tables they mapped to. This helped keep the implementation
simple, which was essential when the class was taking on two 
responsibilities at once. However, in the new `BrokenRecord::Table` 
object, this kind of low level database interaction looked perfectly at
home, and paved the way for removing the tedious `BrokenRecord::RowMapper`
object in the newly designed system.

Throughout the process of building better internals from the bottom
up, I was able to make these kinds of revisions to several objects, and
also introduced a couple more internal objects to help out with various
things. Sooner or later, I reached the point where I was ready to create
an object that could serve as a drop-in replacement for the original
`BrokenRecord::Table` object (the one I renamed `RecordTable`). 
Feeling like I might actually keep this new object around for a while, 
I decided to name it `TableMapper`, which at least sounded slightly
less horrible than `RecordTable`. Its methods ended up looking 
something like this:

 
 ```ruby
module BrokenRecord
  class TableMapper

   # ...
    
    def create(params)
      id = @table.insert(params)    
    
      find(id)
    end

    def find(id)
      fields = @table.where(:id => id).first

      return nil unless fields

      @record_class.new(:table => @table,
                        :fields => fields,
                        :key    => id)
    end
  end
end
```

Functionality-wise, the newly created `BrokenRecord::TableMapper` was nearly a
drop-in replacement for the original system, even though it had a much better
underlying design. Because it only needed to implement a handful of methods
to maintain API compatibility, integrating it went very smoothly, and required
almost no changes to the original top-level tests. Once I cut things over
and had all the tests passing, I was able to completely remove the
`BrokenRecord::RecordTable` object without any issues.

### Reflections

If I had not taken this more disciplined approach and instead followed my old
ways, I probably would have ended up in about the same place design-wise, but
it would have come at a much higher cost. I would have had fewer tests,
spent more time debugging trivial errors, and probably would have cut corners
in places out of impatience or frustration. The overall codebase would have
still been quite brittle, and future changes would be harder to make rather
than easier. Taking that less disciplined approach might have allowed me
to implement this particular set of changes a little faster, but my past 
experiences have taught me that I always end up having to pay down
my technical debt sooner or later.

By teaching myself to think of refactoring and redesign as distinct activities,
I am much more likely to stop myself from going on long exploratory cleanup
missions with little guidance from my tests. This has already made a big
difference in my own work, so I'd definitely recommend giving it a try.

If you have questions, or a story of your own to share, please leave me a comment!


================================================
FILE: articles/v4/012.3-tdd-lessons-learned-lesson-3.md
================================================
> **NOTE:** This is one of [four lessons
> learned](http://practicingruby.com/articles/65) from my 90 day [self-study on
> test-driven development](http://practicingruby.com/articles/28). 
> If this topic interests you, be sure to check out the other lessons!

Before this study, I knew that I [rarely used mock objects](http://practicingruby.com/articles/49) 
in my tests, but I didn't clearly understand why that was the case. When asked to explain my 
preferences, I typically would offer some vague argument about keeping things
simple, and then go on to complain about test brittleness. Because I knew many
other people who shared the same view, I assumed my line of reasoning was 
mostly coherent. This left me with no desire to dig any deeper than what my own 
experiences had taught me.

After years of somewhat blissful ignorance, I finally started to second guess
myself after watching Greg Moeck's excellent talk at RubyConf 2011, which was
aptly named [Why You Don't Get Mock Objects](http://www.confreaks.com/videos/659-rubyconf2011-why-you-don-t-get-mock-objects). 
This talk pointed out that the reason why most Rubyists tend to dislike mock
objects is because they try to shoehorn them into existing workflows rather 
than adopting the form of TDD that mocks are meant to promote. I remember being
easily convinced by this talk when I first watched it, but because old habits
die hard, I never ended up changing my way of doing things.

Throughout the entire 90 day period of my study, I found myself [using mock
objects only
once](https://github.com/elm-city-craftworks/broken_record/blob/5c9287e0c6d8211c4a91aee43b26181dfbcc1992/test/record_test.rb), 
even though I had thought about using them in many places. Towards the end, I realized that I
still didn't quite understand how mocks were meant to be used, and so I
decided to study them properly. This inevitably led me to the excellent [Mock
Roles, Not Objects](http://www.jmock.org/oopsla2004.pdf) paper, which was written in 
2004 by the developers who had pioneered the concept of mock-based testing. In
addition to being a solid introduction to the topic in general, the paper lays
out a number of practical guidelines for avoiding the common problems that can
arise from using mocks incorrectly. In particular, the authors proposed the
following rules:

* Only mock types you own.
* Don't use getters.
* Be explicit about what should not happen.
* Specify as little as possible in a test.
* Don't use mocks to test boundary objects.
* Don't add behavior to mocks.
* Only mock your immediate neighbors.
* Don't create too many mocks.
* Inject all dependencies.

By programming in this style, the promise is that the benefits of mock objects
will be maximized and their drawbacks minimized. The interesting thing is that
while several of these heuristics are meant to improve the testability of code,
nearly as many have a direct influence on software design in general. Taken
together, the following four points strongly favor [responsibility-centric
design](http://practicingruby.com/articles/64):

* Don't use getters.
* Only mock your immediate neighbors.
* Don't create too many mocks.
* Inject all dependencies.

These guidelines will almost certainly lead to code that is more testable, 
and should also lead to code that is easier to change. If you think about
these heuristics a little bit, you'll find they conveniently map onto
the following software design principles:

* [Tell, don't ask](http://robots.thoughtbot.com/post/27572137956/tell-dont-ask)
* [The law of Demeter](http://en.wikipedia.org/wiki/Law_of_Demeter)
* [Single responsibility](http://en.wikipedia.org/wiki/Single_responsibility_principle)
* [Dependency inversion](http://en.wikipedia.org/wiki/Dependency_inversion_principle)

Testing a codebase via mock objects is easy when these design principles are
followed, and challenging when they are not. In that sense, mock objects can be
used as a smoke test for the overall design of a project, which is useful in its
own right. However, most mockists claim that the technique actually inspires 
better design, rather than simply helping you find areas in your code that
suffer from bad design. This is a much broader statement, and isn't nearly as
obvious to those who have not had this experience themselves.

Because I used mock objects so infrequently during my study, I am unable to tell
you whether or not they can actually help improve software design. However, now
that I have a clearer sense of what my own workflow is like, I understand why I
have had so few opportunities to make good use of mock objects. It all boils
down to the fact that I don't practice disciplined outside-in design.

The way I tend to approach design is to choose a very small vertical slice of
functionality and develop an imaginary example of how I expect that feature to
work. This technique is consistent with the outside-in way of doing things, 
but my next steps bring me in a completely different direction. Rather than
starting with my interface and then using mock objects to allow me to discover
collaborators iteratively until I reach the lowest-level objects in my system, 
I build things bottom up instead.

Taking a look back at the projects I worked on during this study, I was able to
see this trend in action. For example, in BrokenRecord, my first test
was for a struct-like object that would be used for storing field
data:

```ruby
describe BrokenRecord::Row do
  it "must create readers for all attributes" do
    row = BrokenRecord::Row.new(:a => 1, :b => 2)

    row.a.must_equal(1)
    row.b.must_equal(2)
  end
end
```

Similarly, when I was working on the Blind game, my first test was for a `Map` object
that allowed you to place named objects at specific coordinates:

```ruby
describe Blind::Map do
  it "must be able to store elements at a position" do
    map = Blind::Map.new
    map.add_object("player", 10, 25)

    pos = map.locate("player")
    [pos.x, pos.y].must_equal([10,25])
  end
end
```

Even though each of these objects was designed with a single external feature
in mind, they are clearly boundary objects: concrete implementation code with 
no collaborators within the system. As I built on top of them, I found no
need for mocks, because using these objects directly was easy enough to do. The
benefit of building things this way is that you can think in terms of concrete
objects at all times, but that is also the drawback: you can't use mock objects
to discover the protocols of your collaborators if those details have already
been locked down. I don't know enough about mock-based TDD to know
whether this is a trade worth making, but this does explain to me why I've
failed to experience some of its benefits.

After I realized that I haven't been working in a way that would support the
effective use of mock objects, I took an interest in figuring out what kind of
workflow mockists tend to follow. Digging back to one of my [favorite articles on
mock objects](http://martinfowler.com/articles/mocksArentStubs.html), I found that 
this is what Martin Fowler had to say:

> Mock objects came out of the XP community, and one of the principal features of XP is its emphasis on Test Driven Development - where a system design is evolved through iteration driven by writing tests.

> Thus it's no surprise that the mockists particularly talk about the effect of mockist testing on a design. In particular they advocate a style called need-driven development. With this style you begin developing a story by writing your first test for the outside of your system, making some interface object your SUT. By thinking through the expectations upon the collaborators, you explore the interaction between the SUT and its neighbors - effectively designing the outbound interface of the SUT.

> Once you have your first test running, the expectations on the mocks provide a specification for the next step and a starting point for the tests. You turn each expectation into a test on a collaborator and repeat the process working your way into the system one SUT at a time. This style is also referred to as outside-in, which is a very descriptive name for it. It works well with layered systems. You first start by programming the UI using mock layers underneath. Then you write tests for the lower layer, gradually stepping through the system one layer at a time. This is a very structured and controlled approach, one that many people believe is helpful to guide newcomers to OO and TDD.

Based on what I learned about mock objects, this style of development does
appear to be a natural way of developing responsibility-centric code that abides
by all the guidelines laid out in the [Mock Roles, Not
Objects](http://www.jmock.org/oopsla2004.pdf) paper. While it sounds intriguing
to me and worth trying out, I doubt that I am smart enough to apply this
style of development effectively. The reason I tend to use a divide-and-conquer, 
think-in-concrete-objects strategy in my projects is that I don't have much
faith in my own abilities to understand the current and future relations 
between my objects. In other words, the disciplined outside-in approach seems 
to require more design confidence than what I typically am able to muster up. 

To make matters worse, I have not yet come across an example that clearly shows how this
technique can be applied throughout an entire project. I think that in addition
to my own experimentation, I'll need to see something like that for these ideas
to finally click. If you know of a source of good large-scale examples of these
techniques, please let me know!

To sum up the overall point of this lesson: mock objects facilitate
a particular design style, and if you're not using that approach in
your projects, you probably will not experience their benefits. I'd love to hear
your thoughts on that conclusion, whether or not you agree with it; I clearly
have a lot more to learn in this area.


================================================
FILE: articles/v4/012.4-tdd-lessons-learned-lesson-4.md
================================================
> **NOTE:** This is one of [four lessons
> learned](http://practicingruby.com/articles/65) from my 90 day [self-study on
> test-driven development](http://practicingruby.com/articles/28). 
> If this topic interests you, be sure to check out the other lessons!

When used in moderation, experimental spikes can be a very powerful tool for
shining some light into the dark corners of your projects. However, there is
a natural tension between chaotic spiking and formal TDD practices 
that needs to be balanced if you want to use the two techniques 
side by side. Equalizing these forces can be very challenging, and
it is something that I have struggled with throughout my career.

Because I started programming as a self-taught hobbyist, I spent many years
writing code without a well defined process. As I started to work
on larger and more important projects, I learned how to program in a
more disciplined way. I developed an interest in object-oriented design 
and also picked up the basics of test-driven development. These methodologies
helped me work in a more controlled fashion when I needed to, but they did not do 
much to change my everyday coding habits. I still relied on lots of messy
experimentation; I just knew how to clean up my code so that I didn't
end up shipping sloppy work in the end.

While I have managed to be very productive over the years, my day to day 
efficiency has been very unpredictable because of the way that I do things. This
is something I have been aware of for some time, and was one of the 
main problems that I wanted to take a closer look at during 
this study. With that in mind, I will now walk you through three 
examples of where I broke away from TDD to try out some experiments and
then share my thoughts on what worked and what didn't work.

### Exploring the unknown

I knew when I started working on [Blind](https://github.com/elm-city-craftworks/blind) 
that I would need to learn how to do two
things with the [Ray](http://mon-ouie.github.com/projects/ray.html) game engine
that I hadn't done before: work with positional 
audio, and write tests against the UI layer. I knew that these things were
supported by Ray because the documentation had examples for them, but I needed
to convince myself that they would work in practice by building a small proof of
concept.

Rather than trying to build realistic examples that matched how I would end up
using these features, I instead focused on their most basic prerequisites. For
example, I knew that I'd never be able to have dynamically positioned sound 
emitters in a three-dimensional space if I couldn't play a simple beeping 
sound without any positioning at all. I also saw from the documentation that
in order to write tests against Ray it was necessary to use its class-based API
rather than using its fancy DSL. Combining those two ideas together led me to
build the following (almost trivial) spike solution:

```ruby
require "ray"

class MainScene < Ray::Scene
  scene_name :main

  def setup
    @sound = sound("beep.wav")
  end

  def register
    always do
      @sound.play
      sleep @sound.duration
    end
  end

  def render(win)
  end
end

class Game < Ray::Game
  def initialize
    super "Awesome Game"

    MainScene.bind(self)

    scenes << :main
  end
end

Game.new.run
```

While this code was little more than the end result of mixing a
couple examples from Ray's documentation together, it helped me verify 
that there weren't any problems playing sounds on my system, and that
the documentation I was reading was up to date.

Coincidentally, this tiny script helped me notice that my wife's
laptop was missing the core audio dependencies that Ray needed,
which is a perfect example of what this kind of spike is made to test. It also
gave me an opportunity to answer some questions that the documentation
didn't make clear to me. For example, removing the `sleep` call made me realize
that playing a sound was a non-blocking operation, and deleting the `render`
method made me realize that it only needed to be provided if it was doing
something useful. In a fairly complex and immature project like Ray, this
kind of trial-and-error based investigation is often a faster way to 
find answers than digging through source code.

I was actually very happy with the outcomes from this spike, and the effort I
put into it was minimal compared to what I got out of it. While I can't say
the same for the other experiments I am about to show you, this little script
serves as a nice example of spiking done right.

### Trying out a new design

Mid-way through working on Blind, I decided to completely change the way I
was modeling things. All elements in the game were originally modeled as 
rectangles, but as I tweaked the game rules, I started to realize that all I
really cared about was point-to-point distance between the player and various
locations in the world. The hoops I was having to jump through to work with
rectangular game elements eventually got annoying enough that I decided to try
out my new ideas on an experimental branch.

I started working on this redesign from the bottom up, test-driving a couple
supporting objects that I knew I'd need, including a very boring `Point` class.
Despite the smooth start, it eventually became clear to me that this approach 
would only take me so far: the original `Game` class was tightly 
coupled to a particular representation of Blind's world. To make matters 
worse, the UI code I had written was a messy prototype 
that I hadn't cleaned up or tested properly yet. These issues 
left me stuck between a rock and a hard place.

I had already sunk a lot of time into building the new object model, but didn't
want to keep investing in it without being reasonably sure that it was the right
way to go. To build up my confidence, I decided to do a quick spike to transform
the old UI into something that could work on top of the new object model.

Within an hour or two, I had a working game running on top of the new codebase. 
I made several minor changes and added a couple new features
to various objects in the process of doing so, without writing any tests for
them. I originally assumed that I didn't need to write tests because I expected
to throw all this code away, but after wrapping up my experiment I decided that
the code was good enough to merge and could be easily cleaned up later. This decision
eventually came back to haunt me.

Over the next several days, I ran into small bugs in various edge case
scenarios in the code that had been implemented during the spike. For example,
the randomized positioning of mines and exit locations had not been rewritten to
account for the fact that the game no longer defined regions as rectangles, and
that would occasionally cause them to spawn in the wrong regions. The following
patch was required to fix that problem:

```diff
       @current_position = Blind::Point.new(0,0)
 
       @mine_positions   = mine_count.times.map do
-        Blind::Point.new(rand(MINE_FIELD_RANGE), rand(MINE_FIELD_RANGE))
+        random_minefield_position
       end
 
-      @exit_position = 
-        Blind::Point.new(rand(MINE_FIELD_RANGE), rand(MINE_FIELD_RANGE))
+      @exit_position = random_minefield_position
     end
 
     attr_reader :current_position, :mine_positions, :exit_position
@@ -42,5 +41,15 @@ def current_region
         :outer_rim
       end
     end
+
+    private
+    
+    def random_minefield_position
+      begin 
+        point = Blind::Point.new(rand(MINE_FIELD_RANGE), rand(MINE_FIELD_RANGE))
+      end until MINE_FIELD_RANGE.include?(@center.distance(point))
+
+      point
+    end
   end
 end
```

Similarly, whenever I wanted to refactor some code to introduce a change or
extend functionality in some way, I needed to write tests to fill the coverage gaps 
that were introduced during my experiment. This led to a temporary but
sharp rise in the cost of change, and that caused my morale to plummet.

Looking back on what happened, I think the problem was not that I created an
experimental branch with some untested code on it, but that I decided to keep
that code rather than throwing it out and starting fresh. Wiring up my new data
model to the UI and seeing a playable game come out of it was a huge confidence
booster, and it only cost me a couple hours to get to that point. But because I
decided to merge that code into master, I inherited several more hours of
unpredictable maintenance work that might have been avoided if I had redone the
work in a more disciplined way.

### Sketching out an idea

About mid-way through my study, I had an idea for a project that I knew I
wouldn't have time for right away: an abstract interface for describing vector
drawings. However, because I couldn't stop thinking about the problem, I decided
I needed to make a simple prototype to satisfy my curiosity. An entire evening
of hacking got me to the point where I was able to generate the following image
in PDF format using [Prawn](https://github.com/prawnpdf/prawn):

![](http://i.imgur.com/OW5gT.png)

The basic idea of my abstract interface was that rather than making direct calls
to Prawn's APIs, you could instead describe your diagrams in a general way, such
as in the following example:

```ruby
drawing = Vellum::Drawing.new(300,400)

drawing.layer(:box) do |g|
  g.rect(g.top_left, g.width, g.height)
end

drawing.layer(:x) do |g|
  g.line(g.top_left,  g.bottom_right)
   .line(g.top_right, g.bottom_left)
end

drawing.layer(:cross) do |g|
  g.line([g.width / 2, 0], [g.width / 2, g.height])
   .line([0, g.height / 2], [g.width, g.height/2])
end

drawing.style(:x,     :stroke_color => "ff0000") 

drawing.style(:box,   :line_width   => 2, 
                      :fill_color   => "ffffcc")

drawing.style(:cross, :stroke_color => "00ff00")
```

A `Vellum::Renderer` object would then be used to turn this abstract
representation into output in a particular format, using some simple 
callbacks. A Prawn-based implementation is shown below:

```ruby
require "prawn"

pdf      = Prawn::Document.new
renderer = Vellum::Renderer.new

renderer.on(Object) do |shape, style|
  pdf.stroke_color = style.fetch(:stroke_color, "000000")
  pdf.fill_color   = style.fetch(:fill_color, "ffffff")
  pdf.line_width   = style.fetch(:line_width, 1)
end

renderer.on(Vellum::Line) do |shape, style|
  pdf.stroke_line(shape.p1, shape.p2)
end

renderer.on(Vellum::Rectangle) do |shape, style|
  pdf.fill_and_stroke_rectangle(shape.point, shape.width, shape.height)
end

renderer.render(drawing)

pdf.render_file("foo.pdf")
```

Looking back on this code, I'm still excited by the basic idea, because it
would make it possible for backend-agnostic graphics code to be written, and
would allow for more than a few interesting manipulations of the abstract
structures prior to rendering. However, I can't help but think that for a
throwaway prototype, there is far too much detail here.

If you take a closer look at [how I actually implemented Vellum](https://gist.github.com/2732815), 
you'll find that I shoved together 
several classes into a single file, which I stowed away on a gist. I never
bothered to record the history of my experiment, which I assume was actually
built up incrementally rather than designed all at once. Without a single test
to guide me, I would need to study the implementation code all over again if I
wanted to begin to understand what I had actually learned from my experiment.

While it is hard to say whether this little prototype was worth the effort or
not, it underscores a bad habit of mine that bites me from time to time: I can
easily get excited about an idea and then dive into it with reckless abandon. In
this particular situation, I ended up with some working code at the end of
my wild hacking session, but there were several other ideas I worked on during
my study that I ended up getting nowhere with.

### What makes spiking different from cowboy coding?

The main thing I learned from taking a look at how I work on experimental ideas
is that there is a big difference between spiking and cowboy coding.

When you are truly working on a spike, you have a specific question in mind that 
you want to answer, you know roughly how much you're willing to invest in 
finding out that answer, and you cut as many corners as possible to get that
answer as quickly as possible. The success of a spike is measured by what you
learn, not what code you produce. Once you feel that you understand what
needs to be done, you pull yourself out of spike mode and return to your 
more disciplined way of doing things.

Cowboy coding, on the other hand, is primarily driven by gut feelings, past
experiences, and on-the-spot decision making. This kind of programming can be
fun because it allows you to write code quickly without thinking deeply about
its consequences, but in most circumstances, you end up needing to pay for your
lack of discipline somewhere down the line.

Of the three examples I discussed in this article, the first one looks and feels
like a true spike, and the third one is the result of straight-up guns-blazing
cowboy coding. The second example lies somewhere between those two extremes, and
perhaps represents a spike that turned into a cowboy coding session. I think
scenarios like that are what we really need to look out for, because it is
very easy to drop our good practices but much harder to return to them.

Now that I've laid this all out on the line for you, I'd love to hear some
of your own stories! Please leave a comment if you have an interesting
experience to share, or if you have any questions for me.

> **NOTE:** While doing some research for this article, I stumbled across a nice excerpt
from "The Art of Agile Development" which describes [how to safely make use
of spike solutions](http://jamesshore.com/Agile-Book/spike_solutions.html). It's
definitely worth checking out if you're interested in studying this topic more.


================================================
FILE: articles/v4/README.md
================================================
These articles are from Practicing Ruby's fourth volume, which ran from
April 2012 to July 2012. The manuscripts in this folder correspond to the
following articles on practicingruby.com:

* [Issue 4.1: What are the costs and benefits of TDD?](https://practicingruby.com/articles/shared/pbflvfoiawak) (2012.04.10)
* [Issue 4.2: From prototype to minimal product](https://practicingruby.com/articles/shared/rvzglzyshhuu) (2012.04.18)
* (Issue 4.3 was cancelled due to the birth of my son)
* [Issue 4.4: Tricks for working with text and files](https://practicingruby.com/articles/shared/zmkztdzucsgv) (2012.05.10)
* [Issue 4.5: Solving the "Scoring Predictions" kata](https://practicingruby.com/articles/shared/zpahykypgpvv) (2012.05.15, by James Edward Gray II)
* [Issue 4.6: Persisting relations in a polyglot world](https://practicingruby.com/articles/shared/spminlhmvvhr) (2012.05.21, by Piotr Szotkowski)
* [Issue 4.7: Confident Ruby](https://practicingruby.com/articles/shared/zsnvgupzifil) (2012.06.05, by Avdi Grimm)
* [Issue 4.8: Implementing the Active Record pattern, part 1](https://practicingruby.com/articles/shared/cpqewwhqoaeq) (2012.06.12)
* [Issue 4.9: The hidden costs of inheritance](https://practicingruby.com/articles/shared/goiwglvezuip) (2012.06.19)
* [Issue 4.10: Implementing the Active Record pattern, part 2](https://practicingruby.com/articles/shared/ucqsaohjxddv) (2012.07.03)
* [Issue 4.11: Responsibility-centric vs. data-centric design](https://practicingruby.com/articles/shared/lrwkumltjnxr) (2012.07.10, by Greg Moeck)
* [Issue 4.12.1: (TDD Study) End-to-end testing is essential](https://practicingruby.com/articles/shared/bgtfdvbtvdnl) (2012.07.18)
* [Issue 4.12.2: (TDD Study) Refactoring is not redesign](https://practicingruby.com/articles/shared/fdaikyllpsya) (2012.07.19)
* [Issue 4.12.3: (TDD Study) Mock objects deeply influence design](https://practicingruby.com/articles/shared/hpeujsdoiehq) (2012.07.24)
* [Issue 4.12.4: (TDD Study) Spiking is not cowboy coding](https://practicingruby.com/articles/shared/nirmcxfrrpgx) (2012.07.26)

If you enjoy what you read here, please subscribe to [Practicing Ruby](http://practicingruby.com). These articles would not exist without the support of our paid subscribers.


================================================
FILE: articles/v5/001-statistical-modeling.md
================================================
> This issue was a collaboration with my wife, Jia Wu. Jia is an associate
> scientist at the Yale Child Study Center, where she spends a good portion of
> her time analyzing brainwave data from various EEG experiments. Although
> this article focuses on very basic concepts, her background in 
> statistical programming was very helpful whenever I got stuck on 
> something. That said, if you find any mistakes in this article, you
> can blame me, not her.

Statistics and programming go hand in hand, but the kinds of problems we tend to
work on in Ruby make it easy to overlook this point. If your work does not 
involve a lot of data analysis, you may not feel much pain even if you have a very 
limited math background. However, as our world becomes increasingly data-driven, 
a working knowledge of statistics can really come in handy.

In this article, I will walk you through a simple example of how you can use 
two very basic statistical methods (correlation + significance testing) to 
explore your own questions about the patterns you notice in the world. 
Although we won't dig too deeply into the underlying math involved in these
concepts, I will try to provide you with enough background information to 
start trying out your own experiments even if you have never formally 
studied statistics before.

The example that I'll share with you explores the connection between
the economic strength and population of nations and their performance in 
recent Olympic games. In order to interpret the (rudimentary) analysis I did, 
you'll need to understand what a correlation coefficient is, and what it means 
for a result to be statistically significant. If you are familiar with those 
concepts, feel free to skim or skip the next two sections. Otherwise, just read 
on, and I'll do my best to fill you in on what you need to know.

### Measuring the strength of relationships between datasets

Put simply, correlation measures the dependency relationship between two
datasets. When two datasets are fully dependent on each other, there
exists a pattern which can be used to predict the elements in either set
based on the elements in the other. When datasets are completely independent 
from one another, it is impossible to come up with a mapping between 
them that describes their relationship any better than a completely 
randomized mapping would. Virtually all real world datasets that are not 
generated from purely mathematical models fall somewhere between these 
two extremes, and that means that in practice correlation needs to be 
treated as a continuum rather than a boolean property. This relative dependency
relationship between datasets is typically represented by a correlation coefficient.

Correlation coefficients can be computed in a number of ways, but the most
common and straightforward way of doing so is by establishing a trend line
and then calculating how closely the data fits that line on average. 
This measure is called the [Pearson correlation coefficient](http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient), 
and is denoted by the variable `r`.

When two datasets are perfectly linearly correlated, the mapping
between them is perfectly described by a straight line. However, when no 
correlation exists, there will be no meaningful linear pattern to the data at all. An
example of both extremes is shown below; the graph on the left describes perfect
correlation, and the graph on the right describes (almost) no correlation:

![](http://i.imgur.com/G1laY.png)

Notice that in the graph on the left, each and every point is perfectly
predicted by the line, but in the graph on the right, there is little to
separate the trend line shown from any other arbitrary line you could draw
through the data. If we compute the correlation coefficient for these
two examples, the left diagram has `r=1`, and the
right diagram is very close to `r=0`.

Real world data tends to be noisy, and so in practice you only find datasets
with correlation coefficients of 0.0 or 1.0 in deterministic mathematical
models. With that in mind, the following example shows a messy but strongly 
correlated dataset, with a coefficient of `r=0.767`:

![](http://i.imgur.com/TauIx.png)

You can see from this graph that while the trend line does not directly
predict where the points will fall in the scatter plot, it reflects the
pattern exhibited by the data, and most of the
points within the image fall within a short distance of that line. Taken
together with its relatively high correlation coefficient, this picture
shows a fairly strong relationship between the two datasets.

If you are struggling with mapping the concept of correlation
coefficients to real world relations, it may help to consider the 
following examples (from the book [Thinking, Fast and
Slow](http://en.wikipedia.org/wiki/Thinking,_Fast_and_Slow)):

* The correlation between the size of objects measured in English units
or metric units is 1. 

* The correlation between SAT scores and college GPA is close to 0.60.

* The correlation between income and education level in the United States is
close to 0.40.

* The correlation between family income and the last four digits of their
phone number is 0.

We'll talk more about what correlation does and does not measure in a little
while, but for now we can move on to discuss what separates genuine 
patterns from coincidences. 

### Establishing a confidence factor for your results 

Because correlation only establishes the relationships between samples in
an individual experiment, it is important to sanity check your findings
to see how likely it will be that they will hold in future trials. When 
combined with other considerations, statistical significance testing 
can be a useful way of verifying that what you have observed is more than 
pure happenstance.

Methods for testing statistical significance can vary depending on the
relationships you are trying to verify, but they ultimately boil down to being a
way of computing the probability that you would have achieved the same results
by chance. This is done by assuming a default position called a null
hypothesis, and then examining the likelihood that the same results would be
observed if that hypothesis held true.

In the context of correlation testing, the null hypothesis is that your two
datasets are completely independent from one another. Assuming independence
allows you to compute the probability that the effect you observed in 
your real data could be reproduced by chance. The result of this computation 
is called a p-value, and is denoted by the variable `p`.  

Whether or not a p-value implies statistical significance depends on the context
of what is being studied. For example, in behavioral sciences, a significance 
test that yields a value of `p=0.05` is typically considered to be a solid 
result. The data from behavioral experiments is extremely noisy and hard to
isolate, and that makes it reasonable from a practical standpoint to accept a
1 in 20 chance that the same correlation could have been observed in 
completely independent datasets. However, in more stable environments, a much
higher standard is imposed. For particle physics discoveries (such as that of
the [Higgs Boson](http://en.wikipedia.org/wiki/Higgs_boson)), a significance 
of 5-sigma is expected, which is approximately `p = 0.0000003`. These kinds of 
discoveries have a less than 1 in 3.5 million chance of being reproduced by 
happenstance, which is an extremely robust result.

The important thing to note about statistical significance is that it can
neither imply the likelihood that an observed result was a fluke, nor be
used to verify the validity of an observed pattern. While significance testing
has value as a loose metric for establishing confidence in the plausibility of a
result, it is [frequently misunderstood](http://en.wikipedia.org/wiki/P_value#Misunderstandings) 
to mean much more than that. This point is important to keep in mind as you
conduct your own experiments or read about what others have studied,
because cargo cult science is every bit as dangerous as cargo cult programming.

### Exploring statistical concepts in practice

Now that we've caught up with all the background knowledge, we can finally dig
into a practical example of how to make use of these ideas. I will start by 
showing you the results of my experiment, and then discuss how I went about 
implementing it. 

[The full report is a four page
PDF](https://github.com/elm-city-craftworks/olympics/blob/master/olympic_report.pdf?raw=true), 
covering the 1996, 2000, 2004, and 2008 Summer Olympic games. The following 
screenshot shows the Beijing 2008 page, which includes 
a pair of scatterplots and their associated `r` and `p` values. For this 
dataset, I analyzed 152 teams, excluding all those that were missing either
GDP or population information in my raw data:

![](http://i.imgur.com/c25L6.png)

What this report shows is that there is a strong correlation between a nation's
GDP and its Olympic medal wins (`r ~= 0.832`), and a moderate correlation between 
population and medal wins (`r ~= 0.494`). While there is some variation in these 
effects over the years, the general conclusion remains the same for all four 
of the Olympic games I analyzed, as shown below:

![](http://i.imgur.com/Rw1Y4.png)

While it would be possible with some effort to do this kind of data analysis in
pure Ruby, I chose to make use of [RSRuby](https://github.com/alexgutteridge/rsruby)
to interface with the R language instead. R is a fantastic language for 
statistics applications, and so it makes sense to use it when you are doing 
this kind of work.

Because my needs were extremely simple, I did not need to write much glue code
to get what I needed from R. In fact, the complete implementation of my 
`Olympics::Analysis` singleton object ended up being just a couple paragraphs
of code, as shown below:

```ruby
module Olympics
  class << (Analysis = Object.new)
    attr_accessor :r

    def correlation(params)
      r.assign("x", x=params.fetch(:x))
      r.assign("y", y=params.fetch(:y))

      data = r.eval_R %{ cor.test(x, y) }

      { :n => x.size, :r => data["estimate"]["cor"], :p => data["p.value"] }
    end

    def plot(params)
      [:file, :x, :y, :x_label, :y_label].each do |key|
        r.assign(key.to_s, params.fetch(key))
      end

      r.eval_R %{
        jpeg(filename=file, width=400, height=400)
        plot(x=x, y=y, xlab=x_label, ylab=y_label)
        abline(lm(y ~ x), col="red")
        dev.off()
      }

      nil
    end
  end
end
```

In the `Olympics::Analysis#correlation` method, I make a call to 
R's [cor.test](http://stat.ethz.ch/R-manual/R-patched/library/stats/html/cor.test.html)
function via an RSRuby object, and it returns a nested 
hash containing way more information than what I could possibly 
need for the purposes of this report. With that in mind, I grab
the two values I need from that structure and return a hash with
the values of the `n`, `r`, and `p` variables.

In the `Olympics::Analysis#plot` method, I call a few R functions to 
generate a scatter-plot with a line of best fit in JPEG format. The
way that R handles graphing is a bit weird, but it 
is extremely powerful. The thing I found particularly interesting as 
someone new to R is that its [linear modeling functions](http://stat.ethz.ch/R-manual/R-patched/library/stats/html/lm.html) 
use a formulaic syntax to define custom models for plotting trend 
lines. For our purposes, the simple `y ~ x` relationship works 
fine, but complicated fit lines can also be described using this 
syntax. As a special-purpose language, this is perhaps not surprising, 
but I found it fascinating from a design perspective.

The rest of the code involved in generating these reports is just a hodgepodge
of miscellaneous data munging, using the CSV standard library to read data in as
a table, and access it by column. For example, I'm able to get all of the
country names by executing the following code:

```ruby
>> table = CSV.table("data/1996_combined.csv", :headers => true)
>> table[:noc]
=> ["Afghanistan", "Albania", ..., "Zambia", "Zimbabwe"]
```

The CSV standard library really makes this kind of work easy, and its `Table`
object even automatically converts numeric columns into their appropriate 
Ruby objects by default:

```ruby
>> table[:all_medals].reduce(:+)
=> 837
```

I won't go into much of the details about the reporting code used to 
generate the PDF, because it isn't especially related to the main topic of this 
article. However, it is worth pointing out that in order to make the data
I got back from `Olympics::Analysis.correlation` display friendly, I needed to 
do some extra transformations on it:

```ruby
module Olympics
  class Report
    # ...

    private

    def correlation_summary(x, y)
      stats = Analysis.correlation(:x => x, :y => y)
      n = "n = #{stats[:n]}"
      r = "r ~= #{'%.3f' % stats[:r]}"
      
      if stats[:p] < 0.001
        p = 'p < 0.001'
      else
        p = "p ~= #{'%.3f' % stats[:p]}"
      end

      [n,r,p].join(", ")
    end
  end
end
```

The formatting of the `n` and `r` values is very straightforward, and so it
should be clear what is going on there. However, to display `p` in a way that
is consistent with statistical reporting, I need to check to see if its value
is lower than the threshold I've chosen, and display `p < 0.001` rather
than `p ~= 0.000`. This requires just a little bit of extra effort, but it makes
the report a whole lot nicer looking.

I had originally planned to show all of these values out to float precision, but
it turns out that R's `cor.test` function returns `p=0` for any value of `p`
that is smaller than hardware float epsilon. This is a bit of an awkward
behavior, and so I was happy to sidestep it by displaying an inequality 
instead. For what it's worth, the inner math geek in me cringes at the 
idea of displaying arbitrarily small values in the neighborhood of zero 
as if they were actually zeroes.

While it isn't especially important for understanding the main concepts in this
article, if you feel like you want to know how this report works, 
you can start with the [olympic_report.rb](https://github.com/elm-city-craftworks/olympics/blob/master/olympic_report.rb) 
script and then trace the path of execution from there through to the actual
PDF generation. If you have questions about its implementation, feel free
to leave me a comment.

So far, I have provided you with some very basic background information on a
couple of statistical methods, and demonstrated how to make use of them in
practice. However, what I haven't spent much time talking about is all the
things that can go wrong when you do this kind of analysis. Let's take a bit
of time now to discuss that before we wrap things up.

### Maintaining a healthy level of skepticism

In the process of researching this article, I learned that even statisticians
can be a bit trollish from time to time. If you don't believe me, take a look at
[Anscombe's Quartet](http://en.wikipedia.org/wiki/Anscombe%27s_quartet):

![](http://i.imgur.com/0QV6V.png)

All four of these figures have an identical trend line, and an identical
correlation (`r = 0.816`), as well as several other properties in common.
However, visual inspection reveals that they are clearly displaying wildly
different patterns from one another. The point of this diagram is obvious: 
simple statistics on their own are no substitute for actually
looking at what the data is telling you.

With this idea in mind, it is important to take a close look at the patterns you see
in your data, and look for outliers and groups of points that may be skewing
results. If excluding those values keeps the effect that you observed intact,
you can feel a bit more confident in the strength of your evidence. However, if
your effect disappears, that means you may need to do some thinking about why
that is the case, and possibly come up with some new questions to ask.

Looking back at my report, it is easy to spot a few things
that could influence its results:

![](http://i.imgur.com/AJdUf.png)

To see what effect these factors were having on my results, I re-ran the
correlation and significance calculations on a number of variations of the
original Beijing 2008 dataset:

![](http://i.imgur.com/lvVrX.png)

All of the variations left the strong correlation between GDP and medal wins
intact, although some changes did have a major impact on the `r` value. This
tells me that at least for the issues we identified, the trend is fairly
robust.

The relationship between population and medal wins is less
stable, and simply excluding the US and China data points pushes it to the point
of not having much of a correlation at all. When removing all the major
identified influencing factors, the moderate correlation is preserved, 
but we end up with `p=0.002`. While it seems reasonable to accept 1 in
500 odds on a dataset that is bound to be influenced by any number of external
factors, this result does still stand out when you note that most of our other
p-values were infinitesimal.

Even if we accept that this investigation seems to support the notion of a
strong link between GDP and Olympic medal wins, and a somewhat dubious but plausible
relationship between population and Olympic medal wins, we still need to think
of all of the things that could have gone wrong before we even reached the point
of conducting our analysis. Without knowing that our source data is reliable, we
can't trust the results of our analysis.

The data I used for this report is cobbled together from CSVs I found via
web search, scraped data from Wikipedia, and copied and pasted data from
Wikipedia. To assemble the combined CSV documents that these reports run
against, I wrote a bunch of small throwaway scripts and wasn't particularly
careful about avoiding data loss or corruption in the process. So in the end,
there is a very real possibility that the effect I found means nothing at all.

The lesson to take away from this point about data integrity is that fitness for
purpose should always be on your mind. If you are throwing together a couple
graphs to get a rough picture of a phenomenon to see if there is anything
interesting worth saying about it, then you probably don't need to worry about
hunting down perfectly clean data and processing it flawlessly. However, if you
are tasked with building a statistical report which is actually meant to
influence people in some way, or to inform a decision making process, you need
to double and triple check that you're not feeding garbage data into your
analysis process. In other words, statistics can only be as reliable as the
raw facts you use to generate them.

If we suppose that the raw data for this report was accurate in the first place, 
was not corrupted in the process of analyzing it, and that the results we 
generated are significant and trustworthy, we still must accept that 
[correlation does not imply
causation](http://en.wikipedia.org/wiki/Correlation_is_not_causation).
Nonetheless, knowing what patterns exist out there in the world can be very
helpful to us as we contemplate *why* things are the way they are, and that
makes these very simple statistical methods useful in their own right.

### Reflections

While I hope that this article has some direct practical value for you, now that
I have written it I feel that it is just as useful as an exercise in developing
a more rigorous and skeptical way of thinking about the work that we do. 

On the one hand, statistics offers us the promise that we can make sense of the myriad
data streams that make up our lives. On the other hand, statistical thinking
requires us to be precise, diligent, and realistic about what we can expect to 
understand about the world. These kinds of mental states overlap nicely with
what helps us become better at programming, and I think that is what made
writing this article so interesting to me. I hope you enjoyed it too!


================================================
FILE: articles/v5/002-rocket-science-and-lod.md
================================================
The [Law of Demeter](http://www.ccs.neu.edu/home/lieber/LoD.html) is a well-known 
software design principle for reducing coupling between collaborating objects. 
However, because the law exists in many forms, it often means different things 
to different people. As far as laws go, Demeter has been flexible in practice, 
which has led to some interesting evolutions in its application over time. 
In this article, I will discuss an interpretation of the law that is quite 
literally out of this world.

### An introduction to Smyth's Law of Demeter

[David Smyth](http://mars.jpl.nasa.gov/zipcodemars/bio-contribution.cfm?bid=1018&cid=393&pid=377&country_id=0), 
a scientist who worked on various Mars missions for NASA's Jet 
Propulsion Laboratory, came up with this seemingly innocuous definition
of the Law of Demeter:

> A method can act only upon the message arguments and the state of the receiving object.

On the surface, this formulation is essentially [the object form of the Law
of Demeter](http://www.ccs.neu.edu/research/demeter/demeter-method/LawOfDemeter/object-formulation.html)
stated in much less formal terms. However, Smyth's law is different in the way in which
he interprets it: he assumes that the Law of Demeter implies that 
methods should not have return values. This small twist causes 
the law to have a much deeper effect than its originators had 
anticipated. 

Before we discuss the implications of building systems entirely out of methods
without return values, it is important to understand why Smyth assumed
that value-returning methods were forbidden in the first place. To explore
that point, consider the following trivial example:

```ruby
# Example model with a class-level finder.
class Person < ActiveRecord::Base
  # Passes +zipcode+ straight through to `where` and returns whatever
  # `where` returns -- it is that return value which callers can then
  # send further messages to.
  def self.in_postal_area(zipcode)
    where(:zipcode => zipcode)  
  end
end
```

The `Person.in_postal_area` method does not violate the 
Law of Demeter itself, as it is nothing more than a simple delegation
mechanism that passes the `zipcode` parameter to a 
lower-level function on the same object. But because it
returns a value, this function makes it easy for its callers
to violate the Law of Demeter, as shown here:

```ruby
class UnsolicitedMailer < ActionMailer::Base
  # Mails an offer to every person found for +zipcode+.
  #
  # The collection returned by Person.in_postal_area is neither part of
  # this mailer nor one of its arguments, yet a message (`map`) is sent to it.
  def spam_postal_area(zipcode)
    recipients = Person.in_postal_area(zipcode)
    addresses  = recipients.map(&:email)

    mail(:to => addresses, :subject => "Offer for you!")
  end
end
```

Because the value returned by `Person.in_postal_area` is neither 
a direct part of the `UnsolicitedMailer` object nor a parameter
of the `spam_postal_area` method, sending messages
to it results in a Demeter violation. Depending on the project's 
requirements, breaking the law in this fashion could be 
reasonable, but it is a code smell to watch out for.

In the context of the typical Ruby project, methods that 
return values are common because the convenience of implementing
things this way often outweighs the cost of doing so. However,
whenever you take this approach, you make two fundamental 
assumptions that those who write code for Mars rovers 
cannot: that your value-returning methods will respond
in a reasonable amount of time, and that they will not fail 
in all sorts of complicated ways.

Although these basic assumptions often apply to the bulk of what we do,
even those of us who aren't rocket scientists occasionally
need to work on projects for which temporal coupling is considered
harmful and robust failure handling is essential. In such
scenarios, it is worth considering what Smyth's interpretation
of the Law of Demeter (LoD) has to offer.

### The implications of Smyth's Law of Demeter

Smyth's unique interpretation of how to apply LoD eventually 
caught the eye of Karl Lieberherr, a member of the
Demeter project who published some of the earliest papers
on the topic. Lieberherr took an interest in Smyth's approach 
because it was clearly different than what the Demeter 
researchers had intended—yet potentially useful. 
A correspondence between the two led Smyth to share his 
thoughts about what his definition of LoD brings to 
the table. His six key points from the [original discussion](http://www.ccs.neu.edu/research/demeter/demeter-method/LawOfDemeter/Smyth/LoD-revisited2) 
are listed in an abridged form here:

```
There are actually several wonderful properties that fall out 
from this definition of LoD:

     A method can act only upon the message arguments and the
     existing state of the receiving object.

1. Method bodies tend to be very close to straight-line code. Very
   simple logic, very low complexity.

2. There must be no return values; if there are, the sender of the message
   is not obeying the law.

3. There cannot be tight synchronization, as the sender cannot tell whether
   the message is acted on or not within any "small" period of time
   (perhaps the objects collaborate with a two-way protocol and the
   sender can eventually detect a timeout).

4. Because there are no return values, the objects need to be
   "responsible" objects: they need to handle both nominal and
   foreseeable off-nominal cases. This requirement has the wonderful effect of
   localizing failure handling within the object that has the
   best visibility and understanding of whatever went wrong.
   It also dramatically reduces the complexity of protocols and
   clients.

   ...

5. The law requires an object to subscribe to information so that it has
   what it needs whenever it gets a message. Thus lazy
   evaluation can't be used. Although this requirement may seem like an 
   inefficiency, it becomes one in practice only if the objects don't have 
   concise responsibilities. In such a case, efficiency of communication
   bandwidth isn't the real problem.

   ...

6. Because tight synchronization is out of the picture, the responsible
   objects should be goal oriented. A goal is different from a method
   in that a goal is pursued over some expanse of time and does not
   seem instantaneous. By thinking of goals rather than discrete
   actions, people can derive solutions that don't require tight
   temporal coupling. This sounds like hand waving, and it is—but
   seven years of doing it shows that it really does work.
```

These are deep claims, but the remainder of the discussion between Smyth
and Lieberherr did not elaborate much further on them. However, it is 
fascinating to imagine the kind of programming style that Smyth
is advocating here: it boils down to a highly robust form of
[responsibility-driven development](http://practicingruby.com/articles/64) with 
concurrent (and potentially distributed) objects that communicate almost 
exclusively via callback mechanisms. If Smyth were not an established
scientist working on some of the world's most challenging problems,
it would almost seem as if he were playing object-oriented buzzword bingo.

Although I don't know nearly enough about any of these ideas to speak 
authoritatively on them, I think that they form a great starting point 
for a very interesting conversation. However, if you're like me, you
would benefit from having these ideas brought back down to earth
a bit. With that in mind, I've put together a little example 
program that will hopefully help you do exactly that.

### Smyth's Law of Demeter in practice

Software design principles can be interesting to study in the abstract, but
there is no substitute for trying them out in concrete applications. If you 
can find a project that is a natural fit for the technique you are 
trying to investigate, even the most simple toy application will teach you
more than pure thought experiments ever could.

Smyth's approach to the Law of Demeter originated from his work on software for
Mars rovers, an environment where tight temporal coupling and a lack of 
robust interactions between distributed systems can cause serious problems.
Because it takes about 14 minutes for light to travel between Earth and Mars, 
even the most trivial system interactions require careful design consideration. 
With so much room for things to go wrong, a programming style that claims to 
make it easier to manage these kinds of problems definitely sounds promising.

Of course, you don't need to land robots on Mars to encounter these kinds of
challenges. I can easily imagine things such as payment processing systems 
and remote system administration toolchains having a good
degree of overlap with the issues that Smyth's LoD is meant to
address. Still, those problems are not nearly as exciting as driving a
remote control car around on a different planet. Knowing that, I decided
to test Smyth's ideas by building a very unrealistic Mars rover 
simulation. The video below shows me interacting with it over IRC:

<div align="center">
<iframe width="800" height="600"
src="//www.youtube.com/embed/Yqofx6MbYFU?vq=480&rel=0" frameborder="0" allowfullscreen></iframe>
</div>

In the video, the communications delay is set at only a couple of seconds, but it
can be set arbitrarily high, which makes it possible to simulate the full
14-minute-plus delay between Earth and Mars. No matter what the delay is set at, the
rover queues up commands as they come in and sends its responses one 
at a time as its tasks are completed. The entire simulator is only a couple of
pages of code. It consists of the following objects and responsibilities:

* [SpaceExplorer::Radio](https://github.com/elm-city-craftworks/space_explorer/blob/pr-5.2/lib/space_explorer/radio.rb) relays messages on a time delay.
* [SpaceExplorer::MissionControl](https://github.com/elm-city-craftworks/space_explorer/blob/pr-5.2/lib/space_explorer/mission_control.rb) communicates with the rover.
* [SpaceExplorer::Rover](https://github.com/elm-city-craftworks/space_explorer/blob/pr-5.2/lib/space_explorer/rover.rb) communicates with mission control and updates the map.
* [SpaceExplorer::World](https://github.com/elm-city-craftworks/space_explorer/blob/pr-5.2/lib/space_explorer/world.rb) implements the simulated world map.

As I implemented this system, I took care to abide by Smyth's recommendation
that methods not return meaningful values. Although I wasn't so pedantic as
to explicitly return `nil` from each function, I treated them as void functions
internally, so none of the simulator's features depend on the return value 
of the methods I implemented. To see the effect this approach had on
overall system design, we can trace a command's execution from end to end while
paying attention to what is going on under the hood.

I'd like to walk you through how `SNAPSHOT` works, simply because it has the
largest number of moving parts to it. As you saw in the video, 
`SNAPSHOT` is used to get back a 5x5 ASCII "picture" of the area around the 
rover, which can be used to aid navigation. In the following example, 
`@` is the rover, `-` represents empty spaces, and `X` represents boulders:

```
20:35|  seacreature| !SNAPSHOT
20:35|  roboseacreature| X - - X -
20:35|  roboseacreature| X X - X X
20:35|  roboseacreature| - X @ X X
20:35|  roboseacreature| - - X X -
20:35|  roboseacreature| - - - - -
```

As you may have already guessed, the user interface for this project is
IRC-based, which is a convenient (if ugly) medium for experimenting with
asynchronous communications. A bot that is responsible for running
the simulation monitors the channel for commands, which can be any
message that starts with an exclamation point. When these messages are
detected, they are passed on to a `MissionControl` object for processing. The
callback that monitors the channel and passes messages along to that 
object is shown here:

```ruby
# Only messages that begin with "!" match; the regex captures the text
# after the "!" and the block forwards its `command` argument (presumably
# that capture -- confirm against the bot library) to mission control.
bot.on(:message, /\A!(.*)/) do |m, command|
  mission_control.send_command(command)
end
```

The `MissionControl` object is nothing more than a bridge between the UI 
and a `Radio` object, so the `send_command` method passes the 
command along without modification:

```ruby
module SpaceExplorer
  class MissionControl
    # Hands +command+, unmodified, to the radio link for transmission.
    def send_command(command)
      @radio_link.transmit(command)
    end
  end
end
```

The `Radio` instance that `@radio_link` points to holds a
reference to a `Rover` object, which is where the `SNAPSHOT` command will be
processed. Before it gets there, `Radio#transmit` enforces a
transmission delay through the use of a very coarse-grained timer mechanism:

```ruby
module SpaceExplorer
  class Radio
    # Delivers +command+ to the target's #receive_command after roughly
    # @delay seconds, without blocking the caller.
    #
    # Raises RuntimeError ("Target not defined") if @target was never
    # assigned.
    def transmit(command)
      raise "Target not defined" unless defined?(@target)

      # The wait happens on a separate thread so this method can return
      # right away, whatever the delay is.
      Thread.new do
        start_time = Time.now

        # Coarse timer: wake up once per second until @delay seconds
        # have elapsed since start_time.
        sleep 1 while Time.now - start_time < @delay

        @target.receive_command(command) 
      end
    end
  end
end
```

It's important to point out here that `Radio#transmit` is designed to work with
an arbitrary delay, so it isn't practical for it to block execution and
return a value. Instead, it spins off a background thread that will eventually
call the `receive_command` callback method on its `@target` object, which in this case is a
`Rover` instance.

The implementation of the `Rover` object is more interesting than the
objects we've looked at so far because it implements the
[Actor model](http://en.wikipedia.org/wiki/Actor_model). 
Whenever `Rover#receive_command` is called, commands are not
processed directly but are instead placed on a threadsafe queue that then gets
acted upon on a first-come, first-served basis. This approach allows the `Rover` to do 
its tasks sequentially while continuing to accept requests as they come in. To
understand how that works, think about how `SNAPSHOT` gets handled by the 
following code:

```ruby
require "thread"

module SpaceExplorer
  # Accepts commands at any time and works through them one by one on a
  # background thread.
  class Rover
    def initialize(world, radio_link)
      @world      = world
      @radio_link = radio_link

      # Inbox of commands that have arrived but not yet been handled.
      @queue = Queue.new

      # Worker thread: Queue#pop blocks while the queue is empty, so this
      # loop idles until a command arrives and then processes it.
      Thread.new { loop { process_command(@queue.pop) } }
    end

    # Callback for incoming commands; it only enqueues the command.
    def receive_command(command)
      @queue.push(command)
    end

    # Carries out a single command. Invoked from the worker thread
    # started in #initialize.
    def process_command(command)
      case command
      when "PING"
        @radio_link.transmit("PONG")
      when "NORTH", "SOUTH", "EAST", "WEST"      
        @world.move(command)
      when "SNAPSHOT"
        # The world passes the rendered text to this block rather than
        # returning it.
        @world.snapshot { |text| @radio_link.transmit("\n#{text}") }
      else
        # unrecognized commands are silently ignored
      end
    end
  end
end
```

When the `receive_command` callback is triggered by the `Radio` object,
the method pushes that command onto a queue, which should happen nearly
instantaneously in practice. At this point, the command has finished its
outbound trip and is ready to be processed.

After the `Rover` object handles any tasks that were already queued up,
`SNAPSHOT` is passed to the `process_command` method, where the 
following line gets executed:

```ruby
@world.snapshot { |text| @radio_link.transmit("\n#{text}") }
```

This code looks a little weird because it isn't immediately obvious why a block
is being used here. Instead, we might expect the following code under 
ordinary circumstances:

```ruby
@radio_link.transmit("\n#{@world.snapshot}") 
```

However, taking this approach would be a subtle violation of Smyth's LoD,
because it would require `World#snapshot` to have a meaningful return value,
introducing additional coupling. In this case, the coupling is
temporal rather than structural, which makes it harder to spot.

The main difference between the two examples is that the latter has a strong
connascence of timing and the former does not. In the value-returning example,
if `@world.snapshot` were not simply generating a trivial ASCII diagram but
actually controlling hardware on a Mars rover to take an image, we might expect
it to take some amount of time to respond. If it were a large enough amount of
time, it wouldn't be practical to block while waiting for a response, so the
call to `Radio#transmit` would need to be backgrounded. This would also be
true for any caller that made use of `World#snapshot`.

By using a code block (which is really just a lightweight, anonymous callback
mechanism), we can push the responsibility of whether to run the
computations in a background thread into the `World` object, making that
decision completely invisible to its callers. As an added bonus, `World` can
also be more responsible about failure handling as well, because it decides 
if and when to execute the callback and how to handle unexpected situations.

In practical scenarios, the advantages and disadvantages of violating
Smyth's law would need to be weighed, but in this case I've
intentionally tried to apply it first and then attempt to justify it. For this
particular example, I can see the approach as being worthwhile even if it 
makes for slightly more ugly code.

Of course, no attempt at purity is ultimately successful, and if you take a look
at `World#snapshot`, you will see that this is where I finally throw Smyth's LoD
out the window for the sake of practicality. Feel free to focus on the structure
of the code rather than the algorithm used to process the map, as that is what 
matters most in this article:

```ruby
module SpaceExplorer
  class World
    # Every [row, col] offset in -2..2 x -2..2: 25 pairs, with the row
    # offset varying slowest, so consecutive groups of five form one row.
    DELTAS = (-2..2).to_a.product((-2..2).to_a)

    # ...

    # Renders the 5x5 area centred on (@row, @col) as text and yields it
    # to the caller's block.
    #
    # No bounds checking is done on the @data lookups here.
    def snapshot
      snapshot = DELTAS.map do |rowD, colD|
        if colD == 0 && rowD == 0
          # the centre cell is drawn as "@"
          "@"
        else
          @data[@row + rowD][@col + colD]
        end
      end

      # Five cells per line, space-separated; lines joined by newlines.
      text = snapshot.each_slice(5).map { |e| e.join(" ") }.join("\n")

      yield text
    end
  end
end
```

Among other things, we see here the familiar chain of `Enumerable` methods 
slammed together, all of which return values that are not immediate parts of
the `World` object:

```ruby
text = snapshot.each_slice(5).map { |e| e.join(" ") }.join("\n")
```

Although I could probably have written some cumbersome adapters to make this
code conform to Smyth's LoD, I think that would be a wasteful attempt to follow
the letter of the law rather than its spirit. This is especially true when you
consider that Smyth and many other early adopters of the classical Law of
Demeter were working in languages that had a clear separation between objects
and data structures, so they would not necessarily have considered core
structures to be "objects" in the proper sense. In Ruby, our core structures are
full-blown objects, but that does not mean they need to follow the same rules 
as our domain objects.

I would love it if you'd share a comment with your own thoughts about 
the philosophical divide between data structures and domain objects, and
also encourage you to read [this post from Bob Martin](https://sites.google.com/site/unclebobconsultingllc/active-record-vs-objects) 
on the topic, but I won't dwell on the point for now. We still have
work to do!

With the output in hand, all that remains to be done is to ferry it back to the
IRC channel that requested it. Looking back at the relevant portion of `Rover#process_command`, 
you can see that the yielded text from `World#snapshot` is passed on to 
another `Radio` object:

```ruby
@world.snapshot { |text| @radio_link.transmit("\n#{text}") }
```

This `Radio` object holds a reference to the `MissionControl` object that sent
the original `SNAPSHOT` command, and the path back to it is identical to the
path the command took to get to the `Rover` object, just in reverse. I won't
explain that process again in detail, as all that really matters is that
`MissionControl#receive_command` eventually gets run. This method is just as
boring as the `send_command` method we looked at earlier, serving as a direct
bridge to the UI. I've used a Cinch-based IRC bot in this example, but anything
with a `msg()` method will do:

```ruby
module SpaceExplorer
  class MissionControl
    # ...

    # Callback for messages arriving over the radio; passes +command+
    # straight to @narrator.msg.
    def receive_command(command)
      @narrator.msg(command)
    end
  end
end
```

At this point, a message is sent to the IRC channel and the out-and-back trip is
completed. Despite being a fairly complicated feature, Smyth's LoD was mostly
followed throughout, and things got weird in only a few places. That said,
if you have a devious mind, you are likely to have already realized that the
relative simplicity of this code is deceptive, because there are
so many places things can go wrong. Let's talk a little more about 
that now.

### GROUP PROJECT: Exploring our options for failure handling

Smyth's Law of Demeter promises three main consequences: less complex method
definitions, a decrease in temporal coupling, and a robust way of handling
failures. Although the example I've been using provides some evidence for the
first two claims, I intentionally avoided working on error handling to leave
something for us to think through together.

Your challenge, if you choose to accept it, is to think about what can go wrong
in this simulation and to come up with ways to handle those problems without
violating Smyth's LoD. Off the top of my head, I can think of several trivial
problems that exist in this code, but I'm sure there are many other things that I
haven't considered.

If you want to start with some low-hanging fruit, think about what happens when
an invalid command is sent, or what happens when the rover moves off the edge of
the fixed-size map it is currently using. If you want to get fancy, think about
whether the rover ought to have some safety mechanism that will prevent it from
driving into boulders, which it is currently perfectly happy to do. Or, if you
want to get creative, find your own way of breaking things, and feel free to ask
me clarifying questions as you go.

Any level of participation is welcome, ranging from asking a "What if?"
question after reading through the code a bit to grand-scale patches that make
our clunky little rover bulletproof. As I said at the beginning of this
article, my purpose in introducing Smyth's LoD to you was to start a
conversation, and I think this is a fun way to do exactly that.

The [full source for the simulator](https://github.com/elm-city-craftworks/space_explorer) 
is ready for you to tinker with, so go forth and break stuff!

### Reflections

Although I am fairly happy with how the simulator experiment turned out, it is 
hard to draw very many conclusions from it. In very small greenfield 
projects, it is hard  to see how any design principle will ultimately 
influence the full software development lifecycle. That having been said,
it did serve as a great testbed for exploring these ideas and can be a 
stepping stone toward trying these techniques in more practical settings.

I tend to think of software principles as being descriptive rather than
prescriptive; they provide us with convenient labels for particular approaches
to problems that already exist in the wild. If you've seen or worked on some
code that reminds you of the ideas that Smyth's Law of Demeter attempts to
capture, I'd love to hear about it.

I'd also love to hear about whatever doubts have been nagging you as
you worked your way through this article. Every software design strategy has its
strengths and weaknesses, and sometimes we make the mistake of emphasizing the
good parts while downplaying the bad parts, especially when we study new things.
With that in mind, your curmudgeonly comments are most welcome, as they tend to 
bring some balance along with them.

> **NOTE:** I owe a huge hat-tip to [David
> Black](http://twitter.com/david_a_black), as he was the inspiration for
this article. He and I were collaborating on a more traditional
treatment of the Law of Demeter; we each found our own divergent ideas to
investigate, but I definitely would not have written this article if he hadn't
shared his thoughtful explorations with me.


================================================
FILE: articles/v5/003-evented-io.md
================================================
*This issue of Practicing Ruby was contributed by Magnus Holm ([@judofyr][judofyr]), 
a Ruby programmer  from Norway. Magnus works on various open source 
projects (including the [Camping][camping] web framework),
and writes articles over at [the timeless repository][timeless].*

Working with network I/O in Ruby is so easy: 

```ruby
require 'socket'

# Start a server on port 9234
server = TCPServer.new('0.0.0.0', 9234)

# Wait for incoming connections. Each connection is answered and closed
# before accept is called again, so clients are served one at a time.
while io = server.accept
  io << "HTTP/1.1 200 OK\r\n\r\nHello world!"
  io.close
end

# Visit http://localhost:9234/ in your browser.
```

Boom, a server is up and running! Working in Ruby has some disadvantages, though: we
can handle only one connection at a time. We can also have only one *server*
running at a time. There's no understatement in saying that these constraints
can be quite limiting. 

There are several ways to improve this situation, but lately we've seen an
influx of event-driven solutions. [Node.js][nodejs] is just an event-driven I/O-library
built on top of JavaScript. [EventMachine][em] has been a solid solution in the Ruby
world for several years. Python has [Twisted][twisted], and Perl has so many that they even
have [an abstraction around them][anyevent].

Although these solutions might seem like silver bullets, there are subtle details that
you'll have to think about. You can accomplish a lot by following simple rules
("don't block the thread"), but I always prefer to know precisely what I'm
dealing with. Besides, if doing regular I/O is so simple, why does
event-driven I/O have to be looked at as black magic?

To show that they are nothing to be afraid of, we are going to implement an 
I/O event loop in this article. Yep, that's right; we'll capture the core 
part of EventMachine/Node.js/Twisted in about 150 lines of Ruby. It won't 
be performant, it won't be test-driven, and it won't be solid, but it will 
use the same concepts as in all of these great projects. We will start 
by looking at a minimal chat server example and then discuss 
how to build the infrastructure that supports it.

## Obligatory chat server example

Because chat servers seem to be the event-driven equivalent of a
"hello world" program, we will keep with that tradition here. The
following example shows a trivial `ChatServer` object that uses
the `IOLoop` that we'll discuss in this article:

```ruby
# A minimal broadcast chat room: every connected stream receives every
# join notice, chat line, and leave notice.
class ChatServer
  def initialize
    @clients = []    # streams currently connected
    @client_id = 0   # last user number handed out
  end

  # Attaches this chat room to +server+: each stream reported through the
  # server's :accept event becomes a chat client.
  def <<(server)
    server.on(:accept) do |stream|
      add_client(stream)
    end
  end

  # Registers +stream+ as a new client and wires up its :data and :close
  # events.
  def add_client(stream)
    id = (@client_id += 1)

    # Announced before the stream is added to @clients, so the newcomer
    # does not receive its own join notice.
    send("User ##{id} joined\n")

    stream.on(:data) do |chunk|
      # The chunk is relayed as received; nothing is appended to it.
      send("User ##{id} said: #{chunk}")
    end

    stream.on(:close) do
      @clients.delete(stream)
      # End the notice with a newline, like the join notice, so the next
      # broadcast starts on a line of its own.
      send("User ##{id} left\n")
    end

    @clients << stream
  end

  # Writes +msg+ to every connected client.
  #
  # NOTE: this shadows Object#send on ChatServer instances; dynamic
  # dispatch on a ChatServer has to go through #__send__ or #public_send.
  def send(msg)
    @clients.each do |stream|
      stream << msg
    end
  end
end

# usage

io     = IOLoop.new
server = ChatServer.new

# Listen on port 1234 and hand the listening server to the chat room.
server << io.listen('0.0.0.0', 1234)

# Run the event loop; from here on, work happens in callbacks.
io.start
```

To play around with this server, run [this script][chatserver] and then open up
a couple of telnet sessions to it. You should be able to produce something like the
following with a bit of experimentation:

```
# from User #1's console:
$ telnet 127.0.0.1 1234

User #2 joined
User #2 said: Hi
Hi
User #1 said: Hi
User #2 said: Bye
User #2 left

# from User #2's console (quits after saying Bye)
$ telnet 127.0.0.1 1234

User #1 said: Hi
Bye
User #2 said: Bye
```

If you don't have the time to try out this code right now,
don't worry: as long as you understand the basic idea behind it, you'll be fine.
This chat server is here to serve as a practical example to help you 
understand [the code we'll be discussing][chatserver] throughout this article.

Now that we have a place to start from, let's build our event system.

## Event handling

First of all we need, obviously, events! With no further ado:

```ruby
# Mixin that gives an object named events: handlers are registered with
# #on and invoked, in registration order, by #emit.
module EventEmitter
  # Per-object registry mapping an event type to its list of handlers.
  # Built on first use; an unknown type starts out with an empty list.
  def _callbacks
    @_callbacks ||= Hash.new { |registry, event| registry[event] = [] }
  end

  # Adds the given block to the handlers for +type+. Returns self, so
  # registrations can be chained.
  def on(type, &handler)
    _callbacks[type].push(handler)
    self
  end

  # Calls each handler registered for +type+, passing +args+ along.
  def emit(type, *args)
    _callbacks[type].each { |handler| handler.call(*args) }
  end
end

# Mixing in EventEmitter gives HTTPServer instances #on and #emit.
class HTTPServer
  include EventEmitter
end

server = HTTPServer.new
# Register a handler for the :request event.
server.on(:request) do |req, res|
  res.respond(200, 'Content-Type' => 'text/html')
  res << "Hello world!"
  res.close
end

# When a new request comes in, the server will run:
#   server.emit(:request, req, res)

```

`EventEmitter` is a module that we can include in classes that can send and
receive events. In one sense, this is the most important part of our event
loop: it defines how we use and reason about events in the system. Modifying it
later will require changes all over the place. Although this particular
implementation is a bit simpler than what you'd expect from a real 
library, it covers the fundamental ideas that are common to all
event-based systems.

## The IO loop

Next, we need something to fire up these events. As you will see in
the following code, the general flow of an event loop is simple:
detect new events, run their associated callbacks, and then repeat
the whole process again.

```ruby
# Drives a set of streams: on every tick, each registered stream gets a
# chance to read and to write.
class IOLoop
  # List of streams that this IO loop will handle.
  attr_reader :streams

  def initialize
    @streams = []
  end
  
  # Low-level API for adding a stream. The stream is dropped from the loop
  # again as soon as it emits :close.
  def <<(stream)
    @streams << stream
    stream.on(:close) do
      @streams.delete(stream)
    end
  end

  # Some useful helpers:

  # Wraps a raw IO object in a Stream, registers it, and returns the Stream.
  def io(io)
    stream = Stream.new(io)
    self << stream
    stream
  end

  # Opens a file (same arguments as File.open) and registers it as a stream.
  def open(file, *args)
    io File.open(file, *args)
  end

  # Opens a TCP connection to host:port and registers it as a stream.
  def connect(host, port)
    io TCPSocket.new(host, port)
  end

  # Starts listening on host:port. Every accepted connection is registered
  # with the loop automatically. Returns the Server so that callers can
  # bind their own :accept listeners.
  def listen(host, port)
    server = Server.new(TCPServer.new(host, port))
    self << server
    server.on(:accept) do |stream|
      self << stream
    end
    server
  end

  # Start the loop by calling #tick over and over again.
  def start
    @running = true
    tick while @running
  end

  # Stop/pause the event loop after the current tick.
  def stop
    @running = false
  end

  # Runs a single iteration of the loop.
  def tick
    # Iterate over a snapshot: callbacks may add streams (:accept) or remove
    # them (:close) while we are looping, and mutating an array during #each
    # makes it skip elements.
    @streams.dup.each do |stream|
      stream.handle_read  if stream.readable?
      stream.handle_write if stream.writable?
    end
  end
end
```

Notice here that `IOLoop#start` blocks everything until `IOLoop#stop` is called.
Everything after `IOLoop#start` will happen in callbacks, which means that the
control flow can be surprising. For example, consider the following code:

```ruby
# Fetch a page over plain HTTP and print it as the data arrives.
l = IOLoop.new

ruby = l.connect('ruby-lang.org', 80)  # 1
ruby << "GET / HTTP/1.0\r\n\r\n"       # 2

# Print output
ruby.on(:data) do |chunk|
  puts chunk   # 3
end

# Stop IO loop when we're done
ruby.on(:close) do
  l.stop       # 4
end

l.start        # 5
```

You might think that you're writing data in step 2, but the
`<<` method actually just stores the data in a local buffer.
It's not until the event loop has started (in step 5) that the data
actually gets sent. The `IOLoop#start` method triggers `#tick` to be run in a loop, which
delegates to `Stream#handle_read` and `Stream#handle_write`. These methods 
are responsible for doing any necessary I/O operations and then triggering
events such as `:data` and `:close`, which you can see being used in steps 3 and 4. We'll take a look at how `Stream` is implemented later, but for now 
the main thing to take away from this example is that event-driven code 
cannot be read in top-down fashion as if it were procedural code.

Studying the implementation of `IOLoop` should also reveal why it's 
so terrible to block inside a callback. For example, take a look at this 
call graph:

```
# indentation means that a method/block is called
# deindentation means that the method/block returned

tick (10 streams are readable)
  stream1.handle_read
    stream1.emit(:data)
      your callback

  stream2.handle_read
    stream2.emit(:data)
      your callback
        you have a "sleep 5" inside here

  stream3.handle_read
    stream3.emit(:data)
      your callback
  ...
```

By blocking inside the second callback, the I/O loop has to wait 5 seconds 
before it's able to call the rest of the callbacks. This wait is
obviously a bad thing, and it is important
to avoid such a situation when possible. Of course, nonblocking
callbacks are not enough—the event loop also needs to make use of nonblocking
I/O. Let's go over that a bit more now.

## IO events

At the most basic level, there are only two events for an `IO` object:

1. Readable: The `IO` is readable; data is waiting for us. 
2. Writable: The `IO` is writable; we can write data.

These might sound a little confusing: how can a client know that the server
will send us data? It can't. Readable doesn't mean "the server will send us
data"; it means "the server has already sent us data." In that case, the data
is handled by the kernel in your OS. Whenever you read from an `IO` object, you're
actually just copying bytes from the kernel. If the receiver does not read 
from `IO`, the kernel's buffer will become full and the sender's `IO` will 
no longer be writable. The sender will then have to wait until the 
receiver can catch up and free up the kernel's buffer. This situation is
what makes nonblocking `IO` operations tricky to work with.

Because these low-level operations can be tedious to handle manually, the 
goal of an I/O loop is to trigger some more usable events for application
programmers:

1. Data: A chunk of data was sent to us.
2. Close: The IO was closed.
3. Drain: We've sent all buffered outgoing data.
4. Accept: A new connection was opened (only for servers).

All of this functionality can be built on top of Ruby's `IO` objects with
a bit of effort.

## Working with the Ruby IO object

There are various ways to read from an `IO` object in Ruby:

```ruby
data = io.read
data = io.read(12)
data = io.readpartial(12)
data = io.read_nonblock(12)
```

* `io.read` reads until the `IO` is closed (e.g., end of file, server closes the
connection, etc.) 

* `io.read(12)` reads until it has received exactly 12 bytes.

* `io.readpartial(12)` waits until the `IO` becomes readable, then it reads *at
most* 12 bytes. So if a server sends only 6 bytes, `readpartial` will return
those 6 bytes. If you had used `read(12)`, it would wait until 6 more bytes were
sent.

* `io.read_nonblock(12)` will read at most 12 bytes if the IO is readable. It
raises `IO::WaitReadable` if the `IO` is not readable.

For writing, there are two methods:

```ruby
length = io.write(str)
length = io.write_nonblock(str)
```

* `io.write` writes the whole string to the `IO`, waiting until the `IO` becomes
writable if necessary. It returns the number of bytes written (which should
always be equal to the number of bytes in the original string).

* `io.write_nonblock` writes as many bytes as possible until the `IO` becomes
nonwritable, returning the number of bytes written. It raises `IO::WaitWritable`
if the `IO` is not writable.

The challenge when both reading and writing in a nonblocking fashion is knowing 
when it is possible to do so and when it is necessary to wait.

## Getting real with IO.select

We need some mechanism for knowing when we can read or write to our
streams, but I'm not going to implement `Stream#readable?` or `#writable?`. It's 
a terrible solution to loop over every stream object in Ruby and check whether it's
readable/writable over and over again. This is really just not a job for Ruby;
it's too far away from the kernel.

Luckily, the kernel exposes ways to efficiently detect readable and writable
I/O streams. The simplest cross-platform method is called select(2) 
and is available in Ruby as `IO.select`:

```
IO.select(read_array [, write_array [, error_array [, timeout]]])

Calls select(2) system call. It monitors supplied arrays of IO objects and waits
until one or more IO objects are ready for reading, ready for writing, or have
errors. It returns an array of those IO objects that need attention. It returns 
nil if the optional timeout (in seconds) was supplied and has elapsed.
```

With this knowledge, we can write a much better `#tick` method:

```ruby
class IOLoop
  # Runs one iteration of the loop: blocks in IO.select until at least one
  # stream is ready, then lets the ready streams do their I/O.
  def tick
    r, w = IO.select(@streams, @streams)
    r.each do |stream|
      stream.handle_read
    end
  
    # A read handler may have emitted :close, which removes the stream from
    # @streams. Only write to streams that are still part of the loop so a
    # closed stream isn't touched (and doesn't emit :close) a second time.
    (w & @streams).each do |stream|
      stream.handle_write
    end
  end
end
```

`IO.select` will block until some of our streams become readable or writable
and then return those streams. From there, it is up to those streams to do 
the actual data processing work.

## Handling streaming input and output 

Now that we've used the `Stream` object in various examples, you may 
already have an idea of what its responsibilities are. But let's first take a look at how it is implemented:

```ruby
# Wraps a Ruby IO object: outgoing data is buffered until the IO loop says
# the IO is writable, and incoming data is turned into :data events.
class Stream
  # We want to bind/emit events.
  include EventEmitter

  def initialize(io)
    @io = io
    # Store outgoing data in this String.
    @writebuffer = ""
  end

  # This tells IO.select what IO to use.
  def to_io; @io end

  def <<(chunk)
    # Append to buffer; #handle_write is doing the actual writing.
    @writebuffer << chunk
  end
  
  # Reads whatever is available (up to 4096 bytes) and emits it as :data.
  # Emits :close when the other side has gone away.
  # NOTE(review): the underlying IO is not closed here; presumably that is
  # left to whoever listens for :close -- confirm.
  def handle_read
    chunk = @io.read_nonblock(4096)
    emit(:data, chunk)
  rescue IO::WaitReadable
    # Oops, turned out the IO wasn't actually readable.
  rescue EOFError, Errno::ECONNRESET
    # IO was closed
    emit(:close)
  end
  
  # Writes as much of the buffer as the IO accepts right now. Emits :drain
  # once the buffer is empty, or :close if the connection is gone.
  def handle_write
    return if @writebuffer.empty?
    length = @io.write_nonblock(@writebuffer)
    # Remove the data that was successfully written. write_nonblock reports
    # a number of *bytes*, so we must drop bytes rather than characters;
    # otherwise multibyte text would lose data that was never sent.
    @writebuffer = @writebuffer.byteslice(length..-1)
    # Emit "drain" event if there's nothing more to write.
    emit(:drain) if @writebuffer.empty?
  rescue IO::WaitWritable
    # Turned out the IO wasn't writable after all; try again next tick.
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE
    # Writing to a connection the peer has already closed raises EPIPE.
    emit(:close)
  end
end
```

`Stream` is nothing more than a wrapper around a Ruby `IO` object that
abstracts away all the low-level details of reading and writing that were
discussed throughout this article. The `Server` object we make use of 
in `IOLoop#listen` is implemented in a similar fashion but is focused
on accepting incoming connections instead:

```ruby
# Wraps a listening socket and turns incoming connections into :accept
# events, each carrying a Stream for the new connection.
class Server
  include EventEmitter

  def initialize(io)
    @io = io
  end

  # Lets IO.select treat this object like the socket it wraps.
  def to_io
    @io
  end

  # Accepts one pending connection without blocking and announces it.
  def handle_read
    emit(:accept, Stream.new(@io.accept_nonblock))
  rescue IO::WaitReadable
    # Nothing was actually waiting to be accepted.
    nil
  end

  # A listening socket has nothing to write.
  def handle_write; end
end
```

Now that you've studied how these low-level objects work, you should
be able to revisit the full [source code for the Chat Server
example][chatserver] and understand exactly how it works. If you
can do that, you know how to build an evented I/O loop from scratch.

### Conclusions

Although the basic ideas behind event-driven I/O systems are easy to understand, 
there are many low-level details that complicate things. This article discussed some of these ideas, but there are many others that would need
to be considered if we were trying to build a real event library. Among
other things, we would need to consider the following problems:

* Because our event loop does not implement timers, it is difficult to do
a number of important things. Even something as simple as keeping a 
connection open for a set period of time can be painful without built-in
support for timers, so any serious event library must support them. It's
worth pointing out that `IO.select` does accept a timeout parameter, and
it would be possible to make use of it fairly easily within this codebase.

* The event loop shown in this article is susceptible to [back pressure][bp],
which occurs when data continues to be buffered infinitely even if it
has not been accepted for processing yet. Because our event loop 
provides no mechanism for signaling that its buffers are full, incoming
data will accumulate and have a similar effect to a memory leak until
the connection is closed or the data is accepted.

* The performance of select(2) is linear, which means that handling 
10,000 streams will take 10,000x as long as handling a single stream. 
Alternative solutions do exist at the kernel, but many are not 
cross-platform and are not exposed to Ruby by default. If you have 
high performance needs, you may want to look into the [nio4r][nio4r] 
project, which attempts to solve this problem in a clean way by 
wrapping the libev library.

The challenges involved in getting the details right in event loops
are the real reason why tools like EventMachine and Node.js exist. These systems
allow application programmers to gain the benefits of event-driven I/O without
having to worry about too many subtle details. Still, knowing how they work under the hood
should help you make better use of these tools, and should also take away some
of the feeling that they are a kind of deep voodoo that you'll never
comprehend. Event-driven I/O is perfectly understandable; it is just a bit 
messy.

[chatserver]: https://gist.githubusercontent.com/practicingruby/3612925/raw/315e7bfc5de7a029606b3885d71953acb84f112e/ChatServer.rb 
[timeless]: http://timelessrepo.com
[camping]: https://github.com/camping
[judofyr]: http://twitter.com/judofyr
[nodejs]: http://nodejs.org
[em]: http://rubyeventmachine.com
[twisted]: http://twistedmatrix.com
[anyevent]: http://metacpan.org/module/AnyEvent
[libev]: http://software.schmorp.de/pkg/libev.html
[libuv]: https://github.com/joyent/libuv
[nio4r]: https://github.com/tarcieri/nio4r
[bp]: http://en.wikipedia.org/wiki/Back_pressure#Back_pressure_in_information_technology


================================================
FILE: articles/v5/004-service-apis.md
================================================
*This article was contributed by Carol Nichols
([@carols10cents](http://twitter.com/carols10cents),
[carols10cents@rstat.us](https://rstat.us/users/Carols10cents)), one
of the active maintainers of [rstat.us](https://rstat.us). Carol is
also involved in the Pittsburgh Ruby community, and is a co-organizer of the
[Steel City Ruby Conf](http://steelcityrubyconf.org/).*

[Rstat.us](https://rstat.us) is a microblogging site that is similar to Twitter, but 
based on the [OStatus](http://ostatus.org/about) open standard. It's designed to be federated so
that anyone can run an instance of rstat.us on their own domain while still being
able to follow people on other domains. Although rstat.us is an active project 
which has a lot to offer its users, the lack of an API has limited its
adoption. In particular, an API would facilitate the development of mobile
clients, which are a key part of what makes microblogging convenient for many people.

Two different types of APIs have been considered for possible implementation
in rstat.us: a hypermedia API using an open microblogging spec and a JSON API that is
compatible with Twitter's API. In this article, we'll compare these two API styles 
in the context of rstat.us, and discuss the decision that the project's
developers have made after weighing out the options.

## Hypermedia API

Hypermedia APIs currently have a reputation for being complicated and hard to
understand, but they're really nothing to be scared of. There are many, many
articles about what hypermedia is or is not, but the general definition that
made hypermedia click for me is that a hypermedia API returns links in its
responses that the client then uses to make its next calls. This means that the
server does not have a set of URLs with parameters documented for you up front;
it has documentation of the controls that you will see within the responses.

The specific hypermedia API type that we are considering for rstat.us is one
that complies with the [Application-Level Profile Semantics (ALPS) microblogging
spec](http://amundsen.com/hypermedia/profiles/). This spec is an experiment
started by Mike Amundsen to explore the advantages and disadvantages of multiple
client and server implementations agreeing only on what particular values for
the XHTML attributes `class`, `id`, `rel`, and `name` signify. The spec does not
contain any URLs, example queries, or example responses.

Here is a subset of the ALPS spec attributes and definitions; these have to do
with the rendering of one status update and its metadata:

- li.message - A representation of a single message
- span.message-text - The text of a message posted by a user
- span.user-text - The user nickname text
- a with rel 'message' - A reference to a message representation

This is one way you could render an update that is compatible with these attributes:

```html
<li class="message">
  <span class="message-text">
    I had a fantastic sandwich at Primanti's for lunch.
  </span>
  <span class="user-text">Carols10cents</span>
  <a rel="message" href="http://rstat.us/12345">(permalink)</a>
</li>
```

And this is another way that is also compatible:

```html
<li class="message even">
  <p>
    <a rel="permalink message" href="http://rstat.us/update?id=12345">
      <span class="user-text">Carols10cents</span> said:
    </a>
    <span class="message-text">
      I had a fantastic sandwich at Primanti's for lunch.
    </span>
  </p>
</li>
```

Notice some of the differences between the two:

- All the elements being siblings vs some nested within each other
- Only having the ALPS attribute values vs having other classes and rels as well
- Only having the ALPS elements vs having the `<p>` element 
between the `<li>` and the rest of the children
- Simple resource-based routing vs. passing the id as a parameter

All of these are perfectly fine! If a client only depends on the values of the
attributes and not the exact structure that's returned, it will be flexible
enough to handle both responses. For example, you can extract the username 
from either fragment using the following CSS selector:

```ruby
require 'nokogiri'

# Create a Nokogiri HTML Document from the first example, the second example 
# could be substituted and the result would be the same
html = <<HERE
  <li class="message">
    <span class="message-text">
      I had a fantastic sandwich at Primanti's for lunch.
    </span>
    <span class="user-text">Carols10cents</span>
    <a rel="message" href="http://rstat.us/12345">(permalink)</a>
  </li>
HERE

doc = Nokogiri::HTML::Document.parse(html)

# Using CSS selectors
username = doc.css("li.message span.user-text").text 
```

With this kind of contract, we can change the representation
of an update by the server from the first format to the second without breaking
client functionality. While we will discuss the tradeoffs involved in using
hypermedia APIs in more detail later, it is worth noting
that structural flexibility is a big part of what makes them attractive
from a design perspective.

## JSON API

JSON APIs are much more common than hypermedia APIs right now. This style of API
typically has a published list of URLs, one for each action a client may want to
take. Each URL also has a number of documented parameters through which a client can
send arguments, and the requests return data in a defined format. This style is 
similar to a Remote Procedure Call (RPC) --
functions are called with arguments, and values are returned, but the work is
done on a remote machine. Because this style matches the way we code locally,
it feels familiar, and that may explain why the technique is so popular.

[Twitter's API](https://dev.twitter.com/docs/api) is currently implemented in
this RPC-like style. There is a lot of documentation about all the URLs
available, what parameters they take, and what the returned data or resulting
state will be. For example, here is how you would get the text of the 3 most
recent tweets made by user @climagic with Twitter's JSON API ([relevant
documentation](https://dev.twitter.com/docs/api/1/get/statuses/user_timeline)):

```ruby
require 'open-uri'
require 'json'

# Make a request to the user_timeline resource with the format json.
# Pass the parameter screen_name with the value climagic and the 
# parameter count with the value 3.

base = "http://api.twitter.com/1/statuses/user_timeline.json"
uri  = URI("#{base}?screen_name=climagic&count=3")

# The response object is a list of tweets, which is documented at
# https://dev.twitter.com/docs/platform-objects/tweets

# Read through the URI object: open-uri adds #read to HTTP URIs, whereas
# Kernel#open no longer accepts URLs as of Ruby 3.0.
response = JSON.parse(uri.read)

tweets = response.map { |t| t["text"] }
```

Rendering JSON from the server is usually fairly simple as well, and
the simplicity of providing and consuming JSON in many different languages
is another one of the big reasons why JSON APIs are gaining in popularity. Twitter
actually decided to [drop support for XML, RSS, and
Atom](https://dev.twitter.com/docs/api/1.1/overview#JSON_support_only) in
version 1.1 of their API, leaving ONLY support for JSON. [According to
Programmable
Web](http://blog.programmableweb.com/2011/05/25/1-in-5-apis-say-bye-xml/) 20%
of new APIs released in 2011 offered only JSON support.

That said, popularity is neither the best nor the only metric for evaluating
design strategies; costs and benefits of different approaches 
can only be weighed out in the context of a real project. To illustrate that point, we can consider how 
each of these API styles would impact the development of rstat.us.

### Comparing and contrasting the two styles

There are many clients that have been built against Twitter's current API. There
are even some clients that allow you to change the root URL of all the requests
(e.g.,
[Twidere](https://play.google.com/store/apps/details?id=org.mariotaku.twidere)).
If rstat.us implemented the same parameters and response data,
people could use those clients to interact with both Twitter and rstat.us. 
Even if rstat.us doesn't end up having this level of compatibility with
Twitter's API, a close approximation to it would still feel a lot more 
familiar to client developers, which may encourage them to support rstat.us.

But is it really a good idea to be coupled to Twitter's API design? If Twitter changes a
parameter name, or a URL, or the structure of the data returned, rstat.us will
need to implement those changes or risk breaking its Twitter-compatible clients.
Because one of the reasons rstat.us was developed was to reduce this kind of
dependency on Twitter, this is a big price to pay, and hypermedia APIs can help
guard against this kind of brittleness.

In addition to flexibility in representation on both the client and server side,
another advantage of a hypermedia API is that
it uses XHTML as its media type, and we just so happen to already have an XHTML
representation of rstat.us' functionality: the web interface itself! If
you take a look at the source of [http://rstat.us](http://rstat.us), you can see
that the markup for an update contains the attribute values we've been talking
about. We haven't made rstat.us completely compliant with the ALPS spec yet, 
but adding attributes to our existing output [has been fairly
simple](https://github.com/hotsh/rstat.us/commit/4e234556c73426dc16526883661b3feb1e2f7d9f).
By contrast, building out a Twitter-compatible JSON API would mean reimplementing an almost
entirely separate interface to rstat.us that would need to maintain a mapping
between its core functionality and the external behavior of Twitter's API.

But looking at the source of http://rstat.us again, you'll also see a lot of
other information in the source of the page. Most of it isn't needed for the use
of the API, so we're transferring a lot of unnecessary data back and forth. The
JSON responses are very compact in comparison; over time and with scale, this
could make a difference in performance.

I am also concerned that some operations that are straightforward with a
Twitter-style JSON API (such as getting one user's updates given their username)
seem complex when following the ALPS spec. With the JSON API, there is a
predefined URL with the username as a parameter, and the response contains
the user's updates. With the ALPS spec, starting from the root URL (which is the
only predefined URL in an ideal hypermedia API), we would need to do a minimum
of 4 HTTP requests. That would lead to some very tedious client code:

```ruby
require 'nokogiri'
require 'open-uri'

USERNAME = "carols10cents"
BASE_URI = "https://rstat.us/"

# Returns the first `a` element beneath +html+ whose `rel` attribute
# includes the value given as params[:rel].
#
# Raises a RuntimeError when no :rel is given.
def find_a_in(html, params = {})
  raise "no rel specified" unless params[:rel]

  # This XPath is necessary because @rel could have more than one value:
  # padding both sides with spaces lets us match one whole token.
  html.xpath(
    ".//a[contains(concat(' ', normalize-space(@rel), ' '), ' #{params[:rel]} ')]"
  ).first
end

# Resolves params[:relative] against params[:base] and returns the
# resulting absolute URI as a String. Both keys are required.
def resolve_relative_uri(params = {})
  relative = params[:relative] or raise "no relative uri specified"
  base     = params[:base]     or raise "no base uri specified"

  URI(base).merge(URI(relative)).to_s
end

# Fetches +relative_uri+ (resolved against BASE_URI) and returns the
# response body parsed as a Nokogiri HTML document.
def request_html(relative_uri)
  absolute_uri = resolve_relative_uri(
    :relative => relative_uri,
    :base     => BASE_URI
  )
  # Read through the URI object: open-uri adds #read to HTTP(S) URIs,
  # whereas Kernel#open no longer accepts URLs as of Ruby 3.0.
  Nokogiri::HTML::Document.parse(URI.parse(absolute_uri).read)
end

# Request the root URL
# HTTP Request #1
root_response = request_html(BASE_URI)

# Find the `a` with `rel=users-search` and follow its `href`
# HTTP Request #2
users_search_path = find_a_in(root_response, :rel => "users-search")["href"]
users_search_response = request_html(users_search_path)

# Fill out the `form` that has `class=users-search`,
# putting the username in the `input` with `name=search`

search_path = users_search_response.css("form.users-search").first["action"]
user_lookup_query = "#{search_path}?search=#{USERNAME}"

# HTTP Request #3
user_lookup_response = request_html(user_lookup_query)

# Find the search result beneath `div#users ul.search li.user` that has
# `span.user-text` equal to the username
search_results = user_lookup_response.css("div#users ul.search li.user")

result = search_results.detect { |sr|
  sr.css("span.user-text").text.match(/^#{USERNAME}$/i)
}

# Follow the `a` with `rel=user` within that search result
# HTTP Request #4
user_path = find_a_in(result, :rel => "user")["href"]
user_response = request_html(user_path)

# Extract the user's updates using the update attributes.
updates = user_response.css("div#messages ul.messages-user li")
puts updates.map { |li| li.css("span.message-text").text.strip }.join("\n")
```

This workflow could be cached so that the next time we try to get a user's
updates, we wouldn't have to make so many requests. The first two
requests for the root page and the user search page are unlikely to change
often, so when we get a new username we can start with the construction
of the `user_lookup_query` with a cached `search_path` value. That way, we would
only need to make the last two requests to look up subsequent users.
However, if the root page or the user search page do change, subsequent 
requests could fail. In that case, we'd need error handling code that clears 
the cache and starts from the root page again. Unfortunately, doing 
so would make the client code even more complicated.

We could simplify things by extending the ALPS spec to include a URI
template on the root page with a `rel` attribute to indicate that it's a
transition to information about a user when the template is filled out with
the username. The ALPS spec path would still work, but the shortcut would
allow clients to get at this data in fewer requests.
However, since it wouldn't be an official part of the spec, we'd need to
document it, and all clients that wanted to remain compatible with ALPS would
still need to implement the long way of doing things.

As you can see, there are significant tradeoffs between the two API styles,
and so it isn't especially easy to decide what to do. But because rstat.us
really needs an API in order to be a serious alternative to Twitter, we must 
figure out a way forward!

### Making a decision

After weighing all these considerations, we've decided to concentrate first on
implementing a Twitter-compatible JSON API, because it may allow our users
to interact with rstat.us using the clients they are already familiar with. Even
if those clients end up requiring some modifications, having an API that is easily 
understood by many developers will still be a big plus. For the long term, having a more flexible and
scalable solution is important, but those problems won't need to be solved
until there is more adoption. We may implement a hypermedia API (probably an
extension of the ALPS spec) in the future, but for now we will take the
pragmatic route in the hopes that it will encourage others to use
rstat.us and support its development.

### References

- [rstat.us](http://rstat.us) and its [code on github](https://github.com/hotsh/rstat.us)
- [ALPS microblogging spec](http://amundsen.com/hypermedia/profiles/)
- [Designing Hypermedia APIs](http://designinghypermediaapis.com) by Steve Klabnik
- [A Shoes hypermedia client for ALPS microblogging](https://gist.github.com/2187514)
- [Twitter API docs](https://dev.twitter.com/docs/api)
- [REST APIs must be hypertext-driven](http://roy.gbiv.com/untangled/2008/rest-apis-must-be-hypertext-driven)


================================================
FILE: articles/v5/005-process-spawning-patterns.md
================================================
*This article was contributed by [Jesse Storimer](http://jstorimer.com). He is
the author of [Working with Unix Processes](http://workingwithunixprocesses.com)
and [Working with TCP Sockets](http://workingwithtcpsockets.com), a pair of
ebooks providing fundamental Unix knowledge to Ruby developers. When he's not at
the keyboard, he's often enjoying the great Canadian outdoors with his family.*

Like many of you, I discovered Ruby via Rails and web development. That was my
"in." But before it was popular for writing web apps, Ruby was known for its
object-oriented fundamentals and for being a great scripting language. One of the reasons for
this latter benefit is that it's so easy to marry Ruby with command-line
utilities. Here's an example:

```ruby
task :console do
  `irb -r my_app`
end
```

There's something simple and beautiful in the combination of Ruby and the
command line here--the backticks are barely detectable. This code will technically 
accomplish what you think it will: it will drop you into an app-specific console  that is
basically an `irb` session with your app already required. But do you know what's 
going on inside that backtick method? 

Ruby provides many ways of spawning processes. Why use backticks instead of
`system`?

```ruby
task :console do
  system('irb -r my_app')
end
```

Or what about `exec`? Would that have been better?

```ruby
task :console do
  exec('irb', '-r', 'my_app')
end
```

In order to make this decision, you need to understand what these methods are
doing under the hood. The differences may be trivial for spawning a development
console, but picking one of these methods over another in a production environment can
have major implications.

In this article, we're going to reimplement the key parts of these process-spawning
primitives to get a better understanding of how they work and where they're most
applicable. Afterward, you'll have a greater understanding of how process
spawning works regardless of programming language and you'll have a grip on
which methods are most applicable in different situations.

## Starting somewhere

I have already hinted at a few different process-spawning methods--Ruby has
a ton of them. Off the top of my head, there's: `Kernel#system`,
<code>Kernel#\`</code>, `IO.popen`, `Process.spawn`, `open`, `shell`, `open3`,
`pty`, and probably more. All of these ship with Ruby, some in the core and
others in the standard library.

All of these spawning methods boil down to the same pattern, but we're not going
to implement them all. To save time, we'll stick with implementing `system` and
the backtick method. Either of these methods can be called
with a shell command as the argument. Both handle the command in slightly
different ways with slightly different outputs:

``` 
system('ls -l') #=> true
system('ls -l *.rb | ack Product') #=> true
system('boohoo') #=> nil
`git log -n1 --format=%h^` #=> 51e7a1c
`hostname` #=> jessebook
```

Let's start building them.

## Harnessing ourselves with tests

Before we dive into spawning processes head first, let's rein ourselves in a
bit. If we're going to reimplement what Ruby already provides, we're going to
need a way to test our implementation and make sure that it performs the same
way that Ruby does. Enter [RubySpec](http://rubyspec.org).

> The RubySpec project aims to write a complete executable specification for the
> Ruby programming language that is syntax-compatible with RSpec. RSpec is
> essentially a DSL (domain-specific language) for describing the behavior of
> code. This project contains specs that describe Ruby language syntax, core
> library classes, and standard library classes.

RubySpec provides a specification for the Ruby language itself, and we want to
reimplement a part of the Ruby language; therefore, we can use RubySpec
to test our implementation.

To use these specs to drive our implementation, we need to get two
things: RubySpec itself, and its testing library mspec. You can check
out [this README](https://github.com/rubyspec/rubyspec/blob/master/README) 
for installation instructions. To verify that things are working as 
expected, try running the kernel tests from within the RubySpec project
directory:

```bash
$ mspec core/kernel
```

To run our custom code against these tests, we can use
the familiar `-r` option with `mspec` to require a file that redefines
the methods we want to override. Let's do that, while at the same time 
running the `Kernel.system` specs:

```bash
$ touch practicing_spawning.rb
$ mspec -r ./practicing_spawning.rb core/kernel/system_spec.rb
```

Should be all green so far!

## Breaking the test

Let's begin our implementation by causing the tests to fail:

```ruby
# practicing_spawning.rb
module Kernel
  # Placeholder override: accepts any arguments, runs nothing, and returns nil.
  # It exists only to replace Ruby's built-in implementation so that we can
  # rebuild the behavior step by step.
  def system(*args)
    nil
  end

  # The first spec requires `system` to be a private method.
  private :system
end
```

The very first spec says that `system` should be private. I set that up right
away because it's not the interesting part. If we run the `system` specs again,
we get our first of several failures:

```console
1)
Kernel#system executes the specified command in a subprocess FAILED
Expected (STDOUT): "a\n"
          but got: ""
```

This failure directly relates to the following spec:

```ruby
it "executes the specified command in a subprocess" do
  lambda { @object.system("echo a") }.should output_to_fd("a\n")
end
```

If you've ever used the `system` method, this test should be easy to
understand. It says that shelling out to `echo` should output the echoed string.
If you [dig into](https://github.com/rubyspec/mspec/blob/master/lib/mspec/matchers/output_to_fd.rb#L68-70)
 the `output_to_fd` method that's part of `mspec`, you'll see that it's
expecting this output on `STDOUT`.

## fork and subprocesses

The failing spec title says that `system` spawns a subprocess. If you're
creating new processes on a Unix system, that means using `fork`:

> ------------------------------------------------------------------------------
>   Kernel.fork  [{ block }]   -> fixnum or nil  
>   Process.fork [{ block }]   -> fixnum or nil
>    
> ------------------------------------------------------------------------------
> 
> Creates a subprocess. If a block is specified, that block is run in the
> subprocess, and the subprocess terminates with a status of zero. Otherwise,
> the fork call returns twice, once in the parent, returning the process ID of
> the child, and once in the child, returning nil.

This bit of Ruby documentation gives you an idea of what `fork` does. It's
conceptually similar to going on a hike and coming to a fork in the trail. The
trail represents the execution of a process over time. Whereas humans can only
pick one path, when a process is forked it literally continues down both
branches of the trail in parallel. What was one process becomes two independent 
processes. This behavior is specified by the fork(2) manpage:

> Fork() causes creation of a new process.  The new process (child process) is
> an exact copy of the calling process (parent process) [...]

When you `fork`, you start with one process and end up with two processes that
are *exactly the same*. In some cases, this means that everything is copied from
one process to the other. But if [copy-on-write
semantics](http://en.wikipedia.org/wiki/Copy-on-write) are implemented,
the two processes may physically share memory until one of them tries to
modify it; then each gets its own copy written out.

Although understanding `fork` is certainly helpful, we still haven't quite figured
out how to implement the `system` method. We know that we can take our Ruby 
process and create a copy of it with `fork`, but how do we then turn the 
new child process into an `echo` process?

## fork + exec

The `fork` + `exec` pattern for spawning processes is the blueprint upon which
most process spawning is built. We've already looked at `fork`, so what
about `exec`?

`exec` transforms the current process into another process. Using
`exec`, you can transform a Ruby process into an `ls` process, another Ruby
process, or an `echo` process:

```ruby
puts 'hi from Ruby'
exec('ls')
puts 'bye from Ruby' # will never be reached
```

This program will never get to the last line of Ruby code. Once it has performed
`exec('ls')`, the Ruby program no longer exists. It has been transformed to `ls`.
So there's no possible way for it to get back to this Ruby program and finish
execution.

## Finally, a passing test

With `fork` and `exec`, we now have the building blocks that we need to implement
our own `system` method. Here's the most basic implementation:

```ruby
# practicing_spawning.rb
module Kernel
  # Runs the given command in a subprocess and reports whether it exited
  # successfully (true for exit status 0, false otherwise).
  def system(*args)
    # Fork a child whose only job is to turn itself into the requested program.
    child_pid = fork do
      exec(*args)
    end

    # fork() leaves the parent and the child running side by side, so the
    # parent has to block here until the child exits. Only then is the child's
    # exit status available to inspect.
    exit_status = Process.waitpid2(child_pid).last
    exit_status.success?
  end

  private :system
end
```

If we run this against the same spec as before, more tests pass, but
not all of them. Still, getting that initial spec to pass means that we're headed
in the right direction.

There are three very simple Unix programming primitives in use here: `fork`,
`exec`, and `wait`. We've already talked about `fork` and `exec`, the
cornerstone of Unix process spawning. The third player here, `wait`, is often
used in unison with these two. It tells the parent process to wait for the child
process before continuing, rather than continuing execution in parallel. This is
a pretty common pattern when spawning shell commands, because you usually want to
wait for the output of the command.

In this case, we collect the status of the child when it exits and return the
result of `success?`. This result is `true` for a successful exit status code (i.e., 0)
and `false` for any other value.

## Getting back to green

Now we need to get the rest of the `system` specs passing. In
the remainder of the failures, we see the following output:

```console
1) 
Kernel#system returns nil when command execution fails FAILED
Expected false to be nil
<snipped backtrace...>

2)
Kernel#system does not write to stderr when command execution fails FAILED
Expected (STDERR): ""
         but got: "/[...]/practicing_spawning.rb:8:in `exec': No such 
         file or directory - sad (Errno::ENOENT)
<snipped backtrace...>
```

These failures relate to the following specs:

```ruby
ruby_version_is "1.9" do
  it "returns nil when command execution fails" do
    @object.system("sad").should be_nil
  end
end

it "does not write to stderr when command execution fails" do
  lambda { @object.system("sad") }.should output_to_fd("", STDERR)
end
```

Both of these specs are testing the same situation: trying to `exec` a command
that doesn't exist. When this happens, it actually raises an exception in
the subprocess, as is evidenced by the previously listed failure #2, which prints an
exception message along with a stacktrace on its `STDERR`, whereas the spec
expected that `STDERR` would be empty.

So when the subprocess raises an exception, we need to notify the parent process
of what went wrong. Note that we can't use Ruby's regular exception handling in
this case because the exception is happening inside the subprocess. The
subprocess got a copy of everything that the parent had, including the Ruby
interpreter. So although all of the code is sourced from the same file, we can't
depend on regular Ruby features because the processes are actually running on
their own separate copies of the Ruby interpreter!

To solve this problem, we need some form of interprocess communication (IPC).
Keeping with the general theme of this article, we'll use a Unix pipe.

## The pipe

A call to `IO.pipe` in Ruby will return two `IO` objects, one readable and
one writable. Together, they form a one-way data 'pipe'. Data is written
to one `IO` object and read from the other:

```ruby
rd, wr = IO.pipe
wr.write "ping"
wr.close

rd.read #=> "ping"
```

A pipe can be used for IPC by taking advantage of `fork` semantics. If you
create a pipe before forking, the child process inherits a copy of the pipe
from its parent. As both have a copy, one process can write to the pipe while
the other reads from it, enabling IPC. Pipes are
backed by the kernel itself, so we can use them to communicate between our independent
Ruby processes.

## Implementing system() with a pipe

Now we can roll together all of these concepts and write our own implementation
of `system` that passes all the specs:

```ruby
# practicing_spawning.rb
module Kernel
  # Runs the given command in a subprocess.
  #
  # Returns true if the command exited with status 0, false if it exited with
  # any other status, and nil if the command could not be executed at all
  # (for example, because the program does not exist).
  def system(*args)

    # The pipe is the channel the child uses to tell the parent that exec
    # itself failed, which an exit status alone cannot express.
    rd, wr = IO.pipe

    # Create a new subprocess that will just exec the requested program.
    pid = fork do
      # The subprocess closes its copy of the reading end of the pipe
      # because it only needs to write.
      rd.close

      begin
        exec(*args)
      rescue SystemCallError

        # In case of failure, write a byte to the pipe to signal that an exception
        # occurred and exit with an unsuccessful code.
        wr.write('.')
        exit 1
      end
    end

    # The parent process closes its copy of the writing end of the pipe
    # because it only needs to read. This also guarantees that the reading end
    # sees EOF once the child is gone.
    wr.close

    # Tell the parent to wait.
    _, status = Process.waitpid2(pid)

    # If the reading end of the pipe has no data, there was no exception
    # and we fall back to the exit status code of the subprocess. Otherwise,
    # we return nil to denote the error case.
    if rd.eof?
      status.success?
    else
      nil
    end
  ensure
    # Release both ends of the pipe in the parent no matter how we leave the
    # method (including when fork itself raises); otherwise every call would
    # leak file descriptors until the garbage collector happened to run.
    rd.close if rd && !rd.closed?
    wr.close if wr && !wr.closed?
  end

  private :system
end
```

All green!

## Implementing backticks

Now that you've got the fundamentals under your belt, we can apply these concepts to the
implementation of other process-spawning methods. Let's do backticks:

```ruby
# practicing_spawning.rb
module Kernel
  # Runs the given command string in a subprocess and returns everything the
  # command wrote to its standard output as a String.
  def `(str)
    rd, wr = IO.pipe

    # Create a new subprocess that will exec just the requested program.
    pid = fork do
      # The subprocess closes its copy of the reading end of the pipe
      # because it only needs to write.
      rd.close

      # Anything that the exec'ed process would have written to $stdout will
      # be written to the pipe instead.
      $stdout.reopen(wr)

      exec(str)
    end

    # The parent process closes its copy of the writing end of the pipe
    # because it only needs to read. Without this, the read below would never
    # see EOF.
    wr.close

    # The parent drains the pipe *before* waiting for the child. A pipe has a
    # limited buffer: if the parent waited first, a child that produces more
    # output than the buffer holds would block on its write forever while the
    # parent blocked waiting for it to exit.
    output = rd.read

    # Now that the child has closed its end of the pipe, reap it so that it
    # does not linger as a zombie (this also sets $?).
    Process.waitpid(pid)

    # The parent returns whatever it read from the pipe.
    output
  ensure
    # Release both ends of the pipe in the parent however we leave the method.
    rd.close if rd && !rd.closed?
    wr.close if wr && !wr.closed?
  end

  private :`
end
```

Now we can run the backticks spec against our implementation and see that it's
all green!

```console
$ mspec -r ./practicing_spawning.rb core/kernel/backtick_spec.rb
```

The full source for our `practicing_spawning.rb` file is available [as a gist](https://gist.github.com/3730986). 

## Closing notes

I find something special in spawning processes. You get to dig down
below the top layer of your programming language to the lower layer where All
Things Are One. When dealing with things such as `fork`, `exec`, and `wait`, your
operating system treats all processes equally. Any Ruby process can transform
into a C program, or a Python process, or vice versa. Similarly, you can `wait`
on processes written in any language. At this layer of abstraction, there are only
the system and its primitives.

We spend a lot of our mental energy worrying about good principles such as
abstraction, decoupling, and efficiency. When digging down a layer and learning what
your operating system is capable of, you see an extremely robust and abstract
system. It cares not how you implement your programs but offers the same
functionality for any running program. Understanding your system at this level
will really show you what it's capable of and give you a good mental
understanding of how your system sees the world. Once you really grasp the
`fork` + `exec` concepts, you'll see that these are right at the core of a Unix system.
Every process is spawned this way. The simplest example is your shell, which uses
this very pattern to launch programs.

I'll leave you with two more tips:

1. Use `exec()` at the end of scripts to save a process. Remember the early example
in which a rake task spawned an `irb` session? The obvious
choice in that case is to use `exec`.

    Any other variant will require forking a new process that then execs and
    has the parent wait for it. Using `exec` directly eliminates the need for an extra
    process by transforming the `rake` process directly into an `irb` process.
    This trick obviously won't work in situations where you need to shell out and then
    work with the output, but keep it in mind if the last line of your script
    just shells out.

2. Pass an `Array` instead of a `String`. The backticks method always takes a
string, but the `system` method (and many other process spawning methods) will
take an array or a string. 

    When passed a string, `exec` may spawn a shell to interpret the
    command, rather than executing it directly. This approach is handy for stuff like
    `system('find . | ack foobar -l')` but is very dangerous when user input is
    involved. An unescaped string makes shell injection possible. Shell
    injection is like SQL injection, except that a compromised shell could provide an
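    attacker with root access to your entire system! Passing the command name and
    each of its arguments as separate parameters (e.g., `system('ls', '-l')`, or
    `system(*args)` when they are stored in an array) will never spawn a shell but
    will pass the elements directly as the `ARGV` of the exec'ed process.
    Always do this.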

Finally, if you enjoyed these exercises, try to implement some of
the other process spawning primitives I mentioned. With RubySpec as your guide,
you can try reimplementing just about anything with confidence. Doing so will
surely give you a better understanding of how process spawning works in Ruby--or 
any Unix environment.

Please leave a comment and share your code if you implement some pure-Ruby versions 
of these spawning methods. I'd love to see them!


================================================
FILE: articles/v5/006-service-quality.md
================================================
Software projects need to evolve over time, but they also need to avoid
collapsing under their own weight. This balancing
act is something that most programmers understand, but it is often 
hard to communicate its importance to nontechnical stakeholders. 
Because of this disconnect, many projects operate under the
false assumption that they must stagnate in order to stabilize. 

This fundamental misconception about how to maintain a stable codebase has some
disastrous effects: it causes risk-averse organizations to produce stale 
software that quickly becomes irrelevant, while risk-seeking organizations ship 
buggy code in order to rush features out the door faster than their 
competitors. In either case, the people who depend on the software produced by
these teams give up something they shouldn't have to.

I have always been interested in this problem, because I feel it is at the 
root of why so many software projects fail. However, my work on Practicing Ruby
has forced me to become much more personally invested in solving it. As someone
attempting to maintain a very high-quality experience on a shoestring budget, I
now understand what it is like to look at this problem from a stakeholder's
point of view. In this article, I will share the lessons that Jordan Byron and 
I have learned from trying to keep Practicing Ruby's web application stable as
it grows.

### Lesson 1: Work incrementally

Inspired by [Lean Software Development][lean] practices, we now
view all work-in-progress code as a form of waste. This way of looking at things 
has caused us to eschew iteration planning in favor of shipping a single
improvement or fix at a time. This workflow may seem a bit unrealistic at 
first glance, but with some practice it gets easier to break very 
complicated features into tiny bite-sized chunks. We now work this way by
habit, but our comment system was the first thing we approached in 
this fashion.

When we first implemented comments, we had Markdown support, but not much else. 
Later, we layered in various improvements one by one, including syntax 
highlighting, email notifications, Twitter-style mentions, and Emoji support. 
With so little development time available each week, it would have taken 
months to ship our discussion system if we attempted to build it all at once.
With that in mind, our adoption of a Lean-inspired deployment strategy was not just a
workflow optimization—it was an absolute necessity. We also eventually came to 
realize that this constraint was a source of strength rather than weakness for
us. Here's why:

> Developing features incrementally reduces the number of moving parts 
to integrate on each deploy. This reduction in turn limits the number of new
defects introduced during development.

> When new bits of functionality do fail, finding the root cause of the problem is usually easy, and even when it isn't, rolling the system
back to a working state is much less traumatic. Together, these approaches result in
a greatly reduced day-to-day maintenance cost, which means that more time can
be spent on value-producing work.

As you read through the rest of the guidelines in this article, you'll find that
although they are useful on their own, they are made much more effective by this
shift in the way we ship things.

### Lesson 2: Review everything

It is no secret that code reviews are useful for driving up quality and
reducing the number of defects that are introduced into production in the first
place. However, figuring out how to conduct a good review is something that
takes a bit of fine-tuning to get right. Here is the set of steps we eventually
settled on:

1. The reviewer attempts to actually use the new feature while its developer 
answers any questions that come up along the way. Whenever an unanticipated
edge case or inconsistency is found, we immediately file a ticket for it. We
repeat this process until all open questions or unexpected issues have been 
documented.

    Unless the feature's developer has specific technical questions for the
    reviewer, we don't bother with in-depth reviews of implementation details until
    all functional issues have been addressed. This prevents us from spending time
    on bikeshed arguments about refactorings or hypothetical sources of
    failure at the code level. Doing things this way also reminds us that the
    external quality of our system is our highest priority and that although clean
    code makes building a better product easier, it is a means, not an end in itself.

2. Once a feature seems to work as expected by both the developer and
the reviewer, we next turn our attention to the tests. It is the
reviewer's responsibility to make sure that the tests cover the issues brought
up during the review and to verify that they exercise the 
feature well enough to prevent it from silently breaking. Sometimes the reviewers will ask the developer
of the feature to write the tests; other times it is easier for the reviewers to
write the tests themselves rather than trying to explain what is needed. 

    In either case, the end result of this round of changes is that the feature's
    requirements become clearer as the tests are updated to cover more subtle
    details. Because many of these tests can be written at the UI level, it is
    common to have not yet discussed implementation details at this stage of a
    review.

3. By now, the feature is tested well enough, and its functionality has been 
exercised more than a few times. That means that a spot check of its source code 
is in order. The goal is not to make the code perfect, 
but to identify both low-cost improvements that can be done right away 
and any serious warning signs of potential problems that may make the 
code hard to maintain or prone to error. Everything else is 
something that can be dealt with later—if and when a feature needs to be 
improved or modified.

Even though these items are listed in order, it's better to think of them as layers
rather than procedural steps. You need to start at the outermost layer, then
dig down as needed to fully answer each question that comes up during a review.
This may sound like a very rigorous procedure, but it isn't as daunting as it
seems. You can get an idea of what this process looks like in practice by reading through 
the conversation on [this pull request][pr-76]. Here's why we invest the extra
effort:

> Reviewing functionality first, tests second, and
> implementation last helps ensure that the right kinds of
> conversations happen at the right time. If a feature isn't implemented
> correctly or is poorly usable, it doesn't matter how well written its tests
> are. Likewise, if test coverage is inadequate, it isn't wise to recommend 
> major refactorings to production code. This simple prioritization keeps
> the focus on improving the *application* rather than the *implementation*.

Even with a very good review process, bad things still happen. That's
why the remaining four lessons focus on what to do when things go wrong, but keep
in mind that actively reviewed projects help prevent unexpected failures from
happening in the first place.

### Lesson 3: Stay alert

When something breaks, we want to know about it as soon as possible.
We rely on many different ways of detecting problems, and we automate as much as
we can.

Our first line of defense is our continuous integration (CI) system. 
We use [Travis CI][travis], but for our purposes pretty much any CI tool would
work. Travis does a great job of catching environmental issues for us: things
such as unexpected changes in dependencies, application configuration problems,
platform-specific failures, and other subtle things that would be hard to 
notice in development. More important, it helps protect us from ourselves: even if we 
forget to run the entire test suite before pushing a set of changes, 
Travis never forgets, and will complain loudly if we've broken the 
build. Most of the mistakes that CI can detect are quite 
trivial, but catching them before they make it to production helps us 
keep our service stable.

For the bugs that Travis can't catch (i.e., most of them), we rely on
the [Exception Notifier][exception-notification] plugin for Rails. Most
notification systems would probably do the trick for us, but we like that Exception
Notifier is email-based; it fits into our existing workflow nicely. The default
template for error reports works great for us because it provides everything
you tend to get from debugging output during development: session data,
environment information, the complete request, and a stack trace. If we start to
notice exception reports rolling in soon after we've pushed a change to the system, this
information is usually all we need in order to find out what caused the problem.

Whenever we're working on features that are part of the critical path of our
application, we tend to use the UNIX `tail -f` command to watch our production 
logs in real time. We also occasionally write ad hoc reports that give us 
insight into how our system is working. For example, we built the following 
report to track account statuses when we rolled out a partial replacement 
for our registration system. We wanted to make sure it was possible for folks to
successfully make it to the "payment pending" status, and the report showed
us that it was:

![Account status report](http://i.imgur.com/NOI0A.png)

Our proactive approach to error detection means that we can rely less on
bug reports from our subscribers and more on automated reports and alerts. This approach
works fairly well most of the time, and we even occasionally send messages
to people who were affected by bugs with either an apology and a note that we
fixed the problem, or a link to a ticket where they can track our progress on
resolving the issue. We do display our email address on all of our error pages,
but we place a high priority on making sure that subscribers need to use
it only to provide extra context for us, rather than to notify us that a
problem exists.

Before we move on to the next lesson, here are a few things to remember about
this topic:

> The main reason to automate error detection as much as possible is that the
people who use your application should not be treated like unpaid QA testers.
The need for an active conversation with your users every time something goes
wrong is a sign that you have poor visibility into your application's failures,
and it will pay off to fix this problem. However, every automated error
detection system requires some fine-tuning to get it right, and you may need
to make pragmatic compromises from time to time.

> Automated error detection is almost always a good thing: the main question is how extensive
> you want it to be. For small projects, something as simple as maintaining a
> detailed log file is enough; for larger projects, much more sophisticated
> systems are needed. The key is to choose a strategy that works for your
> particular context, rather than trying to find a one-size-fits-all
> solution.

If automated error detection interests you, please post a
comment about your experiences after you finish reading this article. It
is a very complex topic, and I feel like I've only scratched the surface
of it in my own work, so I'd love to hear some stories from our readers.

### Lesson 4: Roll back ruthlessly

Working on one incremental improvement at a time makes it easy 
to revert newly released functionality immediately if we find 
out that it is defective. At first, we got into the habit of 
rolling things back to a stable state because we didn't
know when we'd get around to fixing the bugs we encountered. Later, we discovered that this approach allows us to take
our time and get things right rather than shipping quick
fixes that felt like rushed hacks.

In order to make rollbacks painless, good revision control processes
are essential. We started out by practicing [GitHub Flow][gh-flow]
in its original form, which consisted of the following steps:

1. Anything in the master branch is deployable.

2. To work on something new, create a descriptively named branch off of the master.

3. Commit to that local branch and regularly push your work to the server.

4. When you need feedback or help, or you want to merge, open a pull request.

5. After someone else has reviewed the feature, you can merge it into the master.

6. Once it is merged and pushed to the master, you can and should deploy
immediately.

Somewhere down the line, we made a small tweak to the formula by deploying
directly from our feature branches before merging them into the master branch. This
approach allows every improvement we ship to get some live testing time in
production before it gets merged, greatly increasing the stability
of our mainline code. Whenever trouble strikes, we redeploy from our master branch, 
which executes a rollback without explicitly reverting any
commits. As it turns out, this approach is very similar to [GitHub's more recent
deployment practices][gh-deploy-aug-2012], minus their fancy robotic
helpers.

Although this process significantly reduces the number of defects on our master branch,
we do occasionally come across failures that are in old code rather than in our
latest work. When that happens, we tend to fix the issues directly on the master, 
verify that they work as expected in production, and then attempt to merge those changes into any active
feature branches. Most of the time, these merges can be cleanly applied, so
it doesn't interrupt our work on new improvements all that much. But when things
get messy, it is a reminder for us to take a step back and look at the big
picture:

> In a healthy system, rollbacks should be easy, particularly when feature
branches are used. When this process does not go smoothly, it is usually a 
sign of a deeper problem:

> 1) If lots of bugs need to be fixed on the master branch,
it is a sign that features may have been merged prematurely, or that
the application's integration points have become too brittle and need some 
refactoring.

> 2) If a new feature repeatedly fails in production despite attempts to fix
it, it may be a sign that the feature isn't very well thought out and that
a redesign is in order.

> Although neither of these situations is pleasant to deal with, addressing them
right away helps prevent them from spiraling out of control. This approach makes sure
that small flaws do not evolve into big ones and minimizes the project's
pain points over the long haul.

Despite these benefits, this practice does feel a bit ruthless at times, 
and it definitely takes some getting used to. However, by treating rollbacks 
as a perfectly acceptable response to a newly discovered defect rather 
than an embarrassing failure, a totally different set of priorities is 
established that helps keep things in a constant state of health. 

### Lesson 5: Minimize effort

Every time we find a defect in one of our features, we ask ourselves whether
that feature is important enough to us to be worth fixing at all. Properly
fixing even the simplest bugs takes time away from our work on other
improvements, so we are tempted to cut our losses by removing
defective code rather than attempting to fix it. Whether we can get away with
that ultimately depends on the situation.

---

**Critical defects:** Sometimes bugs are severe enough that they need to be dealt with right away,
and in those cases we [stop the line][autonomation] to give the issue the
attention it deserves. The best example of this practice that we've encountered in recent
times was when we neglected to update our omniauth dependency before GitHub shut
down an old version of their API, which disabled logins temporarily for all
Practicing Ruby subscribers. We had an emergency fix out within hours, but it
predictably broke some stuff. Over the next couple days, we added fixes for
the edge cases we hadn't considered until the system stabilized again. Because
this wasn't the kind of defect we could easily work around or roll back from, we
were working under pressure, and attempting to work on other things during that
time would have just made matters worse.

**Trivial defects:** At the other extreme, some bugs are so easy to fix that it makes
sense to take care of them as soon as you notice them. A few weeks before this
article was published, I noticed that our broadcast email system was treating our
plain-text messages as if they were HTML that needed to be escaped, which caused
some text to be mangled. If you don't count the accompanying test, fixing this
problem was [a one-line change][htmlescape] to our production code. Tiny bugs 
like this should be fixed right away to prevent them from accumulating 
over time.

**Moderate defects:** Most of the bugs we discover fall somewhere 
between these two extremes, and figuring out how to deal with them is not nearly so
straightforward. We've gradually learned that it is
better to assume that a feature can be either cut or simplified and then try to
prove ourselves wrong rather than thinking that it absolutely must be fixed.

One area where we failed to keep things simple at first was in our work on
account cancellation. Because we were in the middle of a transition to a 
new payment provider, this feature ended up being more complicated to
implement than we expected. After several hours of discussion and
development, we ended up with something that almost worked but still had
many kinks to be ironed out. Almost immediately after we deployed the feature
to production, we noticed that it wasn't working as expected and
promptly rolled it back.

We thought for some time about what would be needed in order to fix the
remaining issues and eventually came to realize that we had overlooked an
obvious shortcut: instead of fully automating cancellations, we could make it so
the unsubscribe button sent us an email with all the details necessary to close
accounts upon request. This process takes only a few seconds to do manually and
happens only a few times a week. Most important, the semi-automatic
approach was easy to understand with few potential points of failure and could
be designed, implemented, and tested in less time than it took for us to think
through the issues of the more complicated system. In other words, it required
less effort to ship this simple system than it would have taken to fix the
complicated one, so we scrapped the old code.

---

Every situation is different, but hopefully these examples have driven home the
point that dealing with bugs requires effort that might or might not be better spent
elsewhere. In summary:

> Critical flaws and trivial errors both deserve immediate attention: the former
> because of their impact on people, the latter due to the fact that they get
> harder to fix as they accumulate. Unfortunately, most bugs are in between these two extremes and must be evaluated on a case-by-case basis.

> You can't just decide whether a bug is worth fixing based on the utility of the
> individual feature it affects: you need to think about whether your time would be
> better spent working on other things. It is worth resolving defects only
> if the answer to that question is "No!". Even if it is emotionally challenging
> to do so, sometimes it makes sense to kill off a single buggy feature if doing
> so improves the overall quality of your system.

Of course, if you do decide to fix a bug, you need to do what you can to prevent
that time investment from going to waste. Regression testing can help with that,
and that's why we've included it as the sixth and final lesson in this article.

### Lesson 6: Prevent regressions 

One clear pattern that time has taught us is that all bugs that are not covered by
a test eventually come back. To prevent this from happening, we
try to write UI-level acceptance tests to replicate defects as the first step 
in our bug-fixing process rather than the last.

Adopting this practice was very tedious at first. Even though [Capybara][capybara]
made it easy to simulate browser-based interactions with our application, 
dropping down to that level of abstraction every time we found a new
defect both slowed us down and frustrated us. We eventually realized that we
needed to reduce the friction of writing our tests if we wanted this good habit
to stick. To do so, we started to experiment with some ideas I hinted at
back in [Issue 4.12.1][pr-4.12.1]: application-specific helper objects for 
end-to-end testing. We eventually ended up with tests that look something like
the following example:

```ruby
class ProfileTest < ActionDispatch::IntegrationTest
  test "contact email is validated" do
    simulate_user do
      register(Support::SimulatedUser.default)
      edit_profile(:email => "jordan byron at gmail dot com")
    end

    assert_content "Contact email is invalid"
  end

  # ...
end
```

If you strip away the syntactic sugar that the `simulate_user` method provides,
you'll find that this is what is really going on under the hood:

```ruby
test "contact email is validated" do
  user = Support::SimulatedUser.new(self)

  user.register(Support::SimulatedUser.default)
  user.edit_profile(:email => "jordan byron at gmail dot com")

  assert_content "Contact email is invalid"
end
```

Even without reading the [implementation of Support::SimulatedUser][simulated-user],
you have probably already guessed that it is a simple wrapper around Capybara's
functionality that provides application-specific helpers. This object provides
us with two main benefits: reduced duplication in our tests, and a vocabulary
that matches our application's domain rather than its delivery mechanism. The
latter feature is what reduces the pain of assembling tests to go along 
with our bug reports.

Let's take a moment to consider the broader context of how this email
validation test came into existence in the first place. Like many changes we
make to Practicing Ruby, this particular one was triggered by an exception
report that revealed to us that we had not been sanity-checking email 
addresses before updating them. This problem was causing a 500 error to be 
raised rather than failing gracefully with a useful failure message, which pretty
much guaranteed a miserable experience for anyone who encountered it. The steps
to reproduce this issue from scratch are roughly as follows:

1. The user registers for Practicing Ruby.
2. The user attempts to edit his or her profile with a badly formatted email address.
3. The user *should* see a message saying that the email is invalid but
instead encounters a 500 error and a generic "We're sorry, something went wrong"
message.

If you compare these steps to the ones that are covered by the test, you'll see
that they are almost identical to one another. Although the verbal description is
something that may be easier to read for nonprogrammers, the tests communicate
the same idea at nearly the same level of abstraction and clarity to anyone who
knows how to write Ruby code. Because of this, it isn't as easy for us to
come up with a valid excuse for not writing a test or putting it off until
later.

Of course, old habits die hard, and occasionally we still cut corners when
trying to fix bugs. Every time we encounter an interaction that our
`SimulatedUser` has not yet been programmed to handle, we experience the same
friction that makes it frustrating to write acceptance tests in the first place.
When that happens, it's tempting to put things off or to cobble together a test
in haste that verifies the behavior, but in a sloppy way that doesn't make
future tests easier to write. The lesson here is simple: even the most
disciplined processes can easily break down when life gets too busy or too
stressful.

To mitigate these issues, we rely once again on the same practice that allows 
us to let fewer bugs slip into production in the first place: active peer
review. Whenever one of us fixes a bug, the other person reviews it for quality and
completeness. This process puts a bit of peer pressure on both of us to not be sloppy
about our bug fixes and also helps us catch issues that would otherwise hide
away in our individual blind spots. 

In summary, this approach towards regression testing has taught us the following
lesson:

> Any time not spent hunting down old bugs or trying to pin down new ones is 
time that can be spent on value-producing work. Automated testing can really
help in this context, but only if the friction of writing new high-level tests
is minimized.

> Even with convenient application-level test helpers, it can still be tedious
to test behaviors that haven't been considered before, which makes it tempting
to cut corners or to leave out testing entirely in the hopes that someone else 
will get to it later. To keep us from doing this, bug fixes should be reviewed for 
quality just as improvements are, and their tests should be augmented as
needed whenever they seem to come up short.

It does require a little bit of will power, but this habit can work
wonders over time. The trick is to make practicing it as easy as 
possible so that it doesn't bog you down.

### Reflections

Do we follow all of these practices completely and consistently without fail? Of
course not! But we do try to follow them most of the time, and we have
found that they work best when done together. That's not to say
that removing or changing any one ingredient would spoil the soup, only that
it's hard for us to guess what their effects would be like in isolation.

It's important to point out that we adopted these ideas organically rather
than carefully designing a process for ourselves to rigidly follow. This article
is more of a description of how we viewed things at the time it was
published than a prescription for how people ought to approach all
projects all the time. We've found that it's best to maintain a consistent
broad-based goal (ours is to make the best possible user experience with the
least effort) and to continuously tweak your processes as needed to meet that
goal. Working habits need to be treated with a bit of fluidity because brittle
processes can kill a project even faster than brittle code can.

In the end, much of this is very subjective and context dependent. I've shared
what works for us in the hopes that it'll be helpful to you, but I want to
hear about your own experiences as well. Because our process is
nothing more than an amalgamation of good ideas that other people have come up
with, I'd love to hear what you think might be worth adding to the mix.

> **UPDATE**: Although this article recommends using `tail -f` to watch logs in real
> time, it may be [better to use less +F][less], because it makes scrollbacks
> easier and can resume real-time monitoring at any time. Thanks to @sduckett for the suggestion.

[mendicant]: http://mendicantuniversity.org
[travis]: http://about.travis-ci.org/docs/user/getting-started/
[lean]: http://en.wikipedia.org/wiki/Lean_software_development 
[exception-notification]: https://github.com/smartinez87/exception_notification
[gh-flow]: http://scottchacon.com/2011/08/31/github-flow.html
[capybara]: https://github.com/jnicklas/capybara
[pr-4.12.1]: http://practicingruby.com/articles/66
[simulated-user]: https://github.com/elm-city-craftworks/practicing-ruby-web/blob/f00f89b0a547829aea4ced523a3d23a136f1a6a7/test/support/simulated_user.rb
[autonomation]: http://en.wikipedia.org/wiki/Autonomation
[htmlescape]: https://github.com/elm-city-craftworks/practicing-ruby-web/commit/223ca92a0b769713ce3c2137de76a8f34f06647e
[gh-deploy-aug-2012]: https://github.com/blog/1241-deploying-at-github
[pr-76]: https://github.com/elm-city-craftworks/practicing-ruby-web/pull/76
[less]: http://blog.libinpan.com/2009/07/less-is-better-than-tail/


================================================
FILE: articles/v5/007-dependencies-notifications-and-adjustments.md
================================================
> **CREDIT:** Although this article is my own work, it is based on ideas I got from
> a very different but interesting [early draft][dna-draft] from Practicing Ruby reader
> [Mike Subelsky][subelsky]. I owe him a huge hat tip for suggesting that we cover
> this topic and for helping me get started with it.

The challenge of sensibly connecting a set of objects together to perform a
complex task is one that confounds programmers of all skill levels. In fact, 
it is hard to reason about the relationships between objects without getting 
trapped by analysis paralysis. With that in mind, it is no surprise that so
many of us struggle with this particular aspect of object-oriented programming. 

But like so many other problems we encounter in our work, this one can
be simplified greatly by introducing a common vocabulary and some rough
heuristics that make thinking and communicating about our code easier.
For reasoning about this particular design challenge, the 
"Object Peer Stereotypes" described in [Growing Object-Oriented Software, Guided
by Tests][GOOS] give us some very useful conceptual tools 
to work with.

In this article, we will explore the three stereotypical relationships 
between an object and its peers that were described in GOOS: 
dependencies, notifications, and adjustments. Taken together, these 
rough categorizations do a good job of identifying the kinds of
connections that exist between objects, which makes it easier
to develop a more nuanced view of how they communicate with each other.

The specific examples in this article are based on code from 
[Newman][newman] (my experimental email-based microframework), but the
general concepts that we'll discuss are relevant to all object-oriented 
software. If you keep your own projects in the back of your mind as you
read on, you'll easily find similarities between Newman's design 
challenges and your own.

## Dependencies

> Services that the object requires from its peers so it can perform its
> responsibilities. The object cannot function without these services. It should
> not be possible to create the object without them. (GOOS, pg. 52)

Whether they are internal or external, dependency relationships need to be
carefully managed in order to prevent brittleness. 
Alistair Cockburn's [ports and adapters][ports-and-adapters] pattern provides
one way of dealing with this problem: define abstract *ports* in the 
application's domain language that cover slices of functionality, and then build 
implementation-specific *adapters* with compatible interfaces. This approach allows dependencies 
to be reasoned about at a higher level of abstraction and makes it so that systems 
can be easily changed.

We applied this pattern (albeit without recognizing it by name) when thinking
through how Newman should handle its email dependency. We knew from the outset
that we'd need to support some sort of test mailer and that it should be a
drop-in replacement for its real mailer. We also anticipated that down the line
we might want to support delivery mechanisms other than the `mail` gem and
figured that some sort of adapter-based approach would be a good fit.

Constructing a port involves thinking through the various ways a 
subsystem will be used within your application and then 
mapping a protocol to those use cases. In Newman, we expected that
our email dependency would need to support the following requirements:

1) Read configuration data from a `Newman::Settings` object if necessary.

```ruby
mailer = AnyMailAdapter.new(settings)
```

2) Retrieve all messages from an inbox, deleting them from the server in the
process.

```ruby
mailer.messages.each do |message|
  do_something_exciting(message) 
end
```

3) Construct a complete message and deliver it immediately.

```ruby
mailer.deliver_message(:to      => "test@test.com",
                       :from    => "gregory@practicingruby.com",
                       :subject => "A special offer for you!!!",
                       :body    => "Send me your credit card number, plz!")
```

4) Construct a message incrementally and then deliver it later, if at all. 

```ruby
message = mailer.new_message(:to   => "test@test.com",
                             :from => "gregory@practicingruby.com")

if bank_account.balance < 1_000_000_000
  message.subject = "Can I interest you in some prescription painkillers?"
  message.body    = "Best prices anywhere on the internets!!!"
  message.deliver
end
```

Although you can make an educated guess about how to implement adapters
for this port based on the previous examples, there are many
unanswered questions lurking just beneath the surface. This is where
the difference between *interfaces* and *protocols* becomes important:

> An interface defines whether two things can fit together, a protocol 
defines whether two things can *work together* (GOOS, pg. 58)

If you revisit the code examples shown above, you'll notice that the interface
requirements for a Newman-compatible mail adapter are roughly as follows:

* The constructor accepts one argument (the settings object).
* The `messages` method returns a collection that responds to `each` and yields
an object for each message in the inbox.
* The `deliver_message` method accepts one argument (a parameters hash).
* The `new_message` method accepts a parameters hash and returns
an object representing the message. At a minimum, the object allows certain fields
to be set (i.e., `subject` and `body`) and responds to a `deliver` method.

Building an object that satisfies these requirements is trivial, but there is
no guarantee that doing so will result in an adapter that conforms to the
*protocol* that Newman expects. Unfortunately, protocols are much harder
to reason about and define than interfaces are.

Like many Ruby libraries, Newman relies on loose [duck typing][duck typing] 
rather than a formal behavioral contract to determine whether one adapter can 
serve as a drop-in replacement for another. The `Newman::Mailer` object is used
by default, so it defines the canonical implementation that 
other adapters are expected to mimic at the functional level -- even if they 
handle things very differently under the hood. This implicit contract makes 
it possible for `Newman::TestMailer` to stand in for 
a `Newman::Mailer` object, even though it stores all incoming and 
outgoing messages in memory rather than relying on SMTP and IMAP. Because
the two objects respond to the same messages in similar ways, the systems
that depend on them are unaware of their differences in implementation -- they
are just two different adapters that both fit in the same port.

If you read through the source of the [Newman::Mailer][newman-mailer] 
and [Newman::TestMailer][newman-testmailer] objects, you will find that
several compromises have been made for the sake of convenience:

1. Arguments for the `new_message` and `deliver_message` methods on both 
adapters are directly delegated to the `Mail::Message` constructor, and the
return value of `messages` on both adapters is a collection 
of `Mail::Message` objects. This design implicitly ties the interface of those 
methods to the mail gem; it's what GOOS calls a *hidden dependency*.

2. The `Newman::TestMailer` object is a singleton object, but it
implements a fake constructor in order to maintain interface compatibility 
with `Newman::Mailer`. This is an example of how constraints 
from dependencies can spill over into client code.

3. Configuration data is completely ignored by `Newman::TestMailer`. Because
all of its operations are done in memory, it has no need for SMTP and IMAP
settings, but it needs to accept the settings object anyway for the 
sake of maintaining interface compatibility.

All of these warts stem from protocol issues. The first issue is due to
underspecification: Newman has a clear protocol for creating, retrieving, and
sending messages, but it does not clearly define what it expects the messages
themselves to look like. The coupling between the interface of `Newman::Mailer`
and that of `Mail::Message` makes it so that other adapters must also inherit
this hidden dependency. Because `Newman::TestMailer` also explicitly depends 
upon `Mail::Message`, this constraint does not complicate its implementation,
but it certainly does make it harder to build adapters that aren't dependent 
on the mail gem.

On the flip side, the second and third issues are a result of 
overspecification. We didn't want to make `Newman::TestMailer` a singleton, 
but because the underlying `Mail::TestMailer` is implemented that way,
we didn't have much of a choice. Our decision to implement a fake constructor
in order to maintain compatibility with `Newman::Mailer` is something I was
never happy with, but I also couldn't think of a better
alternative. I am somewhat less concerned about `Newman::TestMailer` having to
accept a settings object that it doesn't actually use, but it does feel like one
extra hoop to jump through simply for the sake of consistency.

Despite these rough edges, Newman's way of handling its email dependency is a
good example of the [ports and adapters][ports-and-adapters] pattern in the 
wild. If anything, it serves as a reminder that the hard part of writing loosely
coupled code is not in the creation of duck-typed adapters, but in clearly
defining the protocol for our ports. This concept takes us beyond the idea of "coding to
an interface rather than an implementation" and is worth ruminating
over.

## Notifications

> Peers that need to be kept up to date with the object’s activity. The object
> will notify interested peers whenever it changes state or performs a
> significant action. Notifications are ‘fire and forget’; the object neither
> knows nor cares which peers are listening. (GOOS, pg. 52)

Because Ruby is a message-oriented programming language, it is easy to model
many kinds of object relationships as notifications. Doing so greatly reduces
the coupling between objects and helps establish a straight-line flow from a 
system's inputs to its outputs.

Notification-based modeling is especially useful when designing framework code,
because it is important for frameworks to know as little as possible about the
applications that are built on top of them. The general design of
the extremely popular [rack web server interface][rack] leverages these ideas to
great effect; it assumes that its applications implement a meaningful
`call` method, but otherwise remains blissfully ignorant of their behaviors.

We have designed Newman using a similar
strategy, and the general idea can be understood by tracing the execution of
the `Newman::Server#tick` method:

```ruby
module Newman
  class Server
    # NOTE: the mailer, apps, logger, and settings dependencies
    # are initialized when a Server instance is instantiated

    def tick
      mailer.messages.each do |request|
        response = mailer.new_message(:to   => request.from,
                                      :from => settings.service.default_sender)

        process_request(request, response) && response.deliver
      end

      # ... error handling code omitted
    end


    def process_request(request, response)
      apps.each do |app|
        app.call(:request  => request,
                 :response => response,
                 :settings => settings,
                 :logger   => logger)
      end

      return true

      # ... error handling code omitted
    end
  end
end
```

Did you figure out how it works? Let's walk through the process step by step to
confirm:

1. The `tick` method walks over each incoming message currently queued by the
`mailer` object (i.e., the `request`).

2. A `response` message is constructed and addressed to the sender of
the `request`.

3. The `process_request` method is called, which iterates over a
collection, executing the `call` method on each element and passing along
several dependencies that can be used to finish building a meaningful
`response` message.

4. Once `process_request` completes successfully, the response is delivered.

Because `Newman::Server` has a notification-based relationship with its
`apps` collection, it does not know or care about the structure of those
objects. In fact, the contract is so simple that a trivial `Proc` object 
can serve as a fully functioning Newman application:

```ruby
Greeter = ->(params) { params[:response].subject = "Hello World!" }

server.apps = [Greeter]
server.tick
```

If we wanted to make things a bit more interesting, we could add request
and response logging into the mix, using Newman's built-in features:

```ruby
Greeter = ->(params) { params[:response].subject = "Hello World!" }

server.apps = [Newman::RequestLogger, Greeter, Newman::ResponseLogger]
server.tick
```

These objects make use of a mixin that simplifies email logging, but as you can
see from the following code, they have no knowledge of the `Newman::Server`
object and rely entirely on the parameters being passed into their `#call`
method:

```ruby
module Newman
  class << (RequestLogger = Object.new)
    include EmailLogger

    def call(params)
      log_email(params[:logger], "REQUEST", params[:request]) 
    end
  end

  class << (ResponseLogger = Object.new)
    include EmailLogger

    def call(params)
      log_email(params[:logger], "RESPONSE", params[:response])
    end
  end
end
```

Taken together, these four objects form a cohesive workflow:

1. The server receives incoming emails and passes them on to its `apps` for
processing, along with a placeholder `response` object.

2. The request logger inspects the incoming email and records debugging 
information.

3. The greeter sets the subject of the outgoing response to "Hello World".

4. The response logger inspects the outgoing email and records debugging
information.

5. The server sends the response email.

The remarkable thing is not this semimundane process, but that the
objects involved know virtually nothing about their collaborators and are unaware of their position in the sequence of events. Context-independence
(*GOOS, pg. 54*) is a powerful thing, because it allows each object to be reasoned
about, tested, and developed in isolation.

The implications of notification-based modeling extend far beyond
context independence, but it wouldn't be easy to summarize them in 
a few short sentences. Fortunately, this topic has been covered 
extensively in other Practicing Ruby articles, particularly in 
[Issue 4.11][pr-4.11] and [Issue 5.2][pr-5.2]. Be sure to
read those articles if you haven't already; they are among the finest in our
collection.

## Adjustments

> Peers that adjust the object’s behavior to the wider needs of the system. This
includes policy objects that make decisions on the object’s behalf . . . and
component parts of the object if it’s a composite. (GOOS, pg. 52)

Adjustment relationships are hard to summarize, because they can exist in so 
many forms. But regardless of the form they take on, adjustments tend to be
used to bridge the gap between different levels of abstraction. Some are used
to raise the level of abstraction by wrapping a specific object in a more
generic interface, and others are designed to do the opposite. 

For an example of climbing down the ladder of abstraction, consider 
`Newman::EmailLogger`. It is implemented as a module in Newman for convenience,
but it could easily be reimagined as a stateless peer object of `RequestLogger`
and `ResponseLogger`. Such a redesign would yield something similar to the
following code:

```ruby
module Newman
  class << (EmailLogger = Object.new)
    def log_email(logger, prefix, email)
      logger.debug(prefix) { "\n#{email}" }
      logger.info(prefix) { email_summary(email) }
    end

    private

    def email_summary(email)
      { :from     => email.from,
        :to       => email.to,
        :bcc      => email.bcc,
        :subject  => email.subject,
        :reply_to => email.reply_to }
    end    
  end

  RequestLogger = ->(params) {  
    EmailLogger.log_email(params[:logger], "REQUEST", params[:request])
  }

  ResponseLogger = ->(params) {
    EmailLogger.log_email(params[:logger], "RESPONSE", params[:response])
  }
end
```

Though this is a subtle change, it lifts up and centralizes the concept of
"email logging" into a single object, rather than mixing helper methods into
various objects that need that functionality. This adjustment helps define the borders
between distinct concepts within the code and establishes `EmailLogger` as an
adjustment to the much more general `Logger` object it depends upon.

The philosophical distinction between these two objects is what matters here. 
A `Logger` has very abstract responsibilities: it must record arbitrary strings 
at various levels of severity and then format and output them to various 
streams. `EmailLogger`, on the other hand, is extremely concrete in its
responsibilities: it uses a `Logger` to report debugging information about
an email message. The details of how the actual logging happens are hidden from
`EmailLogger`'s clients, making it easier to treat as a black box.

Simple designs can also emerge from climbing the ladder of abstraction, 
that is, moving from a very specific context to a much more general one. For
example, it might not be a bad idea to introduce an object into Newman 
that encapsulates the concept of an email message but leaves the exact
delivery mechanism up to the individual adapters:

```ruby
# this code would be in an adapter or application code
message = Newman::Message.new { |params| Mail::Message.new(params).deliver }

# elsewhere, no knowledge of the dependency on the mail gem would be necessary:
message.to      = "test@test.com"
message.from    = "gregory@practicingruby.com"
message.subject = "You have won twenty bazillion dollars!"
message.body    = "Please send us a hair sample to confirm your ID"

message.deliver
```

This kind of object is trivial to implement because it is nothing more
than a value object with a simple callback mechanism bolted on top of it:

```ruby
module Newman 
  class Message
    def initialize(&delivery_callback)
      self.delivery_callback = delivery_callback
    end

    attr_accessor :to, :from, :subject, :body

    def deliver
      raise NotImplementedError unless delivery_callback

      delivery_callback.(:to      => to,      :from => from, 
                         :subject => subject, :body => body)
    end

    private

    attr_accessor :delivery_callback
  end
end
```

Despite its simplicity, this object provides a useful benefit: it explicitly
separates the protocol of message delivery from its implementation. If all
mail adapters for Newman were expected to return only `Newman::Message` objects, 
then any message-processing code within Newman (either in the server or in
application code) would have a well-defined interface to work against. Although
this requirement would make adapters slightly more cumbersome to write, it would
completely eliminate the hidden dependency issue discussed earlier.

Regardless of which direction they are pointed in, adjustment relationships are
very closely related to the concept of object composition in general. With that
in mind, the authors of GOOS have a useful rule to consider when designing 
composite objects:

> The API of a composite object should not be more complicated than that of any
> of its components. (GOOS, pg. 54)

Notice that in both the `Newman::EmailLogger` example and the `Newman::Message`
object, the result of composition is that a more complex system is being wrapped
by something with fewer methods and concepts to worry about. When applied
repeatedly, this kind of design causes software to become more simple as it
grows.

## Reflections

The benefit I have gained from being able to explicitly label various 
object relationships as dependencies, notifications, and adjustments is that
it forces me to think about my code in a more fine-grained way. Each
kind of object relationship comes with benefits and costs that are easier to
reason about when you recognize them for what they are.

As with most ideas from [Growing Object-Oriented Software, Guided by Tests][GOOS],
I have not yet had a chance to apply this particular set of heuristics
frequently enough to know the full extent of their usefulness. However, it never
hurts to have specific words to describe ideas that previously were hard for me
to express without relying heavily on intuition.

I would love to hear from you if you can think of ways to connect these ideas
back to your own projects or to the open source projects you've worked with. If
you have an interesting story to share, please leave a comment!

[GOOS]: http://www.growing-object-oriented-software.com/
[rack]: http://rack.github.com/
[pr-4.11]: https://practicingruby.com/articles/64
[pr-5.2]: https://practicingruby.com/articles/71
[ports-and-adapters]: http://alistair.cockburn.us/Hexagonal+architecture
[newman-mailer]: http://elm-city-craftworks.github.com/newman/lib/newman/mailer.html
[newman-testmailer]: http://elm-city-craftworks.github.com/newman/lib/newman/test_mailer.html
[duck typing]: http://en.wikipedia.org/wiki/Duck_typing
[newman]: https://github.com/elm-city-craftworks/newman
[subelsky]: http://www.subelsky.com/
[dna-draft]: http://www.subelsky.com/2012/11/ruby-dependencies-notifications-and.html


================================================
FILE: articles/v5/008-clojure.md
================================================
An interesting thing about learning new programming languages is that it takes
much less time to learn how to read programs than it does to write them. While
building non-trivial software in a language you are not familiar with can take weeks
or months of dedicated practice, the same software could be read and understood
in a fraction of that time. 

Because programming languages are much more similar to the formal language of 
mathematics than they are to natural languages, people from diverse backgrounds 
can communicate complex ideas with a surprising lack of friction. Unfortunately,
we often forget this point because we are overwhelmed by the memories of how
hard it is to *write* elegant code in a new language. This tendency puts us at a
tremendous disadvantage, because it causes us to artificially limit our access to
valuable learning resources.

In this article, I will walk you through an example of how I was plagued by this
very fear, how I overcame it, and how that led me to learn a lot 
about [Clojure][clojure] in a very short period of time. My hope is that by following 
in my footsteps, you'll be able to learn the technique I used and possibly apply
it to your own studies.

## How I finally learned about Ant Colony Optimization

For a few weeks before this article was published, I was busy
researching  [swarm intelligence][swarm]. I have always been fascinated by 
how nature-inspired algorithms can be used to solve surprisingly complex 
computing problems, and I decided that I wanted to try implementing some 
of them myself. I started off by implementing the [Boids algorithm][boids], 
and was surprised at how quickly I was able to get something vaguely 
resembling a flock of birds to appear on my screen. Motivated by that small
win, I decided to try my hand at simulating an [Ant Colony][aco].

On the surface, ant behavior is deceptively simple, even intuitive. At least,
that is what the description provided by Wikipedia would have you believe:

1. An ant (called "blitz") runs more or less at random around the colony;
2. If it discovers a food source, it returns more or less directly to the nest, leaving in its path a trail of pheromone;
3. These pheromones are attractive; nearby ants will be inclined to follow, more or less directly, the track;
4. Returning to the colony, these ants will strengthen the route;
5. If there are two routes to reach the same food source then, in a given amount of time, the shorter one will be traveled by more ants than the long route;
6. The short route will be increasingly enhanced, and therefore become more attractive;
7. The long route will eventually disappear because pheromones are volatile;
8. Eventually, all the ants have determined and therefore "chosen" the shortest route.

Unfortunately, it is hard to find resources that precisely describe the rules
that govern each of these behaviors, and those that do exist are highly abstract
and mathematical. While I'm not one to shy away from theoretical papers, I
usually like to approach them once I understand a concept fairly well in
practice. For this particular problem, I was unable to find the materials 
that would get me to that point, and it felt like I was hitting a brick wall.

Although I found tons of examples of applying a generalized form of ant colony 
optimization to the traveling salesman problem, I wanted to start with a more 
direct simulation of the natural behavior. After digging around for a bit, I
found [Rich Hickey's ant simulator][sim], which is implemented 
in [Clojure][clojure]. Check out the video below to see what it looks like in action:

<div align="center">
<iframe width="720" height="480"
src="//www.youtube.com/embed/shm7QcJMvig?rel=0" frameborder="0" allowfullscreen></iframe>
</div>

I knew right away that this was exactly the kind of simulation I wanted to
build, but it honestly didn't even cross my mind to attempt to read the Clojure
code and port it to Ruby. One quick glance at its [source code][sim] reminded me
just how much I wanted to learn a Lisp dialect some day, but it definitely
wasn't going to be today! I didn't have time to go dust off the books on my
shelf that I never read, or to watch the [2.5 hour long video][hickey] of the
talk that this code came from. 

So instead of doing all that, I set off to build my own implementation from
scratch by cobbling together the bits of information I had collected
into something that sort of worked. Using the general description of ant
behavior as my guide, I left it up to my imagination to fill in the details, and
within an hour or so I had built something that had ants moving around the
screen. Unfortunately, my little family of ants seemed to have come from a
failed evolutionary branch, because they didn't do what they were supposed to
do! They'd wander around randomly, get stuck, choose the wrong food sources, and
generally misbehave in all sorts of painful ways. Thinking that I needed a
break, I stepped away from the project for a day so that I could come back to it
with a fresh perspective.

The next day, I did end up getting something vaguely resembling an ant colony to
appear on my screen. The behavior was not perfect, but it illustrated the main 
idea of the algorithm:

<div align="center">
<iframe width="720" height="480"
src="//www.youtube.com/embed/p_XmuRHs57g?rel=0" frameborder="0" allowfullscreen></iframe>
</div>

It was fairly easy to get to this point, but then it became extremely hard to 
improve upon the simulator. Ant colony optimization has a lot of variables to it
(i.e. things like the size of the world, number of ants, number of food sources,
pheromone decay rate, amount of pheromone dropped per iteration, etc). Changing
any one of these things can influence the effectiveness of the others. When you
combine this variability with an implementation where the actual behaviors were 
half-baked and possibly buggy, you end up with a big mess that is hard to debug,
and even harder to understand. Knowing that my code was in really bad shape,
I was ready to give up.

Although it took a lot of rumination to get me there, the lightbulb eventually
turned on: maybe reading the Clojure implementation wasn't such a bad idea after
all! I had initially thought that learning the algorithm would be much easier
than learning the semantics and syntax of a new language, but two days of hard
work and mediocre results led me to re-evaluate that assumption. At the very
least, I could spend an afternoon with the Clojure code. Even if the ants still frightened
and confused me in the end, I'd at least learn how to read some code from a language 
that I had always wanted to study anyway. 

I think you can guess what happened next: within a couple of hours, I not only
fully understood Rich Hickey's implementation, but I had also learned dozens
upon dozens of Clojure features, including a few that have no direct analogue in
Ruby. While it may have been a result of frustration 
driven development, I was genuinely surprised at what a great way this was to
learn a new language while also studying a programming problem that I was
interested in.

Throughout the rest of this article, I will attempt to demonstrate that given
the right example, even a few dozen lines of code can teach you a tremendous
amount of useful things about a language that you've never worked with before.
If you are new to Clojure programming, you'll be able to follow along
and experience the same benefits that I did; if you already know the
language, you can use this as an exercise in developing a beginner's mindset. In
either case, I think you'll be surprised at how much we can extract
from such a small chunk of code.

To keep things simple, we won't bother to read the complex bits of code that 
implement ant behavior. Instead, we'll start from the bottom up and
take a look at how this simulation models its world and the ants within it.
Although this won't help us understand how things get set into action, it will
give us plenty of opportunities to learn some interesting Clojure
features.

## Modeling the world

The following code is responsible for creating a blank slate world with 80x80
dimensions:

```clojure
(defstruct cell :food :pher) ;may also have :ant and :home

;dimensions of square world
(def dim 80)

;world is a 2d vector of refs to cells
(def world 
   (apply vector 
     (map (fn [_] 
            (apply vector (map (fn [_] (ref (struct cell 0 0))) 
                               (range dim)))) 
          (range dim))))

(defn place [[x y]]
  (-> world (nth x) (nth y)))
```

Even if this is the first time you've ever seen a Clojure program, you could take an
educated guess at what is going on in at least a few of these lines of code:

```clojure
(defstruct cell :food :pher)      ; this defines a Struct-like thing

(def dim 80)                      ; this defines a named value, setting dim=80

(defn place [[x y]]
  (-> world (nth x) (nth y)))     ; this looks like an accessor into a
                                  ; two-dimensional grid
```

The code in the `world` definition is much more complicated, but it 
has a helpful comment that describes what it is: a 2D vector of 
refs to cells. This hint gives us some useful keywords to search for 
in [Clojure's API docs][clojure-doc]. With a bit of effort, it is possible
to use this code sample and Clojure's documentation to learn all the 
following things about the language:

1. The [Map][Map] collection is Clojure's equivalent to Ruby's `Hash` object.
1. The [StructMap][StructMap] collection is a `Map` with some predefined keys
that cannot be removed. They can be defined using `defstruct`, and are instantiated
via `struct`.
1. The `(def ...)` construct is a [special form][def] that defines global variables 
within a namespace, but it is considered bad style to treat these variables as
if they were mutable.
1. The `(defn ...)` construct is a macro which among other things provides
syntactic sugar for defining functions with named parameters.
1. The [Vector][Vector] collection has core functionality which is similar to
Ruby's `Array` object. Vectors can be instantiated using the `vector` function or
via the `[]` literal syntax,  and their elements are accessed using 
the `nth` function.
1. All collections in Clojure implement a [Sequence][Sequence]
interface that is similar to Ruby's `Enumerable` module. It provides various
functions that Ruby programmers are already familiar with, such as `map`, `reduce`, 
and `sort`. But because most of these functions return lazy sequences, they
behave slightly differently than their Ruby counterparts.
1. The [Ref][Ref] construct is a transactional reference, which is one of
Clojure's concurrency primitives. In a nutshell, wrapping state in a `Ref`
makes it so that state can only be modified from within a transaction, ensuring
thread safety.
1. Among other things, the `range` function provides behavior similar to the enumerator 
form of Ruby's `Integer#times` method. 
1. The `apply` function provides functionality similar to Ruby's splat operator
(`*`), passing the elements of a sequence as arguments to a function.
1. The [-> macro][->] provides syntactic sugar for function composition, which
can make chaining function calls easier.

Based on this laundry list of concepts to learn, it is easy to see from this
example alone that much like Ruby, Clojure is a very rich language that is
capable of concisely expressing very complex ideas. With that in mind, it is
helpful to use Clojure's REPL to experiment while learning, much as we'd do
with `irb` in Ruby. Once again using the code sample as a guide, an 
exploration such as the one that follows can go a long way towards 
verifying our understanding of what we learned from the documentation:

```clojure
user=> (defstruct cell :food :pher)
; #'user/cell
user=> (struct cell 1 4)
; {:food 1, :pher 4}
user=> [ [ :a :b :c ] [ :d :e :f ] ]
; [[:a :b :c] [:d :e :f]]
user=> (def data [[:a :b :c] [:d :e :f]])
; #'user/data
user=> (nth (nth data 1) 2)
; :f
user=> (nth (nth data 2) 1)
; IndexOutOfBoundsException   clojure.lang.PersistentVector.arrayFor 
; (PersistentVector.java:106)
user=> (nth (nth data 0) 1)
; :b
user=> (-> data (nth 1) (nth 2))
; :f
user=> (map (fn [x] (* x 2)) [1 2 3])
; (2 4 6)
user=> (vector (map (fn [x] (* x 2)) [1 2 3]))
; [(2 4 6)]
user=> (apply vector (map (fn [x] (* x 2)) [1 2 3]))
; [2 4 6]
user=> (range 5)
; (0 1 2 3 4)
user=> (apply vector (map (fn [x] (struct cell 0 0)) (range 5)))
; [{:food 0, :pher 0} {:food 0, :pher 0} {:food 0, :pher 0} 
; {:food 0, :pher 0} {:food 0, :pher 0}]
```

Knowing what we now know, it is possible to imagine a loose translation of the
original Clojure code sample into Ruby, if we account for a few caveats:

1. Most `Enumerable` methods return `Array` objects, which are not lazily
evaluated. Some support for lazy sequences exists in Ruby 2.0, but we'll
not bother with that in our translation because it'd only create more
work for us.

2. We don't have a direct analogy to Clojure's `Ref` construct, but we can
pretend that we do for the purposes of this example.

3. We don't have anything baked into the language which implements a `Hash` with
some required keys and some optional ones. But such behavior could be
emulated by building a custom `Cell` object. 

4. We don't have destructuring in the parameter lists for our
functions, so we need to handle destructuring manually within the bodies
of our methods rather than their signatures.

Keeping these points in mind, here's a semi-literal translation of Clojure code
to Ruby:

```ruby
# Side length of the square world (mirrors `dim` in the Clojure source).
DIM    = 80                                          
# DIM x DIM nested Array; every slot holds a Ref wrapping a Cell built with
# two zero arguments. `Ref` and `Cell` are imagined stand-ins, not real Ruby
# classes. The trailing numbers point at the caveats listed before this code.
WORLD  = DIM.times.map do                                 # 1
           DIM.times.map { Ref.new(Cell.new(0, 0)) }      # 2,3
         end

# Looks up the entry of WORLD addressed by `pos`, a two-element [x, y] array.
def place(pos)       
  x, y = pos                                              # 4
  WORLD[x][y]
end
```

While the two languages cannot be categorically compared by such a coarse
exercise in syntactic gymnastics, it does help the similarities and 
differences between the languages stand out a bit more. This allows us to reuse
the knowledge we already have, and also exposes the gaps in our 
understanding that need to be filled in.

> **SIDE QUEST:** The remaining two sections in this article will repeat this
same basic process on two more small chunks of code from the ant simulator. If you have some
free time and an interest in learning Clojure, you may want to start
with the initial code samples in each section and try to figure them out on
your own, and *then* come back to read my notes. If you decide to
try this out, please share a comment with what you've learned.

Now that we've tackled one concrete feature from this program, it will be much
easier to understand the rest. There's a lot left to learn, so let's keep
moving!

## Modeling an ant

The following code is responsible for initializing an ant at a 
given location within the world:

```clojure
(defstruct ant :dir) ;may also have :food

(defn create-ant 
  "create an ant at the location, returning an ant agent on the location"
  [loc dir]
    (dosync
      (let [p (place loc)
            a (struct ant dir)]
        (alter p assoc :ant a)
        (agent loc))))
```

Because we already have a rudimentary understanding of how `StructMap` works,
and how to define functions, we can skip over some of the boilerplate
and get right to the good stuff:

```clojure
(dosync                               ; 1
  (let [p (place loc)                 ; 2
        a (struct ant dir)]
    (alter p assoc :ant a)            ; 3
    (agent loc)))                     ; 4
```

Digging back into Clojure's API docs, we can learn four new things 
from this code sample:

1. The [dosync][dosync] macro starts a transaction,
which among other things, makes it possible to modify `Ref`
structures in a thread-safe way.

1. The [let][let] macro allows you to make use of named values within
a lexical scope. This construct appears to be roughly similar to the 
concept of block-local variables in Ruby.

1. The [alter][alter] function is used for modifying the contents of a `Ref`
structure, and can only be called within a transaction.

1. The [Agent][Agent] construct is another one of Clojure's concurrency
primitives. This structure provides an interesting state-centric alternative
to the actor model of concurrency: rather than encapsulating behavior that acts
upon external state, agents encapsulate state which is *acted upon* by external
behaviors.

Of course, in order to verify that we understand what the documentation is
telling us, nothing beats a bit of casual experimentation in the REPL:

```clojure
user=> (let [x 10 y 20] (+ x y))
; 30
user=> (let [x 10] (let [y 20] (+ x y)))
; 30
user=> (let [x 10] (let [y 20]) y)
; CompilerException java.lang.RuntimeException: Unable to resolve 
; symbol: y in this context, compiling:(NO_SOURCE_PATH:3) 
user=> (def foo (ref { :x 1 :y 1}) )
; #'user/foo
user=> foo
; #<Ref@6762ba99: {:y 1, :x 1}>
user=> (assoc foo :z 2)
; ClassCastException clojure.lang.Ref cannot be cast to clojure.lang.Associative  
; clojure.lang.RT.assoc (RT.java:691)
user=> (assoc @foo :z 2)
; {:z 2, :y 1, :x 1}
user=> @foo
; {:y 1, :x 1}
user=> (alter foo assoc :z 2)
; IllegalStateException No transaction running  
; clojure.lang.LockingTransaction.getEx (LockingTransaction.java:208)
user=> (dosync (alter foo assoc :z 2))
; {:z 2, :y 1, :x 1}
user=> (def bar (agent [1 2 3]))
; #'user/bar
user=> bar
; #<Agent@3445378f: [1 2 3]>
user=> @bar
; [1 2 3]
user=> (send bar reverse)
; #<Agent@3445378f: [1 2 3]>
user=> bar
; #<Agent@3445378f: (3 2 1)>
user=> @bar
; (3 2 1)
user=> (reverse @bar)
; (1 2 3)
user=> @bar
; (3 2 1)
```

The ant creation code sample consists mostly of features that don't exist in
Ruby, so a direct translation isn't possible. However, it doesn't hurt to
imagine what the syntax for these features might look like in Ruby if we did
have Clojure's concurrency primitives:

```ruby
  # Hypothetical Ruby rendering of the Clojure `create-ant` function shown
  # earlier. `Ref`, `Ant`, and `Agent` are imagined equivalents of Clojure
  # constructs; none of them are defined in this article.
  #
  # loc - the [x, y] position handed to `place`
  # dir - the direction passed to Ant.new
  #
  # The value of the block is a new Agent wrapping `loc`; presumably
  # Ref.transaction would hand that value back to the caller.
  def create_ant(loc, dir)
    Ref.transaction do 
      p = place(loc)     # local `p` shadows Kernel#p inside this block
      a = Ant.new(dir)
    
      # Counterpart of `(alter p assoc :ant a)` in the Clojure version.
      p.ant = a

      Agent.new(loc)
    end
  end
```

Assuming that Clojure's semantics were maintained, either all mutations that
happen within the `Ref.transaction` block would be applied, or none of them
would be. Furthermore, thread-safety would be handled for us ensuring state
consistency for the duration of the block. Language-level transactions seem like
seriously powerful stuff, and it will be interesting to see if Ruby ends up
adopting them in the future.

## Populating the world

The following code populates the initial state of the world with ants and food:

```clojure
;number of ants = nants-sqrt^2
(def nants-sqrt 7)
;number of places with food
(def food-places 35)
;range of amount of food at a place
(def food-range 100)

(def home-off (/ dim 4))
(def home-range (range home-off (+ nants-sqrt home-off)))

(defn setup 
  "places initial food and ants, returns seq of ant agents"
  []
  (dosync
    (dotimes [i food-places]
      (let [p (place [(rand-int dim) (rand-int dim)])]
        (alter p assoc :food (rand-int food-range))))
    (doall
     (for [x home-range y home-range]
       (do
         (alter (place [x y]) 
                assoc :home true)
         (create-ant [x y] (rand-int 8)))))))
```

As in the ant initialization code, this snippet includes a mixture of new
concepts and old ones. If we focus on the body of the `setup` definition, there
are five new things for us to learn:

```clojure
(dosync
    (dotimes [i food-places]                             ;1
      (let [p (place [(rand-int dim) (rand-int dim)])]   ;2
        (alter p assoc :food (rand-int food-range))))
    (doall                                               ;3
     (for [x home-range y home-range]                    ;4
       (do                                               ;5
         (alter (place [x y]) 
                assoc :home true)
         (create-ant [x y] (rand-int 8))))))
```

1. The [dotimes][dotimes] macro is a simple iterator that is comparable to the
block form of `Integer#times` in Ruby. 

1. The [rand-int][rand-int] function returns a random integer between 0 and
a given number, which is similar to calling Ruby's `Kernel#rand` with an 
integer argument.

1. The [doall][doall] macro is used to force a lazy sequence to be fully
evaluated.

1. The [for][for] macro implements list comprehensions, which are a very
powerful form of iterator that does not have a direct analogue in Ruby.

1. The [do][do] special form executes a series of expressions in sequence and
returns the result of the last expression. This is roughly equivalent to Ruby's
`begin...end` block syntax.

One last trip back to the REPL is needed to confirm that once again, the
documentation is not lying, and we have not misunderstood its explanations:

```clojure
user=> (dotimes [i 5] (println i))
; 0
; 1
; 2
; 3
; 4
; nil
user=> (rand-int 10)
; 3
user=> (rand-int 10)
; 6
user=> (rand-int 10)
; 6
user=> (rand-int 10)
; 2
user=> (for [x (range 5) y (range 5)] [x y])
; ([0 0] [0 1] [0 2] [0 3] [0 4] [1 0] [1 1] [1 2] [1 3] [1 4] 
; [2 0] [2 1] [2 2] [2 3] [2 4] [3 0] [3 1] [3 2] [3 3] [3 4] 
; [4 0] [4 1] [4 2] [4 3] [4 4])
user=> (for [x (range 5) y (range 5)] (+ x y))
; (0 1 2 3 4 1 2 3 4 5 2 3 4 5 6 3 4 5 6 7 4 5 6 7 8)
user=> (do (print "hello world\n") (+ 1 1))
; hello world
; 2
user=> (realized? (for [x (range 5) y (range 5)] [x y])) 
; false
user=> (realized? (doall (for [x (range 5) y (range 5)] [x y])))
; true
```

Because many of the Clojure features used for populating the simulation's 
world either already exist in Ruby or are irrelevant due to implementation
differences, this code sample translates fairly well. Apart from the fact
that the `Ref` construct in this example is imaginary, the only 
noticeable thing that is lost in translation is the conciseness
of Clojure's list comprehensions. But in this particular use case,
`Array#product` gets us part of the way there:

```ruby
NANTS_SQRT  = 7      # number of ants = NANTS_SQRT ** 2
FOOD_PLACES = 35     # number of places with food
FOOD_RANGE  = 100    # range of amount of food at a place

# Offset of the home area from the origin, along both axes.
HOME_OFF   = DIM / 4
# Clojure's `(range a b)` excludes b, so the faithful Ruby translation is the
# three-dot (exclusive) range. This yields exactly NANTS_SQRT coordinates per
# axis, and therefore NANTS_SQRT ** 2 home cells and ants.
HOME_RANGE = (HOME_OFF...NANTS_SQRT + HOME_OFF)

# Places the initial food and ants, mirroring the Clojure `setup` function.
# `Ref.transaction` is the imaginary transaction construct used throughout
# this article.
#
# The value of the transaction block is the array built by `map`, i.e. one
# `create_ant` result per home cell.
def setup
  Ref.transaction do
    # Scatter food over FOOD_PLACES randomly chosen cells. The same cell may
    # be picked more than once, in which case the later amount wins.
    FOOD_PLACES.times do
      p      = place([rand(DIM), rand(DIM)])
      p.food = rand(FOOD_RANGE)
    end

    # Array#product stands in for Clojure's two-variable `for` comprehension:
    # it enumerates every [x, y] pair in the home square.
    HOME_RANGE.to_a.product(HOME_RANGE.to_a).map do |x, y|
      place([x, y]).home = true

      create_ant([x, y], rand(8))
    end
  end
end
```

At this point, you should now completely understand the structure of the initial
state of the world in [Rich Hickey's ant simulator][sim], and if you're new to
Clojure, you probably know a lot more about the language than you did when you
started reading. If you have enjoyed the journey so far, definitely consider
reading the entire program; this article only covers the tip of the iceberg! 

## Reflections

[XKCD] sums up how I feel about this exercise much better than I could on my own:

[![](http://imgs.xkcd.com/comics/lisp_cycles.png)](http://xkcd.com/297/)

That said, I'm sure that more than a few people would be happy to tell you that 
many of the pragmatic compromises that Clojure has made are blasphemic in some
way. Truth be told, I don't know nearly enough about functional languages to
weigh in on any of those claims.

The real takeaway for me was that by stepping outside of my comfort zone for
even a few hours, I was able to look back at Ruby with a fresh perspective. I
was also able to gain an understanding of a programming problem that I couldn't
find a good Ruby example for. Both of these things were a huge win for me. I
hope that you will find a way to try this exercise out on one of your own 
problems, and I look forward to hearing what you think of it.

Learning to read code in a language you are not familiar with takes practice,
but it is easier than it seems. If you step outside the
bubble from time to time, only good things will come of it.

> **NOTE**: You may want to try out [4Clojure][4Clojure] if you want to hone
> your Clojure skills at a more gradual pace than what we attempted in this
> article. It's a quiz site similar to [RubyKoans].
 
[swarm]:       http://en.wikipedia.org/wiki/Swarm_intelligence
[boids]:       http://en.wikipedia.org/wiki/Boids
[aco]:         http://en.wikipedia.org/wiki/Ant_colony_optimization
[sim]:         https://gist.github.com/1093917
[clojure]:     http://clojure.org/
[clojure-doc]: http://clojure.org/documentation
[hickey]:      http://blip.tv/clojure/clojure-concurrency-819147
[xkcd]:        http://xkcd.com
[4Clojure]:    http://www.4clojure.com
[RubyKoans]:   http://rubykoans.com

[def]:     http://clojure.org/special_forms#Special%20Forms--%28def%20symbol%20init?%29
[Map]: http://clojure.org/data_structures#Data%20Structures-Maps%20%28IPersistentMap%29
[StructMap]: http://clojure.org/data_structures#Data%20Structures-StructMaps
[Vector]: http://clojure.org/data_structures#Data%20Structures-Vectors%20%28IPersistentVector%29
[Sequence]: http://clojure.org/sequences
[Ref]: http://clojure.org/refs
[->]: http://blog.fogus.me/2009/09/04/understanding-the-clojure-macro/
[dosync]: http://clojure.github.com/clojure/clojure.core-api.html#clojure.core/dosync
[let]: http://clojure.github.com/clojure/clojure.core-api.html#clojure.core/let
[alter]: http://clojure.github.com/clojure/clojure.core-api.html#clojure.core/alter
[agent]: http://clojure.org/agents
[dotimes]: http://clojure.github.com/clojure/clojure.core-api.html#clojure.core/dotimes
[for]: http://clojure.github.com/clojure/clojure.core-api.html#clojure.core/for
[doall]: http://clojure.github.com/clojure/clojure.core-api.html#clojure.core/doall
[rand-int]: http://clojure.github.com/clojure/clojure.core-api.html#clojure.core/rand-int
[do]: http://clojure.org/special_forms#Special%20Forms--%28do%20exprs*%29


================================================
FILE: articles/v5/009-ants.md
================================================
*This article is based on a [heavily modified Ruby port][rubyantsim] 
of Rich Hickey's [Clojure ant simulator][hickey]. Although I didn't directly collaborate with Rich on this issue of 
Practicing Ruby, I learned a lot from his code and it provided
me with a great foundation to start from.*

Watch as a small ant colony identifies and completely consumes its four nearest
food sources:

<div align="center">
<iframe width="720" height="480"
src="//www.youtube.com/embed/f2IX1Y5o6pc?rel=0" frameborder="0" allowfullscreen></iframe>
</div>

While this search effort may seem highly organized, it is the
result of very simple decisions made by individual ants. On each
tick of the simulation, each ant decides its next action based only on its
current location and the three adjacent locations ahead of it. But 
because ants can indirectly communicate via their environment, complex 
behavior arises in the aggregate.

Emergence and self-organization are popular concepts in programming, but far too many
developers start and end their explorations into these ideas with [Conway's Game of Life][conway]. 
In this article, I will help you see these fascinating properties in a new
light by demonstrating the role they play in [ant colony optimization (ACO)][aco] algorithms.

> **NOTE:** There are many ways to simulate ant behavior, some of which can be quite useful
for a wide range of search applications. For this article, I have built
a fairly naïve simulation that is meant to loosely mimic the kind of ant
behavior you can observe in the natural world. This article *may* be useful as a 
brief introduction to ACO, but be sure to dig deeper if you are interested in
practical applications. My goal is to provide a great example of emergent 
behavior, NOT a great reference for nature-inspired search algorithms.

## Modeling the state of an ant colony

This simulated world consists of many cells: some are food sources, 
some are part of the colony's nest, and the rest are an
open field that needs to be traversed. Each cell can contain a single 
ant facing in one of the eight directions you'd find on a compass. 
As the ants move around the world, they mark the cells they visit with
a trail of pheromones that helps them find their way between their 
nest and nearby food sources. Pheromones accumulate as more ants 
travel across a given trail, but they also gradually evaporate. 
The combination of these two properties of pheromones helps 
ants find efficient paths to nearby food sources.

Subtle changes to any of these rules can yield very different outcomes, 
and finding an optimal result will necessarily involve some
experimentation. Knowing that, it makes sense for the simulator to 
have a data model that is divorced from its domain logic. Many
behavioral changes can be made without altering the
underlying data model, and that allows the `Ant`, `Cell`, and `World` constructs to
be defined as simple value objects as shown below:

```ruby
module AntSim 
  # Plain value object describing a single ant: where it is, which way it
  # faces, and whether it is carrying food. It holds state only.
  class Ant
    attr_accessor :food, :direction, :location

    # direction - the ant's initial heading
    # location  - the ant's initial position
    #
    # `food` is left unset (nil) until something assigns it.
    def initialize(direction, location)
      @direction = direction
      @location  = location
    end
  end

  # Plain value object for one square of the world. Tracks the amount of
  # food, both pheromone levels, the ant standing on it (if any), and a
  # flag marking it as part of the nest.
  class Cell
    attr_accessor :food, :home_pheremone, :food_pheremone, :ant, :home

    # food           - initial amount of food
    # home_pheremone - initial level of the home trail marker
    # food_pheremone - initial level of the food trail marker
    #
    # `ant` and `home` are left unset (nil) until assigned.
    def initialize(food, home_pheremone, food_pheremone)
      @food           = food
      @home_pheremone = home_pheremone
      @food_pheremone = food_pheremone
    end
  end

  # Square grid of Cell objects addressed by [x, y] pairs.
  class World
    # Builds a world_size x world_size grid and fills every slot with a
    # fresh Cell created as Cell.new(0,0,0).
    def initialize(world_size)
      self.size = world_size
      self.data = size.times.map { size.times.map { Cell.new(0,0,0) } }
    end

    # Side length of the grid. This reader must be public: collaborators such
    # as Actor#neighbor call `world.size` to wrap coordinates around the
    # edges, which would raise NoMethodError if it were private.
    attr_reader :size

    # Returns the cell stored at `location`, a two-element [x, y] array.
    def [](location)
      x,y = location

      data[x][y]
    end

    # Returns a cell chosen at random.
    def sample
      data[rand(size)][rand(size)]
    end

    # Yields every cell together with its coordinates, as a single
    # [cell, [x, y]] array per iteration.
    def each
      data.each_with_index do |col,x| 
        col.each_with_index do |cell, y| 
          yield [cell, [x, y]]
        end
      end
    end

    private

    # The raw grid and the size writer stay internal to the class.
    attr_accessor :data
    attr_writer   :size
  end
end
```

These classes are somewhat peculiar in that they are very state-centric and 
do not encapsulate any interesting domain logic. Although it won't win us
object-oriented style points, designing things this way decouples the state of 
the simulated world from both the events that happen within it and the 
optimization algorithms that run against it. These objects
represent only the nouns of our system, leaving it up to their collaborators 
to supply the verbs.

## Moving around the world

The ants in this system are surprisingly limited in their behavior. On each 
and every iteration, their entire decision making process can result 
in exactly one of the following outcomes:

![Ant movement rules](http://i.imgur.com/VsBkn.png)

Most of these actions are extremely localized. Turning does not affect any
cells, while moving only affects the cell the ant currently occupies
and the one immediately in front of it. However, taking or dropping food
triggers a pheromone update, affecting every cell the ant has 
visited since the last time it updated its trails. This can have far-reaching
effects on the behavior of the rest of the colony, even though each individual
ant can only sense the pheromone levels of its own cell and the three cells
directly in front of it. While natural ants must drop pheromone
continuously as they walk, artificial ants can improve upon nature by
updating entire paths instantaneously.

An object that implements these behaviors needs to know about the structure of
the `Ant`, `Cell`, and `World` objects, but it still does not
need to know much about the core domain logic of the simulator. What we want is
an `Actor` that understands its world and how to play specific roles within it, 
but does not attempt to define the broader story arc:

```ruby
require "set"

module AntSim
  # Carries out individual actions (turning, moving, taking and dropping
  # food, marking trails) for one ant within a world. It does not decide
  # which action to perform; callers choose and invoke the methods.
  #
  # Every action method returns self so that calls can be chained.
  class Actor
    # [dx, dy] offsets indexed by direction (0..7). Consecutive entries are
    # neighbouring compass points, so adding or subtracting 1 from a
    # direction rotates by one step.
    DIR_DELTA   = [[0, -1], [ 1, -1], [ 1, 0], [ 1,  1],
                   [0,  1], [-1,  1], [-1, 0], [-1, -1]]

    # world - object answering [] (with an [x, y] pair) and size
    # ant   - object exposing direction, location, and food
    def initialize(world, ant)
      self.world   = world
      self.ant     = ant

      # Cells visited since the trails were last marked. A Set records only
      # which cells were visited, not how often or in what order.
      self.history = Set.new
    end

    attr_reader :ant

    # Rotates the ant by `amt` steps (may be negative), wrapping within 0..7.
    def turn(amt)
      ant.direction = (ant.direction + amt) % 8

      self
    end

    # Moves the ant one cell in the direction it is facing. The statement
    # order matters: `ahead` and `here` are both derived from ant.location,
    # so the location is only updated after both cells have been touched.
    # No check is made for an ant already occupying the target cell.
    def move
      history << here

      new_location = neighbor(ant.direction)

      ahead.ant = ant
      here.ant  = nil

      ant.location = new_location

      self
    end

    # Adds one unit of food to the current cell and marks the ant as
    # no longer carrying food.
    def drop_food
      here.food += 1
      ant.food   = false

      self
    end

    # Removes one unit of food from the current cell and marks the ant as
    # carrying food.
    def take_food
      here.food -= 1
      ant.food   = true

      self
    end

    # Raises the food pheromone by one on every visited cell that does not
    # itself contain food, then forgets the visited cells.
    def mark_food_trail
      history.each do |old_cell|
        old_cell.food_pheremone += 1 unless old_cell.food > 0 
      end

      history.clear

      self
    end

    # Raises the home pheromone by one on every visited cell that is not
    # flagged as home, then forgets the visited cells.
    def mark_home_trail
      history.each do |old_cell|
        old_cell.home_pheremone += 1 unless old_cell.home
      end

      history.clear

      self
    end

    # True while the ant is not carrying food.
    def foraging?
      !ant.food
    end

    # The cell the ant currently occupies.
    def here
      world[ant.location]
    end

    # The cell directly in front of the ant.
    def ahead
      world[neighbor(ant.direction)]
    end

    # The cell one rotation step counter to the ant's heading (direction - 1).
    def ahead_left
      world[neighbor(ant.direction - 1)]
    end

    # The cell one rotation step past the ant's heading (direction + 1).
    def ahead_right
      world[neighbor(ant.direction + 1)]
    end
    
    # The three cells in front of the ant, straight ahead first.
    def nearby_places
      [ahead, ahead_left, ahead_right]
    end

    private

    # Coordinates of the cell adjacent to the ant in `direction`. Both the
    # direction and the resulting coordinates wrap around via modulo, so the
    # opposite edges of the world are treated as adjacent.
    def neighbor(direction)
      x,y = ant.location

      dx, dy = DIR_DELTA[direction % 8]

      [(x + dx) % world.size, (y + dy) % world.size]
    end

    attr_accessor :world, :history
    attr_writer   :ant
  end
end
```

Of course, now that we have crossed the line from pure data models to an object
which actually does something, it is impossible to implement meaningful behavior
without making certain assumptions that will affect the capabilities of the 
rest of the system. The `Actor` class draws two significant lines in the sand that
are easy to overlook on a quick glance:

1. Storing history data in a `Set` rather than an `Array` makes it so
that when this object updates pheromone trails, it only takes into account
what cells were visited, not how many times they were visited or in what order
they were traversed.

2. The modular arithmetic performed in the `neighbor` function treats the world
as if it were a [torus][torus], instead of a plane. This means that the
leftmost column and the rightmost column of the map are adjacent to one 
another, as are the top and bottom rows. This allows ants to easily wrap around
the edges of the map, but also establishes connections between cells that you
may not intuitively think of as being close to one another. Without a
three-dimensional visualization, it is hard to show that the top right corner of
the map and the bottom left corner are actually adjacent to one another.

Of course, the purpose of the `Actor` class is to hide these details from
the rest of the system. As long as its collaborators can operate within these 
constraints, the `Actor` object can be treated as a magic black box that knows
how to make ants move around the world and do interesting things. To see why
that is useful, check out the `Simulator#iterate` function which drives the
simulator's main event loop:

```ruby
module AntSim
  class Simulator
    # ... other functions ...

    # Runs one pass of the event loop: every actor asks a fresh Optimizer
    # what to do from its current surroundings, carries out that single
    # action, and then the loop pauses for ANT_SLEEP.
    #
    # Raises NotImplementedError if the Optimizer suggests an action that
    # has no corresponding Actor behavior here.
    def iterate
      actors.each do |actor|
        advisor  = Optimizer.new(actor.here, actor.nearby_places)
        decision = actor.foraging? ? advisor.seek_food : advisor.seek_home

        case decision
        when :move_forward then actor.move
        when :turn_left    then actor.turn(-1)
        when :turn_right   then actor.turn(1)
        # After handing food over or picking it up the actor marks the trail
        # and turns by 4 (presumably an about-face on the eight-direction
        # compass used by the Actor -- confirm against Actor#turn).
        when :take_food    then actor.take_food.mark_home_trail.turn(4)
        when :drop_food    then actor.drop_food.mark_food_trail.turn(4)
        else
          raise NotImplementedError, decision.inspect
        end
      end

      sleep ANT_SLEEP
    end
  end
end
```

Here we can see that the `Simulator` acts as a bridge that translates
the `Optimizer` object's very abstract suggestions into concrete
actions for the `Actor` to carry out. The design of the `Actor` object gives the
`Simulator` just enough control to make some small adjustments to the process,
but not so much that it needs to be bogged down with the details.

## Finding food and bringing it home

Now that we know the state of the world and how it can be manipulated, it is
time to discuss how to produce the kind of behavior that you saw in the
video at the beginning of this article. Perhaps unsurprisingly, the life of the
everyday worker ant is actually fairly mundane.

Every ant in this simulation is always either searching for food to bring back
to the nest, or trying to return home with the food it found. As soon as
an ant accomplishes one of these tasks, it immediately transitions to the other,
not bothering to take even a moment to bask in the fruits of its labor. The
following outline describes what the ants in this simulation are "thinking" 
at any given point in time, assuming that they haven't managed to 
become self-aware...

**When searching for food:**

1. If the current cell has food in it and it is NOT part of the nest, 
pick up some food.

2. Otherwise, check the cell directly in front of me. If it has food in it, is
not part of the nest, and it is not occupied by another ant, move there.

3. If not, rank the three adjacent cells in front of me based
on the amount of food they contain, and how intense their `food_pheremone`
levels are. I will *usually* choose to move or turn towards the cell with the
highest ranking, but I will randomly deviate from this pattern on occasion
so that I can explore some uncharted territory.

**When searching for the nest:**

1. If the current cell is part of the nest, drop the food I am carrying.

2. Otherwise, check the cell directly in front of me. If it is part of the nest,
and it is not occupied by another ant, move there.

3. If not, rank the three adjacent cells in front of me based
on whether or not they are part of the nest, and how intense their `home_pheremone`
levels are. I will *usually* choose to move or turn towards the cell with the
highest ranking, but I will randomly deviate from this pattern on occasion
so that I can explore some uncharted territory.

Translating these ideas into code is very straightforward, especially
if you treat the underlying mathematical formulas as a black box:

```ruby
module AntSim
  class Optimizer
    # ...

    # Decides what a foraging ant should do next.
    #
    # Returns :take_food when the current cell holds food and is not home,
    # :move_forward when the cell ahead holds food, is not home and is
    # unoccupied, and otherwise whatever #follow_trail picks from the
    # combined food / food-pheromone rankings.
    def seek_food
      return :take_food    if here.food > 0 && !here.home
      return :move_forward if ahead.food > 0 && !ahead.home && !ahead.ant

      by_food      = rank_by { |cell| cell.food }
      by_pheromone = rank_by { |cell| cell.food_pheremone }

      follow_trail(combined_ranks(by_food, by_pheromone))
    end

    # Decides what an ant carrying food should do next.
    #
    # Returns :drop_food when the current cell is home, :move_forward when
    # the cell ahead is home and unoccupied, and otherwise whatever
    # #follow_trail picks from the combined home / home-pheromone rankings.
    def seek_home
      return :drop_food    if here.home
      return :move_forward if ahead.home && !ahead.ant

      by_home      = rank_by { |cell| cell.home ? 1 : 0 }
      by_pheromone = rank_by { |cell| cell.home_pheremone }

      follow_trail(combined_ranks(by_home, by_pheromone))
    end

    # Turns per-cell ranks into one of :move_forward, :turn_left or
    # :turn_right. The index comes from `wrand`, fed the weights for the
    # cells ahead, ahead-left and ahead-right (in that order).
    def follow_trail(ranks)
      # An occupied cell ahead gets weight 0 instead of its rank.
      weights = [ahead.ant ? 0 : ranks[ahead],
                 ranks[ahead_left],
                 ranks[ahead_right]]

      [:move_forward, :turn_left, :turn_right][wrand(weights)]
    end

    # ...
  end
end
```

If you understand the general idea behind this algorithm, don't worry about the
exact computations that the `Optimizer` uses unless you are
planning on researching Ant Colony Optimization in much greater detail. While I
understand what my own code is doing, I'll admit that I mostly 
cargo-cult copied the probabilistic methods 
from [Rich Hickey's simulator][hickey] while sprinkling in a few minor tweaks 
here and there. That said, if you want to see exactly how I hacked things
together, feel free to check out 
the [full Optimizer class definition][optimizer].

What I personally find much more interesting than the nuts and bolts of
*how* this algorithm works is to think about *why* it works.

## How the hive mind emerges

As we discussed in the previous section, ants are attracted to pheromone, and
that makes them more likely to follow the trails left behind by other ants than
they are to venture out on their own. However, when ants first start exploring
a new space, there are no trails to follow and so they are forced to wander
around randomly until a food source is found.

Generally speaking, ants that take a shorter path from the nest to a food
source will arrive there sooner than ants that take a longer path. If they
follow their own pheromone trail back to the nest, they will also return home
sooner than those who are traversing longer paths. By the time ants who have
taken a longer path return home, the ants on the shortest paths have already
gone back out in search of additional food, which increases the pheromone levels
on their trails.

This process on its own would bias the ant colony to prefer shorter paths over
longer ones, but the optimization would be somewhat sluggish and might tend to
produce solutions that work well locally but aren't nearly as attractive
globally. To get better results, the system needs a bit of entropy thrown into
the mix.

Because the behavior of ants has a certain amount of randomness to it,
occasional deviations from established paths are fairly common. Even if the
fluctuations are small, each tiny shortcut that allows an ant to get between two
points along a path in a shorter amount of time ultimately contributes to
finding an optimal solution. This means that even an ant who goes wildly off
course and starves to death nowhere near the nest can make a meaningful
contribution to the colony if even some tiny segment of its path serves to
shorten an existing well-worn trail.

When you add in the fact that pheromones are volatile and tend to evaporate over
time, an upper limit emerges for how much a bad path or a local optimization can
influence the colony's decision making. Evaporation is also a key part of what
allows the ants to change course when a food source is exhausted, or an obstacle
stands in the way of an established path.

Pheromone decay is something that can be modeled in many ways, but the easiest
way of simulating it is to gradually reduce the pheromone at every cell in the 
world on a regular interval. For an example of this approach, check out
`Simulator#evaporate`:

```ruby
module AntSim
  class Simulator
    # Scales both pheromone levels of every cell in the world by EVAP_RATE.
    # The second value yielded by `world.each` is not needed here.
    def evaporate
      world.each do |cell, _position|
        cell.home_pheremone = cell.home_pheremone * EVAP_RATE
        cell.food_pheremone = cell.food_pheremone * EVAP_RATE
      end
    end
  end
end
```

So if you take the basic positive feedback loop caused by pheromone attraction
and mix in a bit of probabilistic exploration and the gradual evaporation of trails, you end
up with a fairly robust optimization process. It truly is remarkable that 
these basic factors can combine to create a very
effective search heuristic, especially when you consider the fact that what
we've discussed here is only a crude approximation of the tip of the iceberg
when it comes to [Ant Colony Optimization][aco].

## Reflections

Emergent behaviors in computing problems have always fascinated me, even though I
have not spent nearly enough time studying them to understand them well. I feel
similarly about a lot of other things in life, ranging from the board game Go,
to the spread of memes throughout communities both online and offline.

There is something deep and almost spiritual in the realization that
extremely complex behaviors can emerge from very simple systems with very few
rules, and a complete lack of central organization. It forces us to call into
question everything we experience and to wonder whether there is some elegant
explanation for it all!

[conway]: http://en.wikipedia.org/wiki/Conway%27s_Game_of_Life
[aco]: http://en.wikipedia.org/wiki/Ant_colony_optimization
[torus]: http://en.wikipedia.org/wiki/Torus
[hickey]: https://gist.github.com/1093917
[rubyantsim]: https://github.com/elm-city-craftworks/practicing-ruby-examples/tree/master/v5/009
[optimizer]: https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v5/009/lib/ant_sim/optimizer.rb


================================================
FILE: articles/v5/010-prototypes.md
================================================
*This article was written by Avdi Grimm. Avdi is a [Ruby Rogue][rogue], a
consulting pair programmer, and the head chef at [RubyTapas][tapas]. He writes
about software development at [Virtuous Code.][virtuous]*

When you think of the term *object-oriented programming*, one of the
first associated words that springs to mind is probably *classes*. For
most of its history, the OOP paradigm has been almost inextricably
linked with the idea of classes. Classes serve as *object factories*:
they hold the blueprint for new objects, and can be called upon to
manufacture as many as needed. Each object, or *instance*, has its
state, but each derives its behavior from the class. Classes, in turn,
share behavior through inheritance. In most OO programs, the class
structure is the primary organizing principle.

Even though classes have gone hand-in-hand with OOP for decades, they
aren't the only way to build families of objects with shared behavior.
The most common alternative to *class-based* programming is
*prototype-based* programming. Languages that use prototypes rather than
classes include [Self][self], [Io][io], and (most well known of all) JavaScript.

Ruby comes from the class-based school of OO language design. But it's
flexible enough that with a little cleverness, we can experiment with
prototype-style coding. In this article that's just what we'll do.

[self]: http://en.wikipedia.org/wiki/Self_(programming_language)
[io]: http://en.wikipedia.org/wiki/Io_(programming_language)
[rogue]: http://rubyrogues.com/
[tapas]: http://devblog.avdi.org/rubytapas/
[virtuous]: http://devblog.avdi.org/

## Getting started

So how do we write OO programs without classes? Let's explore this
question in Ruby. We'll use the example of a text-adventure game in the
style of "[Colossal Cave
Adventure](http://en.wikipedia.org/wiki/Colossal_Cave_Adventure)". This
is one of my favorite programming examples for object-oriented systems,
since it involves modeling a virtual world of interacting objects,
including characters, items, and interconnected rooms.

We open up an interactive Ruby session, and start typing. We begin with
an `adventurer` object. This object will serve as our avatar in the
game's world, translating our commands into interactions between
objects:

```ruby
adventurer = Object.new
```

The first ability we give to our adventurer is the ability to look at
its surroundings. The `look` command will cause the adventurer to output
a description of its current location:

```ruby
class << adventurer
  attr_accessor :location

  def look
    puts location.description
  end
end
```

Then we add a starting location, called `end_of_road`, and put the
adventurer in that location:

```ruby
end_of_road = Object.new
def end_of_road.description
  <<END
You are standing at the end of a road before a small brick building.
Around you is a forest.  A small stream flows out of the building and
down a gully.
END
end

adventurer.location = end_of_road
```

Now we can tell our adventurer to take a look around:

```console
> adventurer.look

You are standing at the end of a road before a small brick building.
Around you is a forest.  A small stream flows out of the building and
down a gully.
```

## Adding some conveniences

So far we've created an adventurer and a starting room without any kind
of `Adventurer` or `Room` classes. This adventure is getting off to a
good start! Although, if we're going to be creating a lot of these
objects we'd like for the process to be a little less verbose. We decide
to take a step back and build some syntax sugar before moving onward.

We start with an `ObjectBuilder` helper class. Yes, this is a class, when
we are supposed to be using only prototypes. However, Ruby doesn't offer
a lot of support for prototype-based programming out of the box. So we
have to build our tools with the class-oriented materials at hand. This
is intended to be behind-the-scenes support code. In other words, pay no
attention to the man behind the green curtain!

```ruby
# Support class behind the Object() helper: it turns assignment-style
# messages sent to the builder into singleton methods (plus backing instance
# variables) on the wrapped object.
class ObjectBuilder
  # object - the object that will receive the generated methods.
  def initialize(object)
    @object = object
  end

  # The builder accepts any setter-style message (a name ending in "=").
  # Answers a real true/false and defers to the default implementation for
  # every other name, as the respond_to_missing? protocol expects.
  def respond_to_missing?(missing_method, include_private=false)
    missing_method.to_s.end_with?('=') || super
  end

  # Handles `builder.name = value`.
  #
  # A Proc value becomes the body of a singleton method called `name`; any
  # other value becomes a reader that returns it. Messages that are not
  # setters fall through to the normal NoMethodError.
  def method_missing(missing_method, *args, &block)
    if respond_to_missing?(missing_method)
      method_name = missing_method.to_s.sub(/=\z/, '')
      value       = args.first
      ivar_name   = "@#{method_name}"
      if value.is_a?(Proc)
        define_code_method(method_name, ivar_name, value)
      else
        define_value_method(method_name, ivar_name, value)
      end
    else
      super
    end
  end

  # Stores +value+ in the object's +ivar_name+ and defines a reader for it.
  def define_value_method(method_name, ivar_name, value)
    @object.instance_variable_set(ivar_name, value)
    @object.define_singleton_method(method_name) do
      instance_variable_get(ivar_name)
    end
  end

  # Stores the +implementation+ Proc in the object's +ivar_name+ and defines
  # a method that runs it with the object as self. The Proc is looked up
  # again on every call rather than captured once, so the method follows
  # whatever is currently stored in that instance variable.
  def define_code_method(method_name, ivar_name, implementation)
    @object.instance_variable_set(ivar_name, implementation)
    @object.define_singleton_method(method_name) do |*args|
      instance_exec(*args, &instance_variable_get(ivar_name))
    end
  end
end
```

There's a lot going on in this class. Going over it line-by-line might
be interesting in its own right, but it wouldn't advance our
understanding of prototype-based programming all that much. Suffice to
say for now that this class can help us add new attributes and methods
to a singleton object using a concise assignment-style syntax. This will
make more sense when we start to make use of it.

We add another bit of syntax sugar: a global method named `Object` (not
to be confused with the class of the same name):

```ruby
# Creates a brand-new object, evaluates +definition+ in the context of that
# object's singleton class (handing the block an ObjectBuilder wrapped around
# the object), and returns the object.
def Object(&definition)
  Object.new.tap do |fresh|
    fresh.singleton_class.instance_exec(ObjectBuilder.new(fresh), &definition)
  end
end
```

This method takes a block, instantiates a new object, and evaluates the
block in the context of the object's singleton class, passing an
`ObjectBuilder` as a block argument. Then it returns the new object.

Now we recreate our adventurer using this new helper:

```ruby
adventurer = Object { |o|
  o.location = end_of_road

  attr_writer :location

  o.look = ->(*args) {
    puts location.description
  }
}
```

The combination of the `Object` factory method and the `ObjectBuilder`
gives us a convenient, powerful notation for creating new ad-hoc
objects. We can create attribute reader methods and assign the value of
the attribute all at once:

```ruby
o.location = end_of_road
```

We can use standard Ruby class-level code:

```ruby
attr_writer :location
```

And finally we can define new methods by assigning a lambda to an 
attribute:

```ruby
o.look = ->(*args) { puts location.description }
```

We've deliberately avoided defining methods using `def` or
`define_method`. We'll get into the reasons for that later on.

Before we move on, let's take a moment to make sure our shiny new adventurer still works the
same as before:

```console
> adventurer.look

You are standing at the end of a road before a small brick building.
Around you is a forest.  A small stream flows out of the building and
down a gully.
```

## Moving around

It's time to let our adventurer object stretch its legs a bit.
We want to give it the ability to move from location to location. First,
we make a small modification to our `Object()` method:

```ruby
# Evaluates +definition+ in the singleton class of +object+ (or of a new
# object when none is given), passing the block an ObjectBuilder wrapped
# around that object. Returns the object that was built or augmented.
def Object(object=nil, &definition)
  (object ? object : Object.new).tap do |subject|
    subject.singleton_class.instance_exec(ObjectBuilder.new(subject), &definition)
  end
end
```

Now along with creating new objects, `Object()` can also augment an
existing object which is passed in as an argument.

We pass the `adventurer` to `Object()`, and add a new `#go` method. This
method will take a direction (like `:east`), and attempt to move to the
new location using the `exits` association on its current location:

```ruby
Object(adventurer) { |o|
  o.go = ->(direction){
    if(destination = location.exits[direction])
      self.location = destination
      puts location.description
    else
      puts "You can't go that way"
    end
  }
}
```

We add a destination room to the system:

```ruby
wellhouse = Object { |o|
  o.description = <<END
You are inside a small building, a wellhouse for a large spring.
END
}
```

Then we add an `exits` Hash to `end_of_road`, with an entry saying that
the `wellhouse` is to the `:north` of it:

```ruby
Object(end_of_road) { |o| o.exits = {north: wellhouse} }
```

With that done, we are now ready to set off on our journey!

```console
> adventurer.go(:north)

You are inside a small building, a wellhouse for a large spring.
```

## Cloning prototypes

We try to go north again, expecting to see the admonition "You can't go
that way" as we bump into the wall:

```console
> adventurer.go(:north)
```

Instead, we get an exception:

```console
-:82:in `block (2 levels) in <main>': undefined method `exits' for 
#<Object:0x0000000434d768> (NoMethodError)
        from -:56:in `instance_exec'
        from -:56:in `block (2 levels) in define_code_method'
        from -:100:in `<main>'
```

This is because we never got around to adding an `exits` Hash to
`wellhouse`. We could go ahead and do that now. But as we think about
it, we realize that now that our adventurer is capable of travel, it
would make sense if all rooms started out with an empty `exits` Hash,
instead of us having to add it manually every time.

Toward that end, we create a *prototypical room*:

```ruby
room = Object { |o| o.exits = {} }
```

We then experiment with creating a new `wellhouse`, this one based on
the `room` prototype. We do this by simply cloning the `room` object. We
use `#clone` rather than `#dup` because `#clone` copies singleton class
methods:

```ruby
new_wellhouse = room.clone

new_wellhouse.exits[:south] = end_of_road
```

We quickly uncover a problem with this naive cloning technique. Because
Ruby's `#clone` (as well as `#dup`) makes a *shallow copy*, `room` and
`new_wellhouse` now share the same `exits`:

```ruby
require 'pp'

puts "new_wellhouse exits:"
pp new_wellhouse.exits
puts "room exits:"
pp room.exits
```

```console
new_wellhouse exits:
{:south=>
  #<Object:0x0000000482c8d8
   @exits=
    {:north=>
      #<Object:0x0000000482bcd0
       @description=
        "You are inside a small building, a wellhouse for a large spring.\n">}>}
room exits:
{:south=>
  #<Object:0x0000000482c8d8
   @exits=
    {:north=>
      #<Object:0x0000000482bcd0
       @description=
        "You are inside a small building, a wellhouse for a large spring.\n">}>}
```

To fix this, we could possibly customize the way Ruby does cloning by overriding
the [Object#initialize_clone](http://jonathanleighton.com/articles/2011/initialize_clone-initialize_dup-and-initialize_copy-in-ruby/)
method, but that would be an invasive change with broad reaching effects.
Because extending core objects is a bit safer than modifying them, we opt to
define our own `Object#copy` method which does a one-level-deep copying of
instance variables:

```ruby
class Object
  # Returns a clone of the receiver in which every instance variable has
  # itself been cloned, so the copy does not share those values with the
  # original. The copying goes exactly one level deep.
  def copy
    clone.tap do |duplicate|
      instance_variables.each do |name|
        duplicate.instance_variable_set(name, instance_variable_get(name).clone)
      end
    end
  end
end
```

Then we recreate `room` and `new_wellhouse`, and confirm that they no
longer share exits:

```ruby
room = Object { |o| o.exits = {} }

# Use the newly defined Object#copy here instead of Object#clone
new_wellhouse = room.copy

new_wellhouse.exits[:south] = end_of_road

puts "new_wellhouse exits:"
pp new_wellhouse.exits
puts "room exits:"
pp room.exits
```

```console
new_wellhouse exits:
{:south=>
  #<Object:0x00000002ea85d8
   @exits=
    {:north=>
      #<Object:0x00000002ea79d0
       @description=
        "You are inside a small building, a wellhouse for a large spring.\n">}>}
room exits:
{}
```

Cloning a prototypical object in order to create new
objects is the most basic form of prototype-based programming. In fact,
the "Kevo" research language (I'd link to it, but all the information
about it seems to have fallen off the Internet) used copying as the sole
way to share behavior between objects.

## Building dynamic prototypes

There are drawbacks to copying, however. It's a very static way to share
behavior between objects. Clones of `room` only share the behavior which
was defined at the time of the copy. If we were to modify `room`, we'd
have to recreate the `new_wellhouse` object once again in order to take
advantage of any new methods added to it.

Cloning also implies single inheritance. An object can only be a clone
of one "parent" object.

Finally, we also can't add any new behavior to our existing `wellhouse`
object this way. We'd have to throw away our program's state and rebuild
it, this time cloning our `end_of_road` and `wellhouse` objects from
`room`.

In Ruby, we're used to being able to make changes to a live session and
see how they play out. Thus far, we've done this all in a live
interpreter session. It seems a shame to have to lose our state and
start again. So we decide to find out if we can come up with a more
dynamic form of prototypical inheritance than plain copying.

We start by adding a helper method called `#implementation_of` to
Object. Given a method name that the object supports, it will return a
`Proc` object containing the code of that method. We make it aware of
the style of method definition used in `ObjectBuilder`, where the
implementation `Procs` of new methods were stored in instance variables
named for the methods:

```ruby
class Object
  # Returns a Proc holding the code behind +method_name+, or nil when the
  # receiver does not respond to that message.
  #
  # Lookup order:
  # 1. A Proc stored in the instance variable named after the method (the
  #    convention used by ObjectBuilder) is returned as-is.
  # 2. Any other value in that instance variable means the method is assumed
  #    to be a reader, so a lambda that reads the variable is returned.
  # 3. Otherwise the actual method is converted with Method#to_proc.
  def implementation_of(method_name)
    return unless respond_to?(method_name)

    ivar_name = "@#{method_name}"

    # Names such as `nil?`, `location=` or `+` are valid method names but not
    # valid instance variable names, and asking about them raises NameError.
    # Such methods cannot have an ivar-backed implementation.
    has_ivar = begin
      instance_variable_defined?(ivar_name)
    rescue NameError
      false
    end

    return method(method_name).to_proc unless has_ivar

    stored = instance_variable_get(ivar_name)
    return stored if stored.is_a?(Proc)

    # Assume the method is a reader
    ->{ instance_variable_get(ivar_name) }
  end
end
```

We then define a new kind of `Module`, called `Prototype`:

```ruby
# A Module that, once an object is extended with it, lets that object answer
# any message the +target+ (its prototype) responds to by borrowing the
# target's implementation and running it with the extended object as self.
class Prototype < Module
  # target - the prototypical object whose methods are made available.
  def initialize(target)
    @target = target
    super() do
      # Defer to the next module in the chain when this prototype cannot
      # help. Without that, the most recently added Prototype would hide
      # every earlier one from respond_to?.
      define_method(:respond_to_missing?) do |missing_method, include_private = false|
        target.respond_to?(missing_method) ||
          super(missing_method, include_private)
      end

      define_method(:method_missing) do |missing_method, *args, &block|
        if target.respond_to?(missing_method)
          implementation = target.implementation_of(missing_method)
          # Run the borrowed code against the extended object, not the target.
          instance_exec(*args, &implementation)
        else
          super(missing_method, *args, &block)
        end
      end
    end
  end
end
```

A `Prototype` is instantiated with a prototypical object. When a
`Prototype` instance is added to an object using `#extend`, it makes the
methods of the prototype available to the extended object. It does this
by implementing `#method_missing` (and the associated
`#respond_to_missing?`). When a message is sent to the extended object
that matches a method on the prototype object, the `Prototype` grabs the
implementation `Proc` from the prototype. Then it uses `#instance_exec`
to evaluate the `prototype`'s method in the context of the extended
object. In effect, the extended object "borrows" a method from the
prototype object for just long enough to execute it.

Note that this is different from delegation. In delegation, one object
hands off a message to be handled by another object. If object `a`
delegates a `#foo` message to object `b`, using, for instance, Ruby's
`forwardable` library, `self` in that method will be object `b`. This is
easily demonstrated:

```ruby
require 'forwardable'

# A forwards #foo to whatever object is stored in its +b+ attribute.
class A
  extend Forwardable
  attr_accessor :b
  def_delegator :b, :foo
end

# B supplies the real #foo, which prints the object it is executing in.
class B
  def foo
    puts "executing #foo in #{self}"
  end
end

a = A.new
a.b = B.new
a.foo
# >> executing #foo in #<B:0x00000003295e20>
```

But delegation is not what we want. We want to execute the methods from
prototypes as if they had been defined on the inheriting object. We want
this because we want them to work with the instance variables of the
inheriting object. If we send `wellhouse.exits`, we want the reader
method to show us the content of `wellhouse`'s `@exits` instance
variable, not `room`'s instance variable.

Remember how, in `ObjectBuilder`, we stored the implementations of
methods as `Procs` in instance variables rather than defining them
directly as methods? This need to call prototype methods on the
inheriting object is the reason for that. In Ruby, it is not possible to
execute a method from class A on an instance of unrelated class B. Since
in this program we are using the singleton classes of objects to define
all of their methods, Ruby considers all of our objects as belonging to
different classes for the purposes of method binding. We can see this if
we try to rebind a method from `room` onto `wellhouse` and then call it:

```ruby
room.method(:exits).unbind.bind(wellhouse)
```

```console
-:115:in `bind': singleton method called for a different object (TypeError)
        from -:115:in `<main>'
```

By storing the implementation of methods as raw `Procs`, without any
association to a specific class, we are able to take the implementations
and `instance_exec` them in other contexts.

The last change we make to support dynamic prototype inheritance is to
add a new `#prototype` method to our `ObjectBuilder`:

```ruby
class ObjectBuilder
  # Makes +proto+ a prototype of the object being built.
  #
  # Every instance variable of +proto+ that the object does not already have
  # is dup'ed onto it, except for the Procs that back +proto+'s methods.
  # The object is then extended with a Prototype module wrapping +proto+.
  def prototype(proto)
    proto.instance_variables.each do |name|
      value = proto.instance_variable_get(name)

      # Leave method implementations on the proto object
      next if proto.respond_to?(name.to_s[1..-1]) && value.is_a?(Proc)
      # Never overwrite state the object already carries
      next if @object.instance_variable_defined?(name)

      @object.instance_variable_set(name, value.dup)
    end

    @object.extend(Prototype.new(proto))
  end
end
```

This method does two things:

1.  It copies instance variables from a prototype object to the object
    being built.
2.  It extends the object being built with a `Prototype` module
    referencing the prototype object.

We can now use all of this new machinery to dynamically add `room` as a
prototype of `wellhouse`. We are then able to set the south exit to
point back to `end_of_road`, using the `exits` association that
`wellhouse` now inherits from `room`:

```ruby
Object(wellhouse) { |o| o.prototype room }

wellhouse.exits[:south] = end_of_road

adventurer.location = wellhouse
```

Then we can move around again to make sure things are working as expected:

```ruby
puts "* trying to go north from wellhouse"
adventurer.go(:north)

puts "* going back south"
adventurer.go(:south)
```

```console
* trying to go north from wellhouse
You can't go that way
* going back south
You are standing at the end of a road before a small brick building.
Around you is a forest.  A small stream flows out of the building and
down a gully.
```

## Carrying items around

We now have some powerful tools at our disposal for composing objects
from prototypes. We quickly proceed to add the ability to pick up
and drop items to our game. We start by creating a prototypical
"container" object, which has an array of items and the ability to
transfer an item from itself to another container:

```ruby
# Prototypical container: an object that holds a list of items and can
# hand one of them over to another container.
container = Object { |o|
  o.items = []
  # Removes +item+ from this container's items and appends it to the
  # recipient's items. NOTE(review): Array#delete returns nil when the item
  # is absent, so nil would be appended in that case -- callers are
  # expected to pass an item that is actually present.
  o.transfer_item = ->(item, recipient) {
    recipient.items << items.delete(item)
  }
}  
```

We then make the `adventurer` a container, and add some commands for
taking items, dropping items, and listing the adventurer's current
inventory:

```ruby
# Make the adventurer a container and give it commands for looking
# around, taking items, dropping items, and listing its inventory.
Object(adventurer) {|o|
  o.prototype container

  # Prints the current location's description, followed by one line for
  # each item lying there.
  o.look = -> {
    puts location.description
    location.items.each do |item|
      puts "There is #{item} here."
    end
  }

  # Takes the first item at the current location whose name contains
  # +item_name+, moving it from the location to the adventurer. Prints a
  # message when nothing matches.
  o.take = ->(item_name) {
    item = location.items.detect{|item| item.include?(item_name) }
    if item
      location.transfer_item(item, self)
      puts "You take #{item}."
    else
      puts "You see no #{item_name} here"
    end
  }

  # Drops the first carried item whose name contains +item_name+, moving
  # it from the adventurer to the current location. Prints a message when
  # nothing matches.
  o.drop = ->(item_name) {
    item = items.detect{|item| item.include?(item_name) }
    if item
      transfer_item(item, location)
      puts "You drop #{item}."
    else
      puts "You are not carrying #{item_name}"
    end
  }

  # Prints one line for each item the adventurer is carrying.
  o.inventory = -> {
    items.each do |item|
      puts "You have #{item}"
    end
  }
}
```

For convenience, we've implemented `#take` and `#drop` so that they can
accept any substring of the intended object's name.

Next we make `wellhouse` a container, and add a list of starting items
to it:

```ruby
Object(wellhouse) { |o|
  o.prototype container
  o.items = [
    "a shiny brass lamp",
    "some food",
    "a bottle of water"
  ]
  o.exits = {south: end_of_road}
}
```

As you may recall, `wellhouse` already has a prototype: `room`. But this
is not a problem. One of the advantages of our dynamic prototyping
system is that objects may have any number of prototypes. Since
prototyping is implemented using specialized modules, when an object is
sent a message that it can't handle itself, Ruby will keep searching up an
object's ancestor chain, from one `Prototype` to the next, looking for a
matching method. (This also puts us one-up on JavaScript's
single-inheritance prototype system!)

Finally, we make `end_of_road` a container:

```ruby
Object(end_of_road) { |o| o.prototype(container) }
```

We then proceed to tell our adventurer to pick up a bottle of water from
the wellhouse, and put it down at the end of the road:

```console
> adventurer.go(:north)
You are inside a small building, a wellhouse for a large spring.
> adventurer.take("water")
You take a bottle of water.
> adventurer.inventory
You have a bottle of water
> adventurer.look
You are inside a small building, a wellhouse for a large spring.
There is a shiny brass lamp here.
There is some food here.
> adventurer.go(:south)
You are standing at the end of a road before a small brick building.
Around you is a forest.  A small stream flows out of the building and
down a gully.
> adventurer.drop("water")
You drop a bottle of water.
> adventurer.look
You are standing at the end of a road before a small brick building.
Around you is a forest.  A small stream flows out of the building and
down a gully.
There is a bottle of water here.
```

And with that, we now have a small but functional system which allows us to move
around the game world and interact with it.

## Reflections

We've written the beginnings of a text adventure game in a
prototype-based style. Now, let's take a step back and talk about what
the point of this exercise was.

There is a strong argument to be made that prototype-based inheritance
more closely maps to how humans normally think through problems than
does class-based inheritance. Quoting the paper "[Classes vs.
Prototypes: Some Philosophical and Historical
Observations](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.56.4713)":

> A typical argument in favor of prototypes is that people seem to be a
> lot better at dealing with specific examples first, then generalizing
> from them, than they are at absorbing general abstract principles
> first and later applying them in particular cases, ... the ability to
> modify and evolve objects at the level of individual objects reduces
> the need for a priori classification and encourages a more iterative
> programming and design style.

As we built up our adventure game, we immediately added concrete objects
to the system as soon as we thought them up. We added an `adventurer`,
and then an `end_of_road` for the adventurer to start out in. Then
later, as we added more objects, we generalized out commonalities into
objects like `room` and `container`. Our program design emerged
completely organically, and our abstractions emerged as soon as we
needed them, but no sooner. This kind of emergent, organic design
process is one of the ideals of agile software development, and
prototype-based systems seem to encourage it.

Of course, the way we jammed prototypes into a class-based language here
is a horrendous hack: please don't use it in a production system!
But the experience of writing code in a prototyped style can teach
us a lot. We can use what we've learned to influence our daily
coding. We might prototype (heh) a system's design by writing one-off
objects at first, adding methods to their singleton classes. Then, as
patterns of interaction emerge, we might capture the design using
classes. Prototypes can also teach us to do more with delegation and
composition, building families of collaborating objects rather than
hierarchies of related behavior.

Now that we've reached the end of our journey, I hope you've found 
this trip through prototype-land illuminating and thought-provoking. 
I'm still a relative newb to this way of thinking, so if you
have anything to add -- i.e. other benefits of using prototypes; subtle gotchas;
experiences from prototype-based languages, or alternative implementations of
any of the code above, please don't hesitate to pipe up in the comments. Also,
if you want clarifications about any of the gnarly metaprogramming I used to
bash Ruby into a semblance of a prototype-based language, feel free to ask --
but I can't guarantee that the answers will make any more sense than the
code :-)

> **NOTE:** If you had fun reading this article, you may also enjoy reading Avdi's 
> blog post on the [Prototype Pattern](http://devblog.avdi.org/?p=5560), a design pattern that takes 
> ideas from prototype-based programming and applies them to class-based
> modeling. That post started as a section of this article that gained a life
> of its own.


================================================
FILE: articles/v5/README.md
================================================
The articles in this folder are from Practicing Ruby's fifth volume, which ran from
August 2012 to December 2012.

You can also read them for free online at practicingruby.com.


================================================
FILE: articles/v6/001-parsing-json.md
================================================
*This article was written by Aaron Patterson, a Ruby
developer living in Seattle, WA.  He's been having fun writing Ruby for the past
7 years, and hopes to share his love of Ruby with you.*

Hey everybody!  I hope you're having a great day today!  The sun has peeked out
of the clouds for a bit today, so I'm doing great!

In this article, we're going to be looking at some compiler tools for use with Ruby.  In
order to explore these tools, we'll write a JSON parser.  I know you're saying,
"but Aaron, *why* write a JSON parser?  Don't we have like 1,234,567 of them?".
Yes!  We do have precisely 1,234,567 JSON parsers available in Ruby!  We're
going to parse JSON because the grammar is simple enough that we can finish the
parser in one sitting, and because the grammar is complex enough that we can
exercise some of Ruby's compiler tools.

As you read on, keep in mind that this isn't an article about parsing JSON, 
it's an article about using parser and compiler tools in Ruby.

## The Tools We'll Be Using

I'm going to be testing this with Ruby 2.1.0, but it should work under any
flavor of Ruby you wish to try.  Mainly, we will be using a tool called `Racc`,
and a tool called `StringScanner`.

**Racc**

We'll be using Racc to generate our parser.  Racc is an LALR parser generator
similar to YACC.  YACC stands for "Yet Another Compiler Compiler", but this is
the Ruby version, hence "Racc".  Racc converts a grammar file (the ".y" file)
to a Ruby file that contains state transitions.  These state transitions are
interpreted by the Racc state machine (or runtime).  The Racc runtime ships
with Ruby, but the tool that converts the ".y" files to state tables does not.
In order to install the converter, do `gem install racc`.

We will write ".y" files, but users cannot run the ".y" files.  First we convert
them to runnable Ruby code, and ship the runnable Ruby code in our gem.  In
practical terms, this means that *only we install the Racc gem*, other users
do not need it.

Don't worry if this doesn't make sense right now.  It will become more clear
when we get our hands dirty and start playing with code.

**StringScanner**

Just like the name implies, [StringScanner](http://ruby-doc.org/stdlib-1.9.3/libdoc/strscan/rdoc/StringScanner.html)
is a class that helps us scan strings.  It keeps track of where we are
in the string, and lets us advance forward via regular expressions or by
character.

Let's try it out!  First we'll create a `StringScanner` object, then we'll scan
some letters from it:

```ruby
require 'strscan'

ss = StringScanner.new 'aabbbbb' #=> #<StringScanner 0/7 @ "aabbb...">
ss.scan /a/ #=> "a"
ss.scan /a/ #=> "a"
ss.scan /a/ #=> nil
ss #=> #<StringScanner 2/7 "aa" @ "bbbbb">
```

Notice that the third call to
[StringScanner#scan](http://ruby-doc.org/stdlib-1.9.3/libdoc/strscan/rdoc/StringScanner.html#method-i-scan)
resulted in a `nil`, since the regular expression did not match from the current
position.  Also note that when you inspect the `StringScanner` instance, you can
see the position of the scanner (in this case `2/7`).

We can also move through the scanner character by character using
[StringScanner#getch](http://ruby-doc.org/stdlib-1.9.3/libdoc/strscan/rdoc/StringScanner.html#method-i-getch):

```ruby
ss #=> #<StringScanner 2/7 "aa" @ "bbbbb">
ss.getch #=> "b"

ss #=> #<StringScanner 3/7 "aab" @ "bbbb">
```

The `getch` method returns the next character, and advances the pointer by one.

Now that we've covered the basics for scanning strings, let's take a 
look at using Racc.

## Racc Basics

As I said earlier, Racc is an LALR parser generator.  You can think of it as a
system that lets you write limited regular expressions that can execute
arbitrary code at different points as they're being evaluated.

Let's look at an example.  Suppose we have a pattern we want to match:
`(a|c)*abb`.  That is, we want to match any number of 'a' or 'c' followed by
'abb'.  To translate this to a Racc grammar, we try to break up this regular
expression to smaller parts, and assemble them as the whole.  Each part is
called a "production".  Let's try breaking up this regular expression so that we
can see what the productions look like, and the format of a Racc grammar file.

First we create our grammar file.  At the top of the file, we declare the Ruby
class to be produced, followed by the `rule` keyword to indicate that we're
going to declare the productions, followed by the `end` keyword to indicate the
end of the productions:

```
class Parser
rule
end
```

Next let's add the production for "a|c".  We'll call this production `a_or_c`:


```
class Parser
rule
  a_or_c : 'a' | 'c' ;
end
```

Now we have a rule named `a_or_c`, and it matches the characters 'a' or 'c'.  In
order to match one or more `a_or_c` productions, we'll add a recursive
production called `a_or_cs`:

```
class Parser
rule
  a_or_cs
    : a_or_cs a_or_c
    | a_or_c
    ;
  a_or_c : 'a' | 'c' ;
end
```

The `a_or_cs` production recurses on itself, equivalent to the regular
expression `(a|c)+`.  Next, a production for 'abb':

```
class Parser
rule
  # One or more 'a' or 'c' characters: (a|c)+
  a_or_cs
    : a_or_cs a_or_c
    | a_or_c
    ;
  a_or_c : 'a' | 'c' ;
  # The literal sequence 'abb'
  abb    : 'a' 'b' 'b' ;
end
```

Finally, the `string` production ties everything together:


```
class Parser
rule
  string
    : a_or_cs abb
    | abb
    ;
  a_or_cs
    : a_or_cs a_or_c
    | a_or_c
    ;
  a_or_c : 'a' | 'c' ;
  abb    : 'a' 'b' 'b';
end
```

This final production matches one or more 'a' or 'c' characters followed by
'abb', or just the string 'abb' on its own.  This is equivalent to our original
regular expression of `(a|c)*abb`.

**But Aaron, this is so long!**

I know, it's much longer than the regular expression version.  However, we can
add arbitrary Ruby code to be executed at any point in the matching process.
For example, every time we find just the string "abb", we can execute some
arbitrary code:

```
class Parser
rule
  # (a|c)*abb -- the first alternative of a rule is introduced by ':',
  # and every further alternative by '|'.
  string
    : a_or_cs abb
    | abb
    ;
  a_or_cs
    : a_or_cs a_or_c
    | a_or_c
    ;
  a_or_c : 'a' | 'c' ;
  # The action in curly braces runs each time this rule is matched.
  abb    : 'a' 'b' 'b' { puts "I found abb!" };
end
```

The Ruby code we want to execute should be wrapped in curly braces and placed
after the rule where we want the trigger to fire.

To use this parser, we also need a tokenizer that can break the input
data into tokens, along with some other boilerplate code. If you are curious
about how that works, you can check out [this standalone
example](https://gist.githubusercontent.com/sandal/9532497/raw/8e3bb03fc24c8f6604f96516bf242e7e13d0f4eb/parser_example.y).

Now that we've covered the basics, we can use knowledge we have so far to build 
an event based JSON parser and tokenizer.

## Building our JSON Parser

Our JSON parser is going to consist of three different objects: a parser, a
tokenizer, and a document handler. The parser will be written with a Racc grammar, 
and will ask the tokenizer for input from the input stream.  Whenever the parser 
can identify a part of the JSON stream, it will send an event to the document 
handler.  The document handler is responsible for collecting the JSON 
information and translating it to a Ruby data structure. When we read in 
a JSON document, the following method calls are made:

![method calls](//i.imgur.com/HZ0Sa.png)

It's time to get started building this system. We'll focus on building the 
tokenizer first, then work on the grammar for the parser, and finally implement 
the document handler.

## Building the tokenizer

Our tokenizer is going to be constructed with an IO object.  We'll read the
JSON data from the IO object.  Every time `next_token` is called, the tokenizer
will read a token from the input and return it. Our tokenizer will return the 
following tokens, which we derived from the [JSON spec](http://www.json.org/):

* Strings
* Numbers
* True
* False
* Null

Complex types like arrays and objects will be determined by the parser.

**`next_token` return values:**

When the parser calls `next_token` on the tokenizer, it expects a two element
array or a `nil` to be returned.  The first element of the array must contain
the name of the token, and the second element can be anything (but most people
just add the matched text).  When a `nil` is returned, that indicates there are
no more tokens left in the tokenizer.

**`Tokenizer` class definition:**

Let's look at the source for the Tokenizer class and walk through it:

```ruby
module RJSON
  # Breaks a JSON document into the tokens consumed by the parser.
  class Tokenizer
    # Patterns for the terminal tokens, derived from json.org
    STRING = /"(?:[^"\\]|\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4}))*"/
    NUMBER = /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/
    TRUE   = /true/
    FALSE  = /false/
    NULL   = /null/

    # io - any object that responds to #read. The whole input is read up
    #      front, because StringScanner needs a String to work on.
    def initialize io
      @ss = StringScanner.new(io.read)
    end

    # Returns the next token as a [name, text] pair, or nil once the input
    # is exhausted. Text that matches none of the known patterns is handed
    # back one character at a time, with the character serving as both the
    # token name and its value.
    def next_token
      return nil if @ss.eos?

      # Try each known pattern in order; the first one that matches wins.
      [
        [:STRING, STRING],
        [:NUMBER, NUMBER],
        [:TRUE,   TRUE],
        [:FALSE,  FALSE],
        [:NULL,   NULL]
      ].each do |name, pattern|
        text = @ss.scan(pattern)
        return [name, text] if text
      end

      char = @ss.getch
      [char, char]
    end
  end
end
```

First we declare some regular expressions that we'll use along with the string
scanner.  These regular expressions were derived from the definitions on
[json.org](http://www.json.org).  We instantiate a string scanner object in the
constructor.  String scanner requires a string on construction, so we read the
IO object.  However, we could build an alternative tokenizer that reads from the
IO as needed.

The real work is done in the `next_token` method.  The `next_token` method
returns nil if there is nothing left to read from the string scanner, then it
tries each regular expression until it finds a match.  If it finds a match, it
returns the name of the token (for example `:STRING`) along with the text that
it matched.  If none of the regular expressions match, then we read one
character off the scanner, and return that character as both the name of the
token, and the value.

Let's try feeding the tokenizer a JSON string and see what tokens come out:

```ruby
tok = RJSON::Tokenizer.new StringIO.new '{"foo":null}'
#=> #<RJSON::Tokenizer:0x007fa8529fbeb8 @ss=#<StringScanner 0/12 @ "{\"foo...">>

tok.next_token #=> ["{", "{"]
tok.next_token #=> [:STRING, "\"foo\""]
tok.next_token #=> [":", ":"]
tok.next_token #=> [:NULL, "null"]
tok.next_token #=> ["}", "}"]
tok.next_token #=> nil
```

In this example, we wrap the JSON string with a `StringIO` object in order to
make the string quack like an IO.  Next, we try reading tokens from the
tokenizer.  Each token the Tokenizer understands has the name as the first value of
the array, where the unknown tokens have the single character value.  For
example, string tokens look like this: `[:STRING, "foo"]`, and unknown tokens
look like this: `['(', '(']`.   Finally, `nil` is returned when the input has
been exhausted.

This is it for our tokenizer.  The tokenizer is initialized with an `IO` object, 
and has only one method: `next_token`.  Now we can focus on the parser side.

## Building the parser

We have our tokenizer in place, so now it's time to assemble the parser.  First
we need to do a little house keeping.  We're going to generate a Ruby file from
our `.y` file.  The Ruby file needs to be regenerated every time the `.y` file
changes.  A Rake task sounds like the perfect solution.

**Defining a compile task:**

The first thing we'll add to the Rakefile is a rule that says *"translate .y files to
.rb files using the following command"*:

```ruby
rule '.rb' => '.y' do |t|
  sh "racc -l -o #{t.name} #{t.source}"
end
```

Then we'll add a "compile" task that depends on the generated `parser.rb` file:

```ruby
task :compile => 'lib/rjson/parser.rb'
```

We keep our grammar file as `lib/rjson/parser.y`, and when we run `rake
compile`, rake will automatically translate the `.y` file to a `.rb` file using
Racc.

Finally we make the test task depend on the compile task so that when we run
`rake test`, the compiled file is automatically generated:

```ruby
task :test => :compile
```

Now we can compile and test the `.y` file.

**Translating the JSON.org spec:**

We're going to translate the diagrams from [json.org](http://www.json.org/) to a
Racc grammar.  A JSON document should be an object or an array at the root, so
we'll make a production called `document` and it should be an `object` or an
`array`:

```
rule
  document
    : object
    | array
    ;
```

Next we need to define `array`.  The `array` production can either be empty, or
contain 1 or more values:

```
  array
    : '[' ']'
    | '[' values ']'
    ;
```

The `values` production can be recursively defined as one value, or many values
separated by a comma:

```
  values
    : values ',' value
    | value
    ;
```

The JSON spec defines a `value` as a string, number, object, array, true, false,
or null.  We'll define it the same way, but for the immediate values such as
NUMBER, TRUE, and FALSE, we'll use the token names we defined in the tokenizer:

```
  value
    : string
    | NUMBER
    | object
    | array
    | TRUE
    | FALSE
    | NULL
    ;
```

Now we need to define the `object` production.  Objects can be empty, or
have many pairs:

```
  object
    : '{' '}'
    | '{' pairs '}'
    ;
```

We can have one or more pairs, and they must be separated with a comma.  We can
define this recursively like we did with the array values:

```
  pairs
    : pairs ',' pair
    | pair
    ;
```

Finally, a pair is a string and value separated by a colon:

```
  pair
    : string ':' value
    ;
```

Now we let Racc know about our special tokens by declaring them at the top, and
we have our full parser:

```
class RJSON::Parser
token STRING NUMBER TRUE FALSE NULL
rule
  document
    : object
    | array
    ;
  object
    : '{' '}'
    | '{' pairs '}'
    ;
  pairs
    : pairs ',' pair
    | pair
    ;
  pair : string ':' value ;
  array
    : '[' ']'
    | '[' values ']'
    ;
  values
    : values ',' value
    | value
    ;
  value
    : string
    | NUMBER
    | object
    | array
    | TRUE
    | FALSE
    | NULL
    ;
  string : STRING ;
end
```

## Building the handler

Our parser will send events to a document handler.  The document handler will
assemble the beautiful JSON bits into lovely Ruby objects!  Granularity of the
events is really up to you, but I'm going to go with 5 events:

* `start_object` - called when an object is started
* `end_object`   - called when an object ends
* `start_array`  - called when an array is started
* `end_array`    - called when an array ends
* `scalar`       - called with terminal values like strings, true, false, etc

With these 5 events, we can assemble a Ruby object that represents the JSON
object we are parsing.

**Keeping track of events**

The handler we build will simply keep track of events sent to us by the parser.
This creates a tree-like data structure that we'll use to convert JSON to Ruby.

```ruby
module RJSON
  # Records the events sent by the parser as a tree of tagged arrays.
  #
  # @stack always starts with the [:root] node; its last element is the
  # node currently being filled in.
  class Handler
    def initialize
      @stack = [[:root]]
    end

    # Called when an object is started.
    def start_object
      open_node([:hash])
    end

    # Called when an array is started.
    def start_array
      open_node([:array])
    end

    # Called when an array ends: the finished node is popped off the
    # stack, so its parent becomes the current node again.
    def end_array
      @stack.pop
    end

    # Objects are closed exactly the same way as arrays.
    alias_method :end_object, :end_array

    # Called with terminal values; adds a [:scalar, value] leaf to the
    # current node.
    def scalar(s)
      @stack.last.push([:scalar, s])
    end

    private

    # Attaches +node+ as a child of the current node, then makes it the
    # current node.
    def open_node(node)
      @stack.last.push(node)
      @stack.push(node)
    end
  end
end

When the parser encounters the start of an object, the handler pushes a list on
the stack with the "hash" symbol to indicate the start of a hash.  Events that
are children will be added to the parent, then when the object end is
encountered the parent is popped off the stack.

This may be a little hard to understand, so let's look at some examples.  If we
parse this JSON: `{"foo":{"bar":null}}`, then the `@stack` variable will look
like this:

```ruby
[[:root,
  [:hash,
    [:scalar, "foo"],
    [:hash,
      [:scalar, "bar"],
      [:scalar, nil]]]]]
```

If we parse a JSON array, like this JSON: `["foo",null,true]`, the `@stack`
variable will look like this:

```ruby
[[:root,
  [:array,
    [:scalar, "foo"],
    [:scalar, nil],
    [:scalar, true]]]]
```

**Converting to Ruby:**

Now that we have an intermediate representation of the JSON, let's convert it to
a Ruby data structure.  To convert to a Ruby data structure, we can just write a
recursive function to process the tree:

```ruby
# Converts the recorded event tree into a Ruby data structure, starting
# from the last child of the root node.
def result
  top = @stack.first.last
  process(top.first, top.drop(1))
end

private

# Recursively converts one node of the tree.
#
# type - the node's tag (:array, :hash or :scalar)
# rest - the node's remaining elements: child nodes for :array and :hash,
#        the wrapped value for :scalar
#
# Returns nil for any other tag.
def process type, rest
  # Built lazily, because only container nodes have child nodes.
  children = -> { rest.map { |node| process(node.first, node.drop(1)) } }

  if type == :scalar
    rest.first
  elsif type == :array
    children.call
  elsif type == :hash
    # Children alternate key, value, key, value...
    Hash[children.call.each_slice(2).to_a]
  end
end
```

The `result` method removes the `root` node and sends the rest to the `process`
method.  When the `process` method encounters a `hash` symbol it builds a hash
using the children by recursively calling `process`.  Similarly, when an
`array` symbol is found, an array is constructed recursively with the children.
Scalar values are simply returned (which prevents an infinite loop).  Now if we
call `result` on our handler, we can get the Ruby object back.

Let's see it in action:

```ruby
require 'rjson'

input   = StringIO.new '{"foo":"bar"}'
tok     = RJSON::Tokenizer.new input
parser  = RJSON::Parser.new tok
handler = parser.parse
handler.result # => {"foo"=>"bar"}
```

**Cleaning up the RJSON API:**

We have a fully functional JSON parser.  Unfortunately, the API is not very
friendly.  Let's take the previous example, and package it up in a method:

```ruby
module RJSON
  # Parses the JSON document held in the String +json+ and returns the
  # handler's result.
  def self.load(json)
    tokenizer = RJSON::Tokenizer.new(StringIO.new(json))
    RJSON::Parser.new(tokenizer).parse.result
  end
end
```

Since we built our JSON parser to deal with IO from the start, we can add
another method for people who would like to pass a socket or file handle:

```ruby
module RJSON
  # Parses the JSON document read from +input+ (an IO-like object such as
  # a socket or file handle) and returns the handler's result.
  def self.load_io(input)
    tokenizer = RJSON::Tokenizer.new(input)
    RJSON::Parser.new(tokenizer).parse.result
  end

  # Convenience wrapper for callers that hold the JSON in a String.
  def self.load(json)
    load_io(StringIO.new(json))
  end
end
```

Now the interface is a bit more friendly:

```ruby
require 'rjson'
require 'open-uri'

RJSON.load '{"foo":"bar"}' # => {"foo"=>"bar"}
RJSON.load_io open('http://example.org/some_endpoint.json')
```

## Reflections 

So we've finished our JSON parser.  Along the way we've studied compiler
technology including the basics of parsers, tokenizers, and even interpreters
(yes, we actually interpreted our JSON!).  You should be proud of yourself!

The JSON parser we've built is versatile. We can:

* Use it in an event driven manner by implementing a Handler object
* Use a simpler API and just feed strings
* Stream in JSON via IO objects

I hope this article has given you the confidence to start playing with parser
and compiler technology in Ruby. Please leave a comment if you have any
questions for me.

## Post Script

I want to follow up with a few bits of minutiae that I omitted to maintain
clarity in the article:

* [Here](https://github.com/tenderlove/rjson/blob/master/lib/rjson/parser.y) is
the final grammar file for our JSON parser.  Notice 
the [---- inner section in the .y file](https://github.com/tenderlove/rjson/blob/master/lib/rjson/parser.y#L53).
Anything in that section is included *inside* the generated parser class.  This
is how we get the handler object to be passed to the parser.

* Our parser actually [does the
translation](https://github.com/tenderlove/rjson/blob/master/lib/rjson/parser.y#L42-50)
of JSON terminal nodes to Ruby.  So we're actually doing the translation of JSON
to Ruby in two places: the parser *and* the document handler.  The document
handler deals with structure where the parser deals with immediate values (like
true, false, etc).  An argument could be made that none or all of this
translation *should* be done in the parser.

* Finally, I mentioned that [the
tokenizer](https://github.com/tenderlove/rjson/blob/master/lib/rjson/tokenizer.rb)
buffers.  I implemented a simple non-buffering tokenizer that you can read
[here](https://github.com/tenderlove/rjson/blob/master/lib/rjson/stream_tokenizer.rb).
It's pretty messy, but I think could be cleaned up by using a state machine.

That's all. Thanks for reading! <3 <3 <3

> NOTE: If you'd like to learn more about this topic, consider doing the Practicing Ruby self-guided course on [Streams, Files, and Sockets](https://practicingruby.com/articles/study-guide-1?u=dc2ab0f9bb). You've already completed one of its reading exercises by working through this article!


================================================
FILE: articles/v6/002-code-reading.md
================================================
> **NOTE:** This issue of Practicing Ruby is one of several content experiments that was published in Volume 6. It intentionally breaks away from the traditional article format that we have developed over the years in the hopes of finding new and interesting ways for you to level up your programming skills.

When I prepare examples for Practicing Ruby articles, I work hard to find a linear path through a codebase so that I can tell a single coherent story. By doing this, I'm trying to help you navigate through complex problem spaces without getting lost along the way. This approach has its strong points, but it comes at the cost of discouraging ad-hoc exploration in a subtle way: if I give you a single path to follow through a codebase, you are less likely to veer off trail and discover something that I neglected to mention. This is a shame, because Practicing Ruby code examples are usually much deeper than the stories I tell about them.

In this experiment, we will discover what happens when I drop you directly into a codebase to explore on your own. In particular, you'll be working through understanding the implementation of [Weiqi](https://github.com/elm-city-craftworks/weiqi), the Go-playing desktop application shown in the following video:


<div align="center">
<iframe width="720" height="480"
src="//www.youtube.com/embed/5RZVfK1GdA8?rel=0" frameborder="0" allowfullscreen></iframe>
</div>


Rather than attempting to rush off to a particular destination that I have planned for you in advance, I'd like you to try forging your own path and see where it takes you. Start with the project's [README](https://github.com/elm-city-craftworks/weiqi/blob/master/README.md) and then follow your interests from there.

Once you've had a chance to take a look around, consider either coming back here to discuss the things you found interesting, or continuing the conversation over on Github via tickets and pull requests. The code has its good and bad parts (as you'll soon find out), so there is definitely plenty to discuss.

And most importantly, please let me know whether this particular experiment seemed to work for you or not. You're welcome to leave a comment below, or if you'd be more comfortable giving feedback in private, you can email me at: **gregory@practicingruby.com**.


================================================
FILE: articles/v6/003-actors.md
================================================
> This issue was a collaboration with [Alberto Fernández Capel][afcapel], a Ruby developer
from Spain. Although it has been through many revisions since
we started, Alberto's ideas, code, and explanations provided
an excellent starting point that led us to publish this article.

Conventional wisdom says that concurrent programming is hard, especially in 
Ruby. This basic assumption is what led many Rubyists to take an interest
in languages like Erlang and Scala -- their baked in support for 
the [actor model][actors] is meant to make concurrent systems 
much easier for everyday programmers to implement and understand.

But do you really need to look outside of Ruby to find concurrency primitives
that can make your work easier? The answer to that question probably 
depends on the levels of concurrency and availability that you require, but
things have definitely been shaping up in recent years. In particular, 
the [Celluloid][celluloid] framework has brought us a convenient and clean way to implement
actor-based concurrent systems in Ruby.

In order to appreciate what Celluloid can do for you, you first need to
understand what the actor model is, and what benefits it offers over the
traditional approach of directly using threads and locks for concurrent 
programming. In this article, we'll try to shed some light on those points by
solving a classic concurrency puzzle in three ways: Using Ruby's built-in
primitives (threads and mutex locks), using the Celluloid framework, and using a
minimal implementation of the actor model that we'll build from scratch.

By the end of this article, you certainly won't be a concurrency expert
if you aren't already, but you'll have a nice head start on some
basic concepts that will help you decide how to tackle concurrent programming
within your own projects. Let's begin!

## The Dining Philosophers Problem

The [Dining Philosophers][philosophers] problem was formulated by Edsger Dijkstra in 1965 to
illustrate the kind of issues we can find when multiple processes compete to
gain access to exclusive resources.

In this problem, five philosophers meet to have dinner. They sit at a round
table and each one has a bowl of rice in front of them. There are also five
chopsticks, one between each philosopher. The philosophers spend their time
thinking about _The Meaning of Life_. Whenever they get
hungry, they try to eat. But a philosopher needs a chopstick in each
hand in order to grab the rice. If any other
philosopher has already taken one of those chopsticks, the hungry
philosopher will wait until that chopstick is available.

This problem is interesting because if it is not properly solved it can easily
lead to deadlock issues. We'll take a look at those issues soon, but first let's
convert this problem domain into a few basic Ruby objects.

### Modeling the table and its chopsticks

All three of the solutions we'll discuss in this article rely on a `Chopstick`
class and a `Table` class. The definitions of both classes are shown below:

```ruby
# A single chopstick. It is backed by a Mutex, so at most one thread can
# hold it at any given moment.
class Chopstick
  def initialize
    @lock = Mutex.new
  end

  # Claims the chopstick for the calling thread, waiting if another thread
  # currently holds it.
  def take
    @lock.lock
  end

  # Puts the chopstick back on the table. Mutex#unlock raises ThreadError
  # when the calling thread does not hold the lock; that case is reported
  # on stdout instead of being propagated.
  def drop
    begin
      @lock.unlock
    rescue ThreadError
      puts "Trying to drop a chopstick not acquired"
    end
  end

  # True while some thread is holding the chopstick.
  def in_use?
    @lock.locked?
  end
end

# Models the round table: one chopstick per seat, laid out so that every
# pair of adjacent seats shares exactly one chopstick.
class Table
  # num_seats - number of seats (and therefore chopsticks) at the table.
  def initialize(num_seats)
    @chopsticks = Array.new(num_seats) { Chopstick.new }
  end

  # Chopstick to the left of the given seat. It is the same object as the
  # right-hand chopstick of the previous seat; positions wrap around.
  def left_chopstick_at(position)
    @chopsticks[(position - 1) % @chopsticks.size]
  end

  # Chopstick to the right of the given seat. It is the same object as the
  # left-hand chopstick of the next seat, so neighbors really do compete
  # for it.
  def right_chopstick_at(position)
    @chopsticks[position % @chopsticks.size]
  end

  # Number of chopsticks currently held by someone.
  def chopsticks_in_use
    @chopsticks.count(&:in_use?)
  end
end
```

The `Chopstick` class is just a thin wrapper around a regular Ruby mutex 
that will ensure that two philosophers can not grab the same chopstick 
at the same time. The `Table` class deals with the geometry of the problem; 
it knows where each seat is at the table, which chopstick is to the left 
or to the right of that seat, and how many chopsticks are currently in use.

Now that you've seen the basic domain objects that model this problem, we'll
look at different ways of implementing the behavior of the philosophers. 
We'll start with what *doesn't* work.

## A solution that leads to deadlocks

The `Philosopher` class shown below would seem to be the most straightforward
solution to this problem, but has a fatal flaw that prevents it from being
thread safe. Can you spot it?

```ruby
# A philosopher who alternates between thinking and eating, using the two
# chopsticks next to their seat.
class Philosopher
  def initialize(name)
    @name = name
  end

  # Looks up the chopsticks for the given seat, then thinks and eats forever.
  # This method never returns.
  def dine(table, position)
    @left_chopstick  = table.left_chopstick_at(position)
    @right_chopstick = table.right_chopstick_at(position)

    loop do
      think
      eat
    end
  end

  def think
    puts "#{@name} is thinking"
  end

  # Acquires both chopsticks, announces the meal, then releases them.
  def eat
    take_chopsticks

    puts "#{@name} is eating."

    drop_chopsticks
  end

  # Takes the left chopstick first, then the right one.
  def take_chopsticks
    @left_chopstick.take
    @right_chopstick.take
  end

  def drop_chopsticks
    @left_chopstick.drop
    @right_chopstick.drop
  end
end
```

If you're still scratching your head, consider what happens when each
philosopher object is given its own thread, and all the philosophers attempt to
eat at the same time. 

In this naive implementation, it is
possible to reach a state in which every philosopher picks up their left-hand
chopstick, leaving no chopsticks on the table. In that scenario, every
philosopher would simply wait forever for their right-hand chopstick to 
become available -- resulting in a deadlock. You can reproduce the problem
by running the following code:

```ruby
names = %w{Heraclitus Aristotle Epictetus Schopenhauer Popper}

philosophers = names.map { |name| Philosopher.new(name) }
table        = Table.new(philosophers.size)

# One thread per philosopher; the index doubles as the seat position.
threads = philosophers.map.with_index do |philosopher, i|
  Thread.new { philosopher.dine(table, i) }
end

# Wait on the philosopher threads from the main thread.
threads.each(&:join)
sleep
```

Ruby is smart enough to inform you of what went wrong, so you should end up
seeing a backtrace that looks something like this:

```console
Aristotle is thinking
Popper is eating.
Popper is thinking
Epictetus is eating.
Epictetus is thinking
Heraclitus is eating.
Heraclitus is thinking
Schopenhauer is eating.
Schopenhauer is thinking

dining_philosophers_uncoordinated.rb:79:in `join': deadlock detected (fatal)
  from dining_philosophers_uncoordinated.rb:79:in `each'
  from dining_philosophers_uncoordinated.rb:79:in `<main>'
```

In many situations, the most simple solution tends to be the best one, but this
is obviously not one of those cases. Since we've learned the hard way that the
philosophers cannot be safely left to their own devices, we'll need to do more
to make sure their behaviors remain coordinated.

### A coordinated mutex-based solution

One easy solution to this issue is to introduce a `Waiter` object into the mix. In this
model, the philosopher must ask the waiter before eating. If the number of chopsticks
in use is four or more, the waiter will make the philosopher wait until someone
finishes eating. This will ensure that at least one philosopher will be able to eat 
at any time, avoiding the deadlock condition.

There's still a catch, though. From the moment the waiter checks the number of chopsticks
in use until the next philosopher starts to eat we have a critical region in our
program: If we let two concurrent threads execute that code at the same time there
is still a chance of a deadlock. For example, suppose the waiter checks the number of
chopsticks used and sees it is 3. At that moment, the scheduler yields control to
another philosopher who is just picking up a chopstick. When the execution flow
comes back to the original thread, it will allow the original philosopher to
eat, even though four or more chopsticks may already be in use.

To avoid this situation we need to protect the critical region with a mutex, as
shown below:


```ruby
# Gatekeeper that decides when a philosopher may pick up chopsticks.
class Waiter
  # capacity - number of chopsticks in use at which newcomers must wait.
  def initialize(capacity)
    @capacity = capacity
    @mutex    = Mutex.new
  end

  # Lets the philosopher take chopsticks once fewer than `capacity` are in
  # use, then lets them eat. Only the check-and-take step runs under the
  # lock; eating happens after the lock has been released.
  def serve(table, philosopher)
    @mutex.synchronize do
      until table.chopsticks_in_use < @capacity
        sleep(rand)
      end

      philosopher.take_chopsticks
    end

    philosopher.eat
  end
end
```

Introducing the `Waiter` object requires us to make some minor changes to our
`Philosopher` object, but they are fairly straightforward: 

```ruby
# Reopens Philosopher to route every meal through a waiter.
class Philosopher

  # ... all omitted code same as before

  # Same loop as before, except that the philosopher no longer eats on
  # their own: each round ends with a request to the waiter.
  def dine(table, position, waiter)
    @left_chopstick  = table.left_chopstick_at(position)
    @right_chopstick = table.right_chopstick_at(position)

    loop do
      think

      # Instead of calling eat() directly, make a request to the waiter,
      # passing self so the waiter can call back into this philosopher.
      waiter.serve(table, self)
    end
  end

  def eat
    # removed take_chopsticks call, as that's now handled by the waiter

    puts "#{@name} is eating."

    drop_chopsticks
  end
end
```

The runner code also needs minor tweaks, but is mostly similar to what
you saw earlier:

```ruby
names = %w{Heraclitus Aristotle Epictetus Schopenhauer Popper}

philosophers = names.map { |name| Philosopher.new(name) }

table  = Table.new(philosophers.size)
# The waiter's capacity is one less than the number of philosophers.
waiter = Waiter.new(philosophers.size - 1)

# One thread per philosopher; the index doubles as the seat position.
threads = philosophers.map.with_index do |philosopher, i|
  Thread.new { philosopher.dine(table, i, waiter) }
end

threads.each(&:join)
sleep
```

This approach is reasonable and solves the deadlock issue, but using mutexes 
to synchronize code requires some low level thinking. Even in this simple 
problem, there were several gotchas to consider. As programs get more
complicated, it becomes really difficult to keep track of critical regions 
while ensuring that the code behaves properly when accessing them.

The actor model is meant to provide a more systematic and natural way of 
sharing data between threads. We'll now take a look at an actor-based 
solution to this problem so that we can see how it compares to this 
mutex-based approach.

## An actor-based solution using Celluloid

We'll now rework our `Philosopher` and `Waiter` classes to make use of 
Celluloid. Much of the code will remain the same, but some important
details will change. The full class definitions are shown below to preserve
context, but the changed portions are marked with comments.

We'll spend the rest of the article explaining the inner workings 
of this code, so don't worry about understanding every last detail. Instead,
just try to get a basic idea of what's going on here:

```ruby
class Philosopher
  include Celluloid

  def initialize(name)
    @name = name
  end

  # Switching to the actor model requires us to get rid of our
  # more procedural event loop in favor of a message-oriented
  # approach using recursion. The call to think() eventually
  # leads to a call to eat(), which in turn calls back to think(),
  # completing the loop.

  def dine(table, position, waiter)
    @waiter = waiter

    @left_chopstick  = table.left_chopstick_at(position)
    @right_chopstick = table.right_chopstick_at(position)

    think
  end

  def think
    puts "#{@name} is thinking."
    sleep(rand)

    # Asynchronously notifies the waiter object that
    # the philosopher is ready to eat. Actor.current is passed
    # rather than self so that the waiter receives the actor,
    # not the bare object behind it.

    @waiter.async.request_to_eat(Actor.current)
  end

  def eat
    take_chopsticks

    puts "#{@name} is eating."
    sleep(rand)

    drop_chopsticks

    # Asynchronously notifies the waiter
    # that the philosopher has finished eating

    @waiter.async.done_eating(Actor.current)

    think
  end

  # Takes the left chopstick first, then the right one.
  def take_chopsticks
    @left_chopstick.take
    @right_chopstick.take
  end

  def drop_chopsticks
    @left_chopstick.drop
    @right_chopstick.drop
  end

  # This code is necessary in order for Celluloid to shut down cleanly
  def finalize
    drop_chopsticks
  end
end


class Waiter
  include Celluloid

  def initialize
    # Philosophers that have been told to eat and have not yet reported back.
    @eating   = []
  end

  # because synchronized data access is ensured
  # by the actor model, this code is much more
  # simple than its mutex-based counterpart. However,
  # this approach requires two methods
  # (one to start and one to stop the eating process),
  # where the previous approach used a single serve() method.

  # Tells the philosopher to eat unless they are already listed as eating.
  #
  # NOTE(review): unlike the mutex-based waiter, nothing here compares the
  # number of eaters against a limit, so it looks like every philosopher can
  # be told to eat at once and each could end up holding only their left
  # chopstick -- confirm whether the original deadlock is still reachable.
  def request_to_eat(philosopher)
    return if @eating.include?(philosopher)

    @eating << philosopher
    philosopher.async.eat
  end

  # Removes the philosopher from the list of current eaters.
  def done_eating(philosopher)
    @eating.delete(philosopher)
  end
end
```

The runner code is similar to before, with only some very minor changes:

```ruby
names = %w{Heraclitus Aristotle Epictetus Schopenhauer Popper}

philosophers = names.map { |name| Philosopher.new(name) }

waiter = Waiter.new # no longer needs a "capacity" argument
table = Table.new(philosophers.size)

philosophers.each_with_index do |philosopher, i| 
  # No longer manually create a thread, rely on async() to do that for us.
  philosopher.async.dine(table, i, waiter) 
end

# Called without arguments, sleep suspends the main thread indefinitely,
# which keeps the process alive while the actors do their work.
sleep
```

The runtime behavior of this solution is similar to that of our mutex-based
solution. However, the following differences in implementation are worth noting:

* Each class that mixes in `Celluloid` becomes an actor with its own thread of execution.

* The Celluloid library intercepts any method call run through the `async` proxy
object and stores it in the actor's mailbox. The actor's thread will sequentially 
execute those stored methods, one after another.

* This behavior makes it so that we don't need to manage threads and mutex
synchronization explicitly. The Celluloid library handles that under 
the hood in an object-oriented manner.

* If we encapsulate all data inside actor objects, only the actor's
thread will be able to access and modify its own data. That prevents the
possibility of two threads writing to a critical region at the same time,
which eliminates the risk of deadlocks and data corruption.

These features are very useful for simplifying the way we think about
concurrent programming, but you're probably wondering how much magic is involved
in implementing them. Let's build our own minimal drop-in replacement for
Celluloid to find out!

## Rolling our own actor model

Celluloid provides much more functionality than what we can discuss
in this article, but building a barebones implementation of the actor
model is within our reach. In fact, the following 80 lines of code are
enough to serve as a replacement for our use of Celluloid in the 
previous example:

```ruby
require 'thread'

# Minimal actor implementation: every instance of a class that includes this
# module is wrapped in a Proxy that owns a mailbox and a worker thread.
module Actor  # To use this, you'd include Actor instead of Celluloid
  module ClassMethods
    # Builds the object as usual (constructor arguments and block are passed
    # along by the bare super), then hides it behind a Proxy.
    def new(*args, &block)
      Proxy.new(super)
    end
  end

  class << self
    def included(klass)
      klass.extend(ClassMethods)
    end

    # Proxy of the actor whose worker thread is running the calling code,
    # or nil when called from any other thread.
    def current
      Thread.current[:actor]
    end
  end

  class Proxy
    # Sentinel pushed onto the mailbox so that a worker blocked in
    # Queue#pop wakes up and notices that it has been terminated.
    SHUTDOWN = Object.new.freeze

    def initialize(target)
      @target  = target
      @mailbox = Queue.new
      @mutex   = Mutex.new
      @running = true

      @async_proxy = AsyncProxy.new(self)

      @thread = Thread.new do
        # Lets code running inside the actor find its own proxy.
        Thread.current[:actor] = self
        process_inbox
      end
    end

    # Receiver for asynchronous calls: actor.async.some_method(args)
    def async
      @async_proxy
    end

    # Queues a message; the worker thread will execute it later.
    def send_later(meth, *args)
      @mailbox << [meth, args]
    end

    # Stops the worker thread. Messages still waiting in the mailbox are
    # not processed. The sentinel is needed because an idle worker is parked
    # inside Queue#pop and would otherwise never re-check @running.
    def terminate
      @mailbox << SHUTDOWN
      @running = false
    end

    # Synchronous calls run in the caller's thread, serialized against the
    # worker thread by the mutex in process_message.
    def method_missing(meth, *args, &block)
      process_message(meth, *args, &block)
    end

    # Keeps respond_to? consistent with what method_missing forwards.
    def respond_to_missing?(meth, include_private = false)
      @target.respond_to?(meth) || super
    end

    private

    def process_inbox
      while @running
        message = @mailbox.pop
        break if message.equal?(SHUTDOWN)

        meth, args = message
        process_message(meth, *args)
      end

    rescue Exception => ex
      puts "Error while running actor: #{ex}"
    end

    # Only one message at a time may touch the target, whichever thread
    # it comes from.
    def process_message(meth, *args, &block)
      @mutex.synchronize do
        @target.public_send(meth, *args, &block)
      end
    end
  end

  # Turns any method call into a message queued on the actor's mailbox.
  class AsyncProxy
    def initialize(actor)
      @actor = actor
    end

    def method_missing(meth, *args)
      @actor.send_later(meth, *args)
    end
  end
end
```

This code mostly builds upon concepts that have already been covered in this 
article, so it shouldn't be too hard to follow with a bit of effort. That
said, combining meta-programming techniques and concurrency can
lead to code that makes your eyes glaze over, so we should also make
an attempt to discuss how this module works at the high level. Let's do that
now!

Any class that includes the `Actor` module will be converted into an actor and will be 
able to receive asynchronous calls. We accomplish this by overriding the constructor
of the target class so that we can return a proxy object every time an object of 
that class is instantiated. We also store the proxy object in a
thread level variable. This is necessary because when sending messages between actors, 
if we refer to self in method calls we will expose the inner target object, 
instead of the proxy. This same [gotcha is also present in Celluloid](https://github.com/celluloid/celluloid/wiki/Gotchas).

Using this mixin, whenever we attempt to create an instance of a `Philosopher`
object, we will actually receive an instance of `Actor::Proxy`. The `Philosopher` 
class is left mostly untouched, and so the actor-like behavior is handled
entirely by the proxy object. Upon instantiation, that proxy creates
a mailbox to store the incoming asynchronous messages and a thread to process those 
messages. The inbox is a thread-safe queue that ensures that incoming messages
are processed sequentially even if they arrive at the same time. Whenever the inbox
is empty, the actor's thread will be blocked until a new message needs to
be processed.

This is roughly how things work in Celluloid as well, although its
implementation is much more complex due to the many additional features it
offers. Still, if you understand this code, you're well on your way to having a
working knowledge of what the actor model is all about.

### Actors are helpful, but are not a golden hammer

Even this minimal implementation of the actor model gets the low-level
concurrency primitives out of our ordinary class definitions, and into a
centralized place where it can be handled in a consistent and reliable way.
Celluloid goes a lot farther than we did here by providing excellent fault
tolerance mechanisms, the ability to recover from failures, and lots of other
interesting stuff. However, these benefits do come with their own share of
costs and potential pitfalls.

So what can go wrong when using actors in Ruby? We've already hinted at the potential 
issues that can arise due to the issue of [self schizophrenia][self] in 
proxy objects. Perhaps more complicated is the issue of mutable state: while
using actors guarantees that the state *within* an object will be accessed
sequentially, it does not provide the same guarantee for the messages that are
being passed around between objects. In languages like Erlang, messages consist of immutable parameters, so consistency 
is enforced at the language level. In
Ruby, we don't have that constraint, so we either need to solve this problem by
convention, or by freezing the objects we pass around as arguments -- which is quite
restrictive!

Without attempting to enumerate all the other things that could
go wrong, the point here is simply that there is no such thing as a golden hammer 
when it comes to concurrent programming. Hopefully this article has
given you a basic sense of both the benefits and drawbacks of applying the
actor model in Ruby, along with enough background knowledge to apply some
of these ideas in your own projects. If it has done so, please do share your
story.

### Source code from this article

All of the code from this article is in 
Practicing Ruby's [example repository][examples],
but the links below highlight the main points of interest:

* [A solution that leads to deadlocks](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/mutex_uncoordinated/dining_philosophers.rb)
* [A coordinated mutex-based solution](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/mutex_coordinated/dining_philosophers.rb)
* [An actor-based solution using Celluloid](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/celluloid/dining_philosophers.rb)
* [An actor-based solution using a hand-rolled actor library](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/actors_from_scratch/dining_philosophers.rb)
* [Minimal implementation of the actor model](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/lib/actors.rb)
* [Chopsticks class definition](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/lib/chopstick.rb)
* [Table class definition](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/lib/table.rb)

If you see anything in the code that you have questions about, don't hesitate to
ask.

[examples]: https://github.com/elm-city-craftworks/practicing-ruby-examples/tree/master/v6/003
[actors]: http://en.wikipedia.org/wiki/Actor_model
[celluloid]: http://celluloid.io/
[philosophers]: http://en.wikipedia.org/wiki/Dining_philosophers
[self]: http://en.wikipedia.org/wiki/Schizophrenia_%28object-oriented_programming%29
[afcapel]: https://github.com/afcapel


================================================
FILE: articles/v6/004-code-reuse.md
================================================
% Code reuse in Ruby -- It's complicated 
% Gregory Brown (practicingruby.com)
% February 12, 2013

\begin{abstract} 
Ruby provides at least seven common ways of reusing code, all of them with their
own strengths and weaknesses. However, the main thing that separates these
various techniques is whether they are a form of \emph{implementation sharing} or a
form of \emph{behavior sharing}. This article explains what distinguishes those
two categories, the kinds of complexities that can arise from each of
them, and some practical recommendations that you can apply to reusing code
within your own projects.
\end{abstract}

# 1. Introduction

As a deeply object-oriented programming language, Ruby permits code reuse in
more ways than most languages. When put in the right hands, Ruby's flexibility
is extremely powerful, allowing us to model our systems any way we want.
But with great power comes great responsibility.

If not used carefully, Ruby's code reuse mechanisms can quickly erode most (if
not all) of the benefits that object-oriented design is meant to offer us. The
larger our systems get, the easier it is for things to spiral out of control.
But the truth is that most codebases don't start out as an unmaintainable mess, 
nor do they suddenly turn bad overnight.
Instead, they erode gradually -- one poor decision at a time.

In this article, we'll discuss the various pitfalls of Ruby's code reuse
methods. My hope is that by studying these issues, you'll be more aware of the 
risks involved in certain modeling decisions, and that will help you better 
understand the compromises you must make while designing your projects.

# 2. Common methods of code reuse

Ruby's code sharing methods can be divided into two groups: those that
provide direct access to the internals of the shared component (*implementation
sharing*), and those that do not (*behavior sharing*). While each approach has
its own set of costs and benefits, a lot can be said about the
complexity of a technique by knowing which reuse category it belongs to.

## 2.1 Implementation sharing techniques

The following techniques reuse code in ways that provide
direct access to internals:

- **inheriting** from a superclass
- **including** a module into a class
- **extending** an individual object using a module
- **patching** a class or individual object directly
- **evaluating** code in the context of a class or individual object

## 2.2 Behavior sharing techniques

The following techniques rely on message passing
between distinct objects for code sharing, limiting 
direct access to internals:

- **decorating** an object using a dynamic proxy
- **composing** objects using simple aggregation

## 2.3 Reference examples 

Our goal is only to discuss the complexities
of implementation sharing and behavior sharing in general,
so you don't need to be familiar with all seven methods
of code reuse listed above in order to understand
the rest of this article.

However, if you want some additional clarification about what
each of these terms means, you can check out [this set of
code-reuse examples](https://gist.github.com/sandal/4755113).

# 3. Complexities of implementation sharing

An entire book can be written about the complexities involved in sharing
functionality without proper encapsulation between components. However,
since we don't have room for that level of detail in this article, I've
attempted to group the common issues together into three broad areas 
of concern to look out for: shared instance variables, shared method
definitions, and combinatorial effects.

## 3.1 Shared instance variables

Each object has a single set of instance variables, even if it
has a very complex ancestry chain. For example,
the following code references an instance variable that was
defined by its superclass:

```ruby
require "ostruct"

# OpenStruct variant that prints one "key = value" pair per line.
class PrettyStruct < OpenStruct
  # @table is never assigned in this class; it is read directly from the
  # state set up by the OpenStruct superclass.
  def inspect
    @table.map { |k,v| "#{k} = #{v.inspect}" }.join("\n")
  end
end

struct = PrettyStruct.new(:a => 3, :b => 4, :c => 5)
p struct

# a = 3
# b = 4
# c = 5
```

When two or more shared components reference the same
instance variable, it may be intentional or unintentional.
It goes without saying that unintentional variable name
collisions can lead to defects that are hard to debug, but
intentional shared access (such as in the snippet above)
has more subtle issues to consider. 

Whenever we directly access a variable rather than using 
a public accessor, we may be skipping validations, 
transformations, caching features, or concurrency-related 
features that are meant to keep the underlying data
consistent and synchronized. Is a simple read-only reference
such as the one we've done here really that risky? The
truth is, there's no way to know without reading the `OpenStruct`
source code.

Unfortunately, the only way to know for sure what instance variables 
will be defined, accessed, and modified at runtime for *any*
Ruby object is to read the source of every single class and
module that is in its ancestry chain, both at the
individual object and class definition level. Because new variables 
can spring into existence any time a method is called, this kind 
of static analysis is not practical for most non-trivial programs. 

At the extreme end of the spectrum, you have objects that inherit
from `ActiveRecord::Base`; they exist at the tail end of
an ancestry chain that provides several instance variables and
hundreds of methods through dozens of modules, and that's assuming 
that you haven't installed any third-party plugins. If you aren't 
convinced by the trivial example I've shown in this article, spend 
some time with the Rails source code and you'll surely get the point.

## 3.2 Shared method definitions

Even when reusing an ancestor's public API, it can be challenging to 
avoid strange inconsistencies. Bob Martin provided a classic example
of this problem in an article on the [Liskov Substitution
Principle](http://www.objectmentor.com/resources/articles/lsp.pdf).
Consider a `Rectangle` class with a `Square` subclass, as shown
below:

```ruby
# A rectangle whose sides can be changed independently of one another.
class Rectangle
  attr_accessor :width, :height

  def initialize(width, height)
    # Assign through the writer methods (not the instance variables) so that
    # any subclass overriding width=/height= is honored during construction.
    self.width  = width
    self.height = height
  end

  # Area computed from the current side lengths.
  def area
    width * height
  end
end

# A rectangle constructed with equal sides. The writers inherited from
# Rectangle are left untouched.
class Square < Rectangle
  def initialize(size)
    super(size, size)
  end
end
```

On the surface, this implementation looks simple, and seems to work 
as expected:

```ruby
square = Square.new(5)

p square.area                   #=> 25
p [square.width, square.height] #=> [5, 5]
```

But there is also the potential for bad behavior here, because the `Square` class
also inherits `Rectangle#width=` and `Rectangle#height=`, which can lead
to inconsistent data in the `Square` object:

```ruby
square.width = 10

p [square.width, square.height] #=> [10, 5] -- not a square!
```

One way to resolve this issue would be to override `Rectangle#width=` 
and `Rectangle#height=` so that the two values are synchronized:

```ruby
# A square that keeps both sides equal no matter which writer is used.
class Square < Rectangle
  def initialize(size)
    super(size, size)
  end

  # Setting the width also sets the height.
  def width=(size)
    @width = @height = size
  end

  # Setting the height also sets the width.
  def height=(size)
    @height = @width = size
  end
end

square = Square.new(5)

square.width = 10
p [square.width, square.height] #=> [10, 10]
p square.area                   #=> 100
```

This change enables the kind of behavior you might expect from a `Square`,
and if you are simply reusing code to keep things DRY, that might be good
enough. However, there may still be some subtle issues in code which assumes
that a rectangle's height can vary independently of its width, such as in
this test code:

```ruby
# Checks that the area is the product of two independently assigned sides.
def test_area
  rect.width  = 5
  rect.height = 10

  # assert_equal compares the expected and actual values. A bare
  # `assert 50, rect.area` would treat 50 as the (always truthy) condition
  # and rect.area as the failure message, so it could never fail.
  assert_equal 50, rect.area
end
```

Arguably, this test is written poorly if it is meant to be used as a shared
example for all descendents of the `Rectangle` object. The problem is that at a
first glance, the flaw is not at all obvious. And that essentially is the
core challenge in inheritance-based modeling: ancestors must
guess about the kinds of ways that they will be extended, and descendents need
to guess about whether their extensions will break upstream features. With
some practice and careful design thought this is possible, but it certainly
is not *easy* to reason about.

## 3.3 Combinatorial effects 

Shared method definitions and shared instance variables are at the root of what
makes implementation sharing complex, but that complexity is compounded by the 
fact that ancestry chains can grow arbitrarily long. Module mixins in particular
tend to cause this problem, because they are typically viewed by Ruby programmers
as a tool for implementing orthogonal *plugins*, but are functionally more similar 
to *multiple inheritance*.

Consider an arbitrary class C, with four modules mixed into it: M~1~,
M~2~, M~3~, and M~4~. Typically, each of these modules will provide some
features to C and perhaps require that C implement a few methods to enable those
features. Since each of these modules is meant to be used standalone, they
aren't directly aware of one another, nor do they depend on each other's
features. 

In this scenario, each module might need to make
calls to C's public API and vice-versa, but there would be no need for the
modules to be able to call each other's public methods directly. Furthermore,
in an ideal situation, C and its mixed-in modules would communicate entirely via
public method calls, allowing each to have their own private methods and
internal state. If these constraints were enforced at the language level, it'd be 
possible to model mixins as a simple, horizontal lookup path that would be 
trivial to reason about.

From our perspective as Ruby users, the scenario described above might cover 90%
of what we use modules for on a day to day basis. But because modules are
actually a much more powerful and generalized construct, we cannot expect that
simplistic mental model to be a good fit for how they actually work. In reality,
every module we mix into a class has direct access to the variables and methods
defined by every other mixed in module in that class, resulting in a
combinatorial explosion of possible interactions.

The following graph attempts to illustrate the difference between our typical
way of thinking about (and using) modules, and how they actually work:

**(see next page)**

\newpage

![](topology.png)

What you see above is just one small slice of the total method lookup path, but
it illustrates the general problem that repeats itself along the whole chain:
every ancestor can access the internals of every other, and the number of 
possibilities expands greatly with each new component added to the chain.

In practice, when concerns really are orthogonal, most of the combinatorial
effects between components can safely be ignored as long as you apply some
informal reasoning. But as an object gets larger, it becomes more likely
that some pair of ancestors will accidentally develop conflicting
definitions of what a method or variable is meant to be used for, and those
issues can be very difficult to debug. Furthermore, each new ancestor
also makes it harder to add new functionality to an object without 
accidentally breaking existing features.

This issue can be mitigated by the use of mixins at the individual object level,
which can allow different bits of reusable functionality to be used in isolation
of one another by only mixing in one module at a time. However, this technique
only works around the issue -- it does not eliminate it entirely.

# 4. Complexities of behavior sharing

Behavior sharing techniques do not suffer from any of the issues we've
discussed so far, and that alone makes them worth considering as a better
default approach. However, they do have their own share of problems, 
so you need to be aware of what the tradeoffs are when deciding how to 
model your systems.

## 4.1 Indirect access

When access to an object's internals is truly necessary, it isn't practical
to use composition based techniques. For example, consider the following
mixin-based code which implements a memoization routine for caching method
return values:

```ruby
# Class-level macro for memoizing instance methods by their arguments.
# Extend a class with this module and call `cache` once the methods to be
# memoized have been defined.
module Cached
  # Replaces each named instance method with a memoizing version.
  #
  # method_names - names of instance methods that are already defined.
  #
  # One memo table is created per cached method and is keyed by the argument
  # list only, so it is shared by every instance of the class.
  def cache(*method_names)
    method_names.each do |m|
      original = instance_method(m)
      results  = {}

      define_method(m) do |*a|
        # fetch (rather than ||=) so that nil and false results are
        # remembered too instead of being recomputed on every call.
        results.fetch(a) { results[a] = original.bind(self).call(*a) }
      end
    end
  end
end

## EXAMPLE USAGE:

class Numbers
  extend Cached

  # Returns the nth Fibonacci number (fib(0) is 0, fib(1) is 1).
  # Raises ArgumentError when n is negative.
  def fib(n)
    raise ArgumentError if n < 0

    n < 2 ? n : fib(n - 1) + fib(n - 2)
  end

  # Must come after the definition: cache looks the method up by name.
  cache :fib
end

n = Numbers.new
  
(0..100).each { |e| p [e, n.fib(e)] }
```

A naive attempt to refactor the `Cached` module into a `ComposedCache` class
might end up looking something like this:

```ruby
# Caching wrapper that leaves the wrapped object untouched: memoized methods
# are defined on the wrapper itself and delegate to the target on a miss.
class ComposedCache
  # target - the object whose method results should be cached.
  def initialize(target)
    @target = target
  end

  # Defines one memoizing method on this wrapper per given name.
  # Only calls made through the wrapper consult the cache; calls the target
  # makes to its own methods go straight to the target.
  def cache(*method_names)
    method_names.each do |m|
      results = {}

      define_singleton_method(m) do |*a|
        # fetch (rather than ||=) so that nil and false results are
        # remembered too instead of being recomputed on every call.
        results.fetch(a) { results[a] = @target.send(m, *a) }
      end
    end
  end
end

n = ComposedCache.new(Numbers.new)
n.cache(:fib)
  
(0..100).each { |e| p [e, n.fib(e)] }
```

Unfortunately, this code has a critical flaw in it that makes it unsuitable 
for general use: It caches calls made through the `ComposedCache` proxy, but
it does not cache internal calls made within the objects it wraps. In 
practice, this makes it absolutely useless for optimizing the performance of
recursive functions such as the `fib()` method we're working with here.

There is no way around this problem without modifying the wrapped object.
In order to stick with composition-based modeling and still get proper
caching behavior, here's what we'd need to do: 

```ruby
class ComposedCache
  def initialize(target)
    @target  = target
  end

  def cache(*method_names)
    method_names.each do |m|
      original = @target.method(m)
      results  = {}

      @target.define_singleton_method(m) do |*a|
        results[a] ||= original.call(*a)
      end

      define_singleton_method(m) { |*a| @target.send(m, *a) }
    end
  end
end

n = ComposedCache.new(Numbers.new)
n.cache(:fib)
  
(0..100).each { |e| p [e, n.fib(e)] }
```

Such a design *would* prevent a new ancestor from being introduced
into the `Numbers` object's lookup path, and it would externalize
the code that actually understands how to handle the caching. However,
because `ComposedCache` still directly modifies the behavior of
the `Numbers` objects it wraps, it loses the benefit of encapsulation
that typically comes along with composition based modeling.

We also end up with an interface that feels awkward: defining what
methods ought to be cached via an instance method call does not
feel nearly as natural as using a class-level macro, and might
be cumbersome to integrate within a real project. There are ways
to improve this interface, but that would require us to jump through
a few more hoops, increasing the complexity of the implementation.

Because the `ComposedCache` expects all cached methods to be explicitly
declared and it does not support automatic delegation to the underlying
object, it might be cumbersome to work with -- it would either need
to be modified to forward all uncached method calls to the object it
wraps (losing the benefits of a narrow surface), or the caller would
need to keep both a reference to the original object and the composed 
cache object around (which is very awkward and confusing!).

Good composition-based modeling produces code that is simpler than
the sum of its parts, as a direct result of strong encapsulation
and well-defined interactions between collaborators. Unfortunately,
our implementation of the `ComposedCache` class has none of those 
benefits, and so it serves as a useful (if pathological) example 
of the downsides of composition-based modeling.

## 4.2 Self-schizophrenia

When sharing behavior via decorators, it can sometimes be tricky to remember
what `self` refers to. This can happen both on the proxy side (a reference to
`self` accidentally refers to the proxy rather than the target), and
within the target object (a reference to `self` accidentally exposes the
target rather than the proxy). This common mistake can lead to subtle bugs 
that are tricky to detect.

A clear example of this problem can be found in the Celluloid concurrency
framework. Pay attention to the lines marked #1 and #2 in the following
code:

**(see next page)**
\newpage

```ruby
require "celluloid"

class Alert
  include Celluloid

  def initialize(message, delay)
    @message = message
    @delay   = delay
    @display = Display.new
  end

  attr_reader :message

  def run
    loop do
      sleep @delay

      @display.async.render(Actor.current)  # 1
    end
  end
end

class Display
  include Celluloid

  def render(actor)
    puts actor.message
  end
end

alert = Alert.new("Foo", 5)
alert.async.run # 2

sleep
```

In the line marked #1, the `Actor.current` method is called, rather than
referring to `self`. This is a direct effect of Celluloid relying on a proxy
mechanism for handling its fault tolerance and concurrency functionality.

When `alert.async.run` is called on the line marked #2, `Alert#run` is not
executed directly, but instead gets scheduled to be run indirectly by a proxy object.
However, once the method is actually executed, `self` refers to the `Alert`
object, not the proxy object that enables it to be used in a
concurrent, thread-safe way. Celluloid ensures that the `Actor.current` method
will return a reference to that proxy object, and this is how you can safely pass 
a reference to an object that you're using Celluloid with.

If this design technique sounds awkward, it's because it is. However, there
isn't really a better composition-based workaround: this kind of complexity
arises from the indirect access problem that we discussed in the previous section, and
is worsened by the automatic delegation that is meant to make two distinct
objects appear as if they were one single coherent entity.

When faced with the self-schizophrenia issue, it's important to consider how
much benefit is gained by encapsulating implementation details. In the case of
Celluloid, the benefit of not mixing complicated concurrency mechanics into
ordinary objects is probably well worth it, but in other cases it may make
sense to use an implementation sharing approach instead.

*NOTE: The self-schizophrenia problem also can occur when using the **eval**
implementation sharing approach. However, since it is not a general problem for
that category, I've categorized it as more of a behavior sharing problem.*

## 4.3 Lack of established design practices 

Although it is not a technical issue, one of the main barriers to making
effective use of behavior sharing in Ruby is that most
developers are simply not comfortable with using aggregation as a primary
modeling technique. Ruby has lots of tools that make this style of programming
easier, but they tend to take a back seat to module mixins and eval-based 
domain-specific interfaces.

Decorators and simple composition are definitely gaining in popularity due to
the encapsulation and understandability benefits that they offer, but in many
cases they are used as direct replacements for inheritance-based modeling. This
leads to somewhat high-ceremony and awkward interfaces that aren't necessarily
convenient or comfortable to use.

In other words, we haven't yet established idioms or practices that truly allow
composition based modeling to shine: most of our libraries and frameworks still
heavily rely on implementation sharing techniques, and until that changes, our
applications will tend to follow in their footsteps.

This is an issue that will hopefully be resolved in time, but for now I think
it's only fair to include a lack of familiarity with behavior sharing methods as
something that makes code that uses them more complicated to reason about.

# 5. Notes and recommendations

Implementation sharing is very powerful, and that makes it a good deal more
complex than behavior sharing by default. To decide which style of code reuse is
better to use in a given situation, it makes sense to ask yourself whether your
code actually needs direct access to the internals of its ancestors.

In the rare cases where direct access is needed, it makes sense to use as weak
of a form of implementation sharing as possible. Techniques which limit global
effects are most desirable, e.g. individual object mixins, eval-based domain
specific interfaces, and adding methods directly to individual objects. But
if you find that the setup for these techniques ends up introducing needless
complexity, including a module into a class or
inheriting from a base class is still an option. No matter what
technique you choose, it's best to not directly rely on instance variables or
private methods from ancestors, just to play it safe.

However, you might find that most of the problems you currently solve with
implementation sharing methods could fairly easily be solved with a
behavior sharing approach. If a little extra work is
likely to save you maintenance effort in the future, and it makes the code
easier to reason about, it makes sense to reach for simple composition based
modeling by default. Using a dynamic decorator can also offer a reasonable 
middle ground when you are trying to build an object that can serve as a
drop-in replacement for some other component.

If you try to go the behavior sharing route and find it's too complicated or
that it has obvious drawbacks (such as in the caching example we discussed in
this article), you can always go back to implementation sharing techniques.
However, since most of the issues with behavior sharing tend to happen along the
edge cases, and the issues with implementation sharing are baked into its core,
it does make sense to try to avoid the latter where possible.

Much more research into this problem is needed. If you'd like to discuss it with
me, don't hesitate to drop a message on the conversation thread over at
practicingruby.com, or email me at **gregory@practicingruby.com**.

# 6. Further reading

There are three papers I'd recommend if you want to study these issues further:

* *Disciplined inheritance, M. Sakkinen 1989*
* *A behavioral notion of subtyping, Liskov / Wing 1994*
* *Out of the tar pit, Moseley 2006*

The first two papers deal squarely with the issues of implementation sharing vs.
behavior sharing in code reuse, and the third provides a more general inquiry
into what makes our programs difficult to reason about. All three are more
formal than this article, but also much more in-depth.

For a Ruby-centric summary of the first two papers, see [Issue
3.7](https://practicingruby.com/articles/shared/uvgdkprzmoqf) and [Issue
3.8](https://practicingruby.com/articles/shared/lxgettcjiggh) of Practicing
Ruby. However, please note that these articles only reveal a small portion of
the insight to be gained from the papers listed above.


================================================
FILE: articles/v6/005-debugging.md
================================================
*This article was contributed by Carol Nichols
([@carols10cents](http://twitter.com/carols10cents),
[carols10cents@rstat.us](https://rstat.us/users/Carols10cents)), one of the
active maintainers of [rstat.us](https://rstat.us). Carol is also involved in
the Pittsburgh Ruby community, and is a co-organizer of the [Steel City Ruby
Conf](http://steelcityrubyconf.org/). At the time of writing this article,
she was busy doing Rails development for [Think
Through Math](http://www.thinkthroughmath.com/).*

Whenever our code breaks, it can be hard to remain calm. Debugging
often occurs when production is down, customers are experiencing a problem, and
managers are asking for status updates every five minutes. In this situation, panicking
is a natural response, but it can easily disrupt your troubleshooting process. It may
lead to changing code on hunches rather than on evidence or writing
untested code. By rushing to fix things immediately, you may make
things worse or not know which of your changes actually
fixed the problem. As many of us have learned the hard way: a
chaotic debugging process is no fun for you, and no fun for the people
who rely on your software.

Having a calm and collected way of approaching software defects goes a long way,
especially if you also have a firm grasp of the tools and techniques that can
help make the debugging process easier. This article is meant to help you with
both of those things, so let's get started.

## Don't Panic!

If external pressures are stressing you out, either disable
the feature that is causing the problem, or roll the production code back to a known
stable state before attempting to investigate further. Even
if it isn't ideal from a usability standpoint, functioning software with a
few missing features is still more useful than unstable and potentially
dangerous software.

Once you've reproduced the issue in a development environment, you can start to narrow down the problem using
a divide-and-conquer strategy. If you suspect the problem is coming from your
application code, you might try starting with an end-to-end reproduction and
then gradually eliminate components until you find the source of the issue.
However, if you think the issues are coming from the infrastructure your
project is built on top of (libraries, services, frameworks, and even Ruby
itself!), it might be better to start with minimal stand-alone examples to test your 
assumptions about your application's integration points. Picking a good 
angle of attack depends on the nature of the problem, so don't be afraid to spend some time
figuring out the right questions to ask -- even if you're under pressure.

Regardless of your starting point, you won't be guaranteed to find the source
of your problem right away. However, each new experiment you try out will add yet another constraint to your 
problem space, making it easier to reason about what is going
wrong. The fear of the unknown is a big part of what causes us to panic in the
first place, and so this approach will help you build up the confidence you
need in order to maintain a calm mindset as you work.

Throughout the rest of this article, we'll discuss some things that will 
help you find bugs, fix them, and make sure they stay fixed. But keep 
this in mind: they work best if you don't panic!

> Editor's Note: *Don't Panic* is the motivating
> force behind several of the maintenance policies for practicingruby.com. For
> more on this topic, see Lessons 4 and 5 from [Issue
> 5.6](https://practicingruby.com/articles/91)

## Read stack traces

Stack traces are ugly. They typically present themselves as a wall of text 
in your terminal when you aren't expecting them. When pairing, I've often seen people
ignore stack traces entirely and just start changing the code. But stack
traces do have valuable information in them, and learning to pick out the
useful parts of the stack trace can save you a lot of time in trying to narrow
down the problem.

The two most valuable pieces of information are the resulting error message
(which is usually shown at the beginning of the stack trace in Ruby) and the
last line of your code that was involved (which is often in the middle). The
error message will tell you *what* went wrong, and the last line of your
code will tell you *where* the problem is coming from.

A particularly horrible stack trace is [this 1400 line trace](https://gist.github.com/carols10cents/4751381/raw/b75bdb41e7fa8ded54d13dc786808b464357effe/gistfile1.txt)
from a Rails app using JRuby running on WebSphere. In this case, the error message
*"ERROR [Default Executor-thread-15]"* is not very helpful. The vast majority of the lines are
coming from JRuby's Java code and are also uninformative. However, skimming
through and looking for lines that don't fit in, there are some lines that are
longer than the others (shown wrapped and trimmed below for clarity):

```
rubyjit.ApplicationHelper
  $$entity_label_5C9C81BAF0BBC4018616956A9F87C663730CB52E.
  __file__(/..LONGPREFIX../app/helpers/application_helper.rb:232)
  
rubyjit.ApplicationHelper
  $$entity_label_5C9C81BAF0BBC4018616956A9F87C663730CB52E
  .__file__(/..LONGPREFIX../app/helpers/application_helper.rb)
```

These lines of the stack trace point to the last line of the Rails code that
was involved, line 232 of *application_helper.rb*. But this particular line
of code was simply concatenating two strings together -- making it pretty
clear that the problem was not caused by our application code! By trying
various  values for those strings, we eventually found the cause of the
problem: an [encoding-related bug](https://github.com/jruby/jruby/issues/366) in
JRuby was causing a Ruby 1.9 specific feature to be called from within Ruby 1.8
mode. Even though our stack trace was very unpleasant to read and did not
provide us with a useful error message, tracing the exception down to a
particular line number was essential for identifying what would have otherwise
been a needle in a haystack.

Of course, there are some edge cases where line numbers are not very helpful. One is
the dreaded *"syntax error, unexpected $end, expecting keyword_end"* error, which
will usually point to the end of one of your files. It actually means you're
missing an `end` somewhere in that file. However, these situations are rare, and
so it makes sense to skim stack traces for relevant line numbers
that might give you a clue about where your bug is coming from.

If all else fails, you can always try doing a web search for the name of the
exception and its message -- even if the results aren't directly related to your
issue, they may give you useful hints that can help you discover the right
questions to ask about your problem.

## Use debugging tools

Debugging tools (such as ruby-debug) are useful because they allow you to inspect your code and its 
environment while it's actually running. However, this is also true about using
a REPL (such as irb), and many Rubyists tend to strongly prefer the latter
because it is a comfortable workflow for more than just troubleshooting.

The [Pry](http://pryrepl.org/) REPL is becoming increasingly popular, because it
attempts to serve as both a debugger and an interactive console simultaneously.
Placing the `binding.pry` command anywhere in your codebase will launch you into
a Pry session whenever that line of code is executed. From there, you can do
things like inspect the values in variables or run some arbitrary code. Much like 
irb, this lets you try out ideas and hypotheses quickly. If you can't easily
think of a way to capture the debugging information you need with some simple
print statements or a logger, it's a sign that using Pry might get you 
somewhere.

This kind of workflow is especially useful when control flow gets complicated,
such as when working with events or threads. For example, suppose we wanted to
get a closer look at the behavior of [the actor model](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/master/v6/003/lib/actors.rb)
from the Dining Philosophers problem from [Issue 6.3](https://practicingruby.com/articles/100). 
Here's how we would inspect what's happening in the `Waiter#request_to_eat`
method:

```ruby
require "pry"

class Waiter
  # ...

  def request_to_eat(philosopher)
    binding.pry
    return if @eating.include?(philosopher)

    @eating << philosopher
    philosopher.async.eat
  end
end
```

Because the `Waiter` class is an actor, it will execute the requests to eat in
sequence, but they will be queued up asynchronously. As soon as one is
actually executed, we will be dropped into a Pry session:

```console
From: (..)/dining_philosophers.rb @ line 61 Waiter#request_to_eat:

    60: def request_to_eat(philosopher)
 => 61:   binding.pry
    62:   return if @eating.include?(philosopher)
    63:
    64:   @eating << philosopher
    65:   philosopher.async.eat
    66: end

[1] pry(#<Waiter>)>
```

From here, we can deeply interrogate the current state of our program,
revealing that the `philosopher` references an `Actor::Proxy` 
object, which in turn wraps a `Philosopher` object 
(Schopenhauer in this case):

```
# NOTE: this output cleaned up somewhat for clarity

[1] pry(#<Waiter>)> ls philosopher
Actor::Proxy#methods: async  method_missing  send_later  terminate
instance variables: @async_proxy  @mailbox  @mutex  @running  @target  @thread
[2] pry(#<Waiter>)> cd philosopher/@target
[3] pry(#<Philosopher>):2> ls
Philosopher#methods: dine  drop_chopsticks  eat  take_chopsticks  think
self.methods: __pry__
instance variables: @left_chopstick  @name  @right_chopstick  @waiter
locals: _  __  _dir_  _ex_  _file_  _in_  _out_  _pry_
[4] pry(#<Philosopher>):2> @name
=> "Schopenhauer"
[5] pry(#<Philosopher>):2> cd
[6] pry(#<Waiter>)> @eating.count
=> 0
```

Once we're ready to move on to the next call to `request_to_eat`, we simply call
`exit`. That immediately launches a new console that allows us to determine
that Schopenhauer is already in the `@eating` queue by the time Aristotle's request
is starting to be processed:

```
[1] pry(#<Waiter>)> cd philosopher/@target
[2] pry(#<Philosopher>):2> @name
=> "Aristotle"
[3] pry(#<Philosopher>):2> cd
[4] pry(#<Waiter>)> @eating.count
=> 1
[5] pry(#<Waiter>)> cd @eating.first/@target
[6] pry(#<Philosopher>):2> @name
=> "Schopenhauer"
```

Imagine for a moment that there was a defect in this code. Replicating this 
exact situation in a test where we can access the values of
`@eating` and the internals of the `philosopher` argument at these 
particular points in the execution would
not be straightforward, but Pry makes it easier to casually poke at these
values as part of an ad-hoc exploration. If there was a bug to be found here,
Pry could help you identify the conditions that trigger it, and then other
techniques could be used to reproduce the issue once its root cause
was discovered.

This particular use case merely scratches the surface of Pry's capabilities -- 
there are many commands that Pry provides that are powerful tools for inspecting your
code while it's running. That said, it is not a complete substitute for
a traditional debugger. For example, gdb can be useful for hunting down
hard-to-investigate issues such as segfaults in MRI's C code. If you're interested in that kind
of thing, you may want to check out [this talk from Heath Lilley](http://vimeo.com/54736113)
about using gdb to determine why a Ruby program was crashing.

You don't always need to use a heavyweight debugging utility to solve
your problems. Whenever you can get away with it, it's reasonable to use the
most simple thing that could possibly work; it's amazing how much a decent log
file or some well placed print statements can tell you about what's going on in
your code. But whenever you feel like those approaches are starting to get
cumbersome, don't be afraid to break out the power tools.

## Lean on tests, but don't overspecify

Whenever you need to fix a bug, you're writing a test first, right? This
serves multiple purposes: it gives you a convenient way to reproduce the issue
while you're experimenting, and if added to your test suite, it will help
you detect regressions in future changes.

Regardless of whether the tests you write end up becoming a permanent part of
your test suite or not, they still provide a useful way to record your 
discoveries and experiments during a debugging session. For example, you might 
start with an end-to-end acceptance test that is able to reproduce the problem 
and then write smaller and smaller tests as you are narrowing down where the 
issue is occurring until you get to a unit test that covers the root cause.
From there, you can fix the issue, run all the tests to confirm the fix, 
and then finally remove the redundant tests that only reproduce the problem 
indirectly. 

But why bother removing intermediate tests? It's mostly just a matter of
good housekeeping that helps maintain the clarity of your test suite. For
example, negative tests like "it should not crash when given special characters" 
are mostly just noise at the acceptance test level, unless someone could
reasonably assume that the tested feature wouldn't support that use case.
For a real example of this kind of over-specification, here is a test that I 
added to [rstat.us' codebase](https://github.com/hotsh/rstat.us/commit/26444ea95ec8da12d4e74764bf52bdaad18e7776)
a while ago:

```ruby
it "lets you update your profile even if you use a different case in the url" do
  u = Factory(:user, :username => "LADY_GAGA")
  a = Factory(:authorization, :user => u)
  log_in(u, a.uid)
  visit "/users/lady_gaga/edit"
  bio_text = "To be or not to be"
  fill_in "bio", :with => bio_text
  click_button "Save"

  assert_match page.body, /#{bio_text}/
end
```

Rather than adding another test for the case of going to the url for username
"lady_gaga" when the username is "LADY_GAGA" (don't ask why I chose Lady Gaga,
I don't remember), I could have instead updated
[the existing happy path test](https://github.com/hotsh/rstat.us/blob/26444ea95ec8da12d4e74764bf52bdaad18e7776/test/acceptance/profile_test.rb#L45)
to encompass this situation (effectively replacing the existing happy path test
with this special case test). In this way, both the special case and the happy path
would be tested, but with less duplication.

If in doubt, it's probably better to have a few extra tests cornering a defect
than too few, but just keep in mind that like all other code, your tests have a
maintenance overhead along with a potential to become brittle. As with
all things, having a sense of balance pays off here.

## Reflections

Even though sometimes it seems like software has a mind of its own, computers
only do what a human has told them to do at some point. So next time you end up
facing a painful defect, remember the following things:

* You can figure out why a bug is happening by using deterministic processes to narrow down where the
problem is happening. 

* You can learn to pick out the useful parts of stack traces, even if they
seem heavily obfuscated at first.

* You can use debugging tools to experiment with what your code is
actually doing as it runs, especially in cases where it wouldn't
be easy to work out what was going on straight from your log files.

* You can write tests that help you while debugging and then turn some
or all of them into useful regression tests that will help make your
codebase more stable as it grows. 

Thanks for reading. Now go figure out some bugs! <3


================================================
FILE: articles/v6/006-recipes-method-bloat.md
================================================
> **NOTE:** This issue of Practicing Ruby was one of several content experiments 
that was run in Volume 6. It uses a cookbook format (e.g. problem -> solution -> discussion)
instead of the traditional long-form article format we use in most Practicing Ruby articles.

**Problem: A method has many parameters, making it hard to remember its
interface.**

Suppose we were building an HTTP client library called `HyperClient`. A trivial
request might look like this:

```ruby
http = HyperClient.new("example.com")
http.get("/")
```

But we would probably need to support some other features as well, such as 
accessing HTTP services running on non-standard ports, and routing 
requests through a proxy. If we simply add these features 
without careful design consideration, we may end up
with the following bloated interface for `HyperClient.new`: 

```ruby
http = HyperClient.new("example.com", 1337, 
                       "internal.proxy.example.com", 8080, 
                       "myuser", "mypassword")
```

If the above code looks familiar to you, it's because it is modeled directly
after Ruby's `Net::HTTP` standard library; a codebase which
is often criticized for its poor API design! There are many reasons 
why this style of interface is bad, but three obvious issues stand out:

* Without a single unambiguous way of sorting the parameters, it is very
difficult to remember their order.

* This style of interface makes it hard to set defaults for parameters in a
flexible way. For example, consider the difficulty of setting default values for
the `service_port` and `proxy_port` in the code above.

* If the `HyperClient` API changes and a new optional parameter is introduced, 
it must either be added to the end of the arguments list or risk breaking 
all calls that relied on the previous order of the parameters.

Fortunately, all of the above points can be addressed by designing a better
method interface.

---

**Solution: Use a combination of keyword arguments and parameter objects to
create interfaces that are both memorable and maintainable.**

Whenever a method's interface accumulates several related arguments, it is a
sign that introducing a parameter object might be helpful. In this 
particular example, we can easily group together the proxy-related arguments 
as shown below:

```ruby
proxy = HyperClient::Proxy.new("internal.proxy.example.com",
                               :port     =>  8080,
                               :username => "myuser",
                               :password => "mypass")
```

By switching to keyword arguments, it becomes obvious what
each of these parameters represents, and there is no need to list them
in a particular order. This basic idea can also be extended to simplify 
the interface of the original `HyperClient` object:

```ruby
http = HyperClient.new("example.com", :port  => 1337, :proxy => proxy) 
```

This new constructor looks and feels more comfortable to use, because it
introduces some structure to separate essential parameters from
optional ones while grouping related concepts together. This
makes it easier to recall the right bits of knowledge at the right time.

---

**Discussion**

Both interfaces for `HyperClient.new` handle the most common use case 
in the same way:

```ruby
http = HyperClient.new("example.com")
```

Where they differ is when you have extra parameters. Dealing with
default values in the former is *much* uglier. For example, if
`HyperClient` provided default ports for both the service and the
proxy, you'd need to do something like this when using a username
and password:

```ruby
http = HyperClient.new("example.com", nil, 
                       "internal.proxy.example.com", nil,
                       "myuser", "mypassword")
```                       

In the improved code, those parameters could simply be omitted:

```ruby
proxy = HyperClient::Proxy.new("internal.proxy.example.com",
                               :username => "myuser",
                               :password => "mypass")

http = HyperClient.new("example.com", :proxy => proxy)
```

But this is a consequence of using keyword arguments -- it has 
little to do with the fact that we've introduced the `HyperClient::Proxy` 
parameter object. For example, if the following API were used instead,
it would be trivial to fall back to default values for `:service_port` and
`:proxy_port` if they were not explicitly provided:

```ruby
http = HyperClient.new("google.com",
                       :proxy_address   => "internal.proxy.example.com",
                       :proxy_username  => "myuser",
                       :proxy_password  => "mypass")
```

The following signature supports this kind of behavior, using Ruby 2.0's 
keyword arguments:

```ruby
class HyperClient
  def initialize(service_address, service_port: 80, 
                 proxy_address:  nil, proxy_port: 8080, 
                 proxy_username: nil, proxy_password: nil)

    # ...        
  end
end
``` 

This style of design isn't especially painful to work with for the end-user, 
and it has a fairly wide precedent in Ruby library design. However, taking this
approach comes with three significant drawbacks:

* An interface with many similarly named parameters that are 
differentiated only by a prefix (e.g. `service_port` vs. `proxy_port`)
is still intention-revealing and memorable, but the repetition 
introduces line noise that hurts readability.

* Validating and transforming inputs becomes increasingly complex 
as method interfaces become bloated. Think about the various
checks that would need to be done in the previous example to
verify what proxy settings should be used, if any.

* Each and every new parameter introduced into a method's interface 
creates a new set of branches that need to be covered by tests,
and considered during debugging.

To see how these issues are mitigated by the introduction of the
`HyperClient::Proxy` object, think through what the validation
and transformation work might look like in both the example shown
above, and in the code shown below:

```ruby
# Client whose constructor takes the proxy settings as a single optional
# parameter object instead of several prefixed keyword arguments.
class HyperClient
  # port defaults to 80; proxy defaults to nil.
  def initialize(service_address, port: 80, proxy: nil)
    # ...
  end

  # Parameter object that groups the proxy address, port, and credentials.
  class Proxy
    # Only address is required. port defaults to 8080, and both
    # credentials default to nil.
    def initialize(address, port: 8080, username: nil, password: nil)
      # ...
    end
  end
end
```

Although the two implementations will end up sharing a lot of code in 
common, introducing a formal parameters object allows you to hide
some of the ugly details from the `HyperClient` class that would
otherwise end up in its constructor. This is good for both testability
and maintainability.

Despite its utility, it is possible to take this technique too far. 
For example, introducing a `HyperClient::Service` object to wrap the service 
address and port is probably more trouble than it's worth, because it does not
hide enough complexity to have a net positive impact on maintainability. That said,
design decisions are highly context dependent and need to 
be revisited as requirements grow and change. Suppose that we wanted to support
both SSL and HTTP basic authentication in this library; 
then adding a `HyperClient::Service` object might start to make sense!
This rise in necessary complexity shifts the balance of things to make
an extra layer of indirection seem worthwhile, where it may not have before.

The thing to remember is that being influenced by features that will soon be 
implemented is part of the design process, but considering vague scenarios 
that may or may not happen in the far future is more akin to gazing into a 
crystal ball. The former is productive; the latter is potentially harmful.

---

**Conclusions**

When designing method interfaces, don't bother trying to get them perfect,
because they will eventually end up changing anyway. However, don't just ignore
their design either -- keep in mind that good APIs make easy things easy and hard 
things possible. The techniques we've discussed in this recipe should help you
avoid some of the most common mistakes people make, but the rest is up to you!

If you want to learn more about method-level interface design, James Noble wrote
a great paper on the topic called [Arguments and
Results](http://www.laputan.org/pub/patterns/noble/noble.pdf). I strongly
recommend reading his work, as well as [Issue 2.14](https://practicingruby.com/articles/shared/vpxpovppchww) 
and [Issue 2.15](https://practicingruby.com/articles/shared/mupuergickjz) of
Practicing Ruby, which cover the same topic with some Ruby-specific examples.


================================================
FILE: articles/v6/007-demeter.md
================================================
*This article was contributed by [David A. Black](https://twitter.com/david_a_black), Lead Developer at Cyrus Innovation. David is a long-time Ruby developer, author, trainer, 
speaker, and community event organizer. He is the author of The Well-Grounded Rubyist (Manning Publications, 2009).* 

A lot has been written and said about the Law of Demeter. I'd read and heard a
lot about the law before I ever went back and looked at the seminal, original
papers that described it. In spite of how much I thought I knew about the law, I
found those original papers quite enlightening and absorbing. 

I've been particularly absorbed in two articles: the 1988 OOPSLA paper
"Object-Oriented Programming: An Objective Sense of Style" by Karl J.
Lieberherr, Ian M. Holland, and Arthur J. Riel, and the 1989 article "Assuring
Good Style for Object-Oriented Programs" by Lieberherr and Holland. 
The two papers are of course closely related. But what I've found interesting,
aside from just the process of studying and absorbing information about the Law
of Demeter at its source, is considering the ways in which they differ. 

Both papers posit that there are different versions of the Law of Demeter. But
the taxonomies they construct for the law differ considerably from each other.
A lot of further thought and work, evidently, went into the law between 1988 and
1989. 

I'm going to put the two taxonomies, and the differences between them, under a
microscope -- at least, a medium-powered microscope. I won't recapitulate
everything in the two articles, but I'll go into enough detail to set the
stage for some reflective and interpretive observations about why the law might
have evolved in the ways it did in a relatively short time. 

I'll then conclude with a couple of speculative, open-ended thoughts about the
Law of Demeter as it relates to general problems of code organization and best
practices in programming -- a probably small-scale but hopefully interesting
perspective that I've dubbed "Metademeter". 

## THE 1988 TAXONOMY

> **Note:** In addition to the type and object versions of the law described here,
the 1988 article talks about the *strong* and *weak* versions of the law. That
distinction has to do with whether or not it's considered permissible to send
messages to inherited instance variables. The strong version says no; the weak
version says yes. I'm not going to go into detail about that aspect of the 1988
taxonomy, but it's certainly worth a look at the original article.

In 1988, the three authors state the Law of Demeter, initially, in the following
terms: 

For all classes C, and for all methods M attached to C, all objects to which M
sends a message must be instances of classes associated with the following
classes: 

```
  1. The argument classes of M (including C).
  2. The instance variable classes of C.

  (Objects created by M, or by functions or methods which M calls, and objects
  in global variables are considered as arguments of M.) 

  (pg. 325)
```

There follows an extensive treatment of the motivation for and implications of
the law. Included in this treatment is consideration of a case where strict
adherence to the law nonetheless runs contrary to its intended effect. Consider
a case where there's a kind of circular structure to the instance variable types
of a set of classes. The following example is adapted from the article, and while Ruby doesn't enforce instance variable classes, the code illustrates the basic difficulty the authors identify: 

```ruby
  # A holds one freshly created instance each of B, C, D, and E.
  class A
    def initialize
      @b, @c, @d, @e = B.new, C.new, D.new, E.new
    end

    # Reaches through b to the D instance that b holds, and then on to the E
    # instance held by that D: a chain of three reader calls.
    def bad_style
      b.d.e
    end

    attr_reader :b, :c, :d, :e
  end

  # B holds its own C and D instances, separate from the ones A creates.
  class B
    def initialize
      @c, @d = C.new, D.new
    end

    attr_reader :c, :d
  end

  class C; end

  # D holds a single E instance.
  class D
    def initialize
      @e = E.new
    end

    attr_reader :e
  end

  class E; end

  # Build an A and trigger the chained calls.
  a = A.new
  a.bad_style
```

The `bad_style` instance method in class `A`, called at the end of the example, triggers a series of calls. The first, a call to the reader method `b`, returns `a`'s instance variable `@b`, which is an instance of class `B`. Then the message `d` is sent to that `B` instance; the result is an instance of `D`, namely the instance held in the instance variable `@d` of the `B` instance. Sending `d` to a `B` instance is legal, Demeter-wise, because one of `a`'s instance variables is of class `B`. Then the `D` instance gets the message `e`; this is also OK for the same reason. 

So you've only "talked to" objects belonging to classes corresponding to
instance variables of your instance of `A`, but, as the article states, *"the
method looks two levels deep into the structure of instance variable first,
violating the ideals of information-hiding and maintainability."* 

The authors propose a second formulation of the law as a way around this
problem. Note that here the law is stated in terms of objects, not classes:

```
  For all classes C, and for all methods M attached to C, all objects to which M
  sends a message must be:

    * M's argument objects, including the self object or...
    * The instance variable objects of C. 

  (Objects created by M, or by functions or methods which M calls, and objects
  in global variables are considered as arguments of M.) 

  (327)
```

The downside to this object version of the Law of Demeter is that it makes it
hard to do compile-time checking. The conclusion of the authors is that *"to
retain easy compile-time checking we require the Law's formulation in terms of
types. We feel that such path[o]logical cases as the one above will not occur
often enough to cause problems" (327).*

Still, the object version of the law serves as an important guide for
programmers. Toward the end of the article, the authors provide formulations of
the law for several specific object-oriented languages, using the law's object
version. Of the languages for which they offer such formulations, the closest to
Ruby is Smalltalk-80. In that language, the authors state that message-sending
should be restricted to:

* an argument object of [the method] M including objects in pseudo variables
  "self" and "super" or
* an instance variable object of the class to which M is attached. 
  (332)

As before, newly-created objects and objects in global variables count as
argument objects.

The *object* version of the law casts a somewhat wider net, as far as languages
are concerned, than the first, *class* version. Certainly for a dynamic language
like Ruby, where static code analysis can do relatively little for you and
compile-time checking doesn't exist, the object version makes sense. It also
makes sense in languages where there's no such thing as the *type* of an
instance variable; Ruby instance variables, for example, can be assigned any
object and even different objects at different times. The object version of the
law of Demeter, as laid out in 1988, doesn't specifically address the matter of
reassigning to instance variables but might provide enough structure and
discipline to give you pause if you find yourself doing that. 

Let's move a year forward. 

## THE 1989 TAXONOMY

Like the 1988 article, the 1989 article presents the Law of Demeter in two major
versions: the class version and the object version. Here, though, the
definitions of the two versions have changed in interesting ways, and the class
version, in turn, is broken down into the minimization version and the strict
version. 

The 1989 taxonomy of the law rests on the notion of clients and suppliers.
Clients are methods; suppliers are classes. If method M calls method N on an
instance of class C (or on class C itself), then M is a client of both the
method N and the class C. In turn, C is a supplier to M. (There are some further
subtleties but this is the thrust of how clients and suppliers relate to each
other.) 

In the client/supplier relationship, the supplier class may be an *acquaintance*
class (what's often paraphrased as a "stranger"), or it may be a preferred
supplier (sometimes called a "friend"). Preferred suppliers, in brief, include:

* the subcategory *preferred acquaintance*, consisting of:
  * the class(es) of object(s) instantiated inside the client method
  * the class(es) of global object(s)
* the class of an instance variable (or a superclass)
* the class of an argument to the method (or a superclass)

The article summarizes the two sub-versions of the class version of the law as follows:

```
  Minimization version: Minimize the number of acquaintance classes of all
  methods.

  Strict version: All methods may have only preferred-supplier classes. 
  (40-41)
```

As you can see, the 1989 taxonomy involves more terms and definitions than the
1988 taxonomy. It's a denser account of the law. But there's something gained
for the added complexity. Everything is organized from the root of the structure
upward. The categories of newly created objects and global variables, both of
which were literally added via parenthetical addenda to the 1988 versions of the
law, are more smoothly integrated into the model in 1989. Every imaginable
object that might be sent a message falls somewhere on one consistent spectrum,
ranging from mere acquaintance (to be avoided) to preferred acquaintance
(acceptable but still flagged as not quite a full *friend*) to preferred
supplier (the real friends). I have found that the 1989 taxonomy requires
longer and deeper study than the 1988 taxonomy, but that it repays careful
reading.

And that's just the class version of the law. As before, there's also an object
version, summarized as follows:

```
  All methods may have only preferred-supplier objects. 
```

Note the shift, subtle but important, from *classes* to *objects*, as compared
with the strict version of the class version of the law. Focusing on objects
allows for inclusion of such constructs as self and super. Moreover, the authors
make the following interesting point about the object version of the law:

```
    While the object version of the law expresses what is really wanted, it cannot
    be enforced at compile time. The object version serves as an additional guide
    in addition to the class version of the law (42).
```  

There's a kind of "bend before you break" principle at work here. The Law of
Demeter is not all-or-nothing, as regards the ability to do compile-time
checking. It's also something that you can, and in some cases must, bake into
your programming habits as you go along. 

As in 1988, the 1989 authors present a kind of checklist of how to enforce the
law in the cases of several specific languages (C++, CLOS, Eiffel, Flavors, and
Smalltalk-80). Interestingly, the 1989 account of how to apply the law to
C++ recommends the strict version of the class form of the law -- whereas in
1988, the C++ guidelines suggested the object version. For the other languages,
the 1989 guidelines refer to the object version, though there's some explanatory
text suggesting that in any statically-typed language (including Eiffel), "the
class form is most useful because it can be checked by a modified compiler"
(47). 

Once again, the Smalltalk-80 criteria come the closest to what we might
formulate for Ruby:

```
  Smalltalk-80, object form. In all message expressions inside method M the
  receiver must be one of the following objects:
    * an argument object of M, including objects in the pseudovariables Self and
      Super,
    * an immediate part of Self, or
    * an object that is either an object created directly by M or an object in a
      global variable (47).
```

(An "immediate part of Self" can be an instance variable. It is not explicitly stated in the article whether or not the concept of "immediate part" can also include collection elements.) 

The salient point here is that the framers of the Law of Demeter were at pains
to welcome dynamic languages to the fold. This is directly related to the
complexity of the taxonomy of the law. Exploding the law into several versions
and sub-versions allows for close, reasoned analysis of what can and cannot be
checked at compile time, as well as other details and underpinnings of the law's
rationale and logic. In the end, though, everything converges back on the
original purpose: providing programmers using object-oriented languages with a
set of principles that reduce inter-class dependencies. 

## METADEMETER

The Law of Demeter is engineered to help programmers using object-oriented
languages gain a lot of clarity of code for a relatively small price. Of course,
there's a whole world of refactoring out there; the Law of Demeter is not the
only guideline, or set of guidelines, for making code better, clearer, and more
maintainable. It would be a mistake to lump all refactorings as "Demeter-ish";
that does justice neither to the Law of Demeter nor to the other refactorings. 

And yet... I'm intrigued by the possibility that recognizable aspects of the Law
of Demeter might surface in contexts other than those for which the law was
originally formulated. I'm not going to push this point very far. I've got one
example that I find suggestive, and I'll leave it at that. See what you think. 

The 1989 article describes a programming technique that the authors call
*lifting*. To illustrate lifting, here's an example of an acquaintance class,
and a Demeter violation:

```ruby
  class Plane
    attr_accessor :name
  end

  class Flight
    attr_accessor :plane
  end

  class Person
    # Builds the itinerary text by asking the flight for its plane and then
    # asking that plane for its name.
    def itinerary_for(flight)
      "Flight on #{flight.plane.name}"
    end
  end
```

Here, `Plane` is an acquaintance class of `Person`. `Flight` isn't; `Flight` is a
preferred supplier class, because it's the class of an argument. `Flight` is a
friend; `Plane` isn't, and by calling `name` on a plane object we're operating
outside of the Law of Demeter. 

You can fix this Demeter violation by "lifting" the method that provides the
information into the external class:

```ruby
  class Plane
    attr_accessor :name
  end

  class Flight
    attr_accessor :plane

    # Returns the name of this flight's plane, so callers do not have to
    # reach through the flight to the plane themselves.
    def plane_name
      plane.name
    end
  end

  class Person
    # Builds the itinerary text by sending messages only to the flight argument.
    def itinerary_for(flight)
      "Flight on #{flight.plane_name}"
    end
  end
```

Note that this code is longer than the original. It's not uncommon for
Demeter-compliant code to have more methods than non-compliant code. The gain,
on the other hand, lies in the way the code is organized, and the ease with
which the code can be maintained and changed. If you change the way `Plane#name`
works, and you want to make sure it's still used consistently in all your code,
you only need to hunt for classes that use `Plane` objects as arguments or
instance variables, and make sure the code is still correct. In the first
version of the plane code, you'd have to dig deep into every class in the
program, since you have no guidelines for figuring out where `Plane#name` is
likely to be called or not called. 

Now for the part about aspects of Demeter cropping up outside the original
context. I'm thinking specifically of programming controllers and view templates
in Rails. Templates are already a bit of an oddity, in terms of object-oriented
programming, because of the way they share instance variables with controller
actions: assign something to `@buyer` in the controller, and you can use `@buyer` in
the view. Instance variables always belong to self, and self in the controller
is different from self in the view -- yet the instance variables resurface. 

In case you've ever wondered, this is brought about by an explicit assignment
mechanism: when a view object is created, it copies over the controller's
instance variables one by one into instance variables of its own. So we've got a
domain-specific and kind of hybrid situation: two self objects sharing, or
appearing to share, instance variables.

So where does lifting come in, in any sense reminiscent of the Law of Demeter?

Consider a view snippet like this:

```erb
  <%# For each friend of @user, list every item that friend has. %>
  <% @user.friends.each do |friend| %>
    <% friend.items.each do |item| %>
      <%= friend.name %> has a(n) <%= item.description %>
    <% end %>
  <% end %>
```

I don't want to get into a whole debate here about whether or not it's ever
acceptable to hit the database from the views. My philosophy has always been
that you should be allowed to send a message to any object that the controller
shares with the view. By that reckoning, `@user.friends` would be acceptable, and
it's up to the controller to eager-load the friends if it wants to. 

But what about `friend.items`? Here we're wandering out on a limb; we're an extra
level of remove from the original object. I can't claim that this is exactly the
situation envisioned by the framers of the Law of Demeter -- but it reminds me
strongly of Demeter-ish situations. And I would propose a Demeter-ish solution,
based on the lifting technique: "lift" one of the method calls back into the
controller. Here's a simple version:

```ruby
  # Assigns the current user to @user and that user's friends to @friends,
  # so the friends collection is fetched here rather than in the template.
  def show
    @user = current_user
    @friends = @user.friends
  end
```

And then in the view:

```erb
  <%# Iterates over @friends directly instead of calling @user.friends here. %>
  <% @friends.each do |friend| %>
    <% friend.items.each do |item| %>
      <%= friend.name %> has a(n) <%= item.description %>
    <% end %>
  <% end %>
```
In "metademeter" terms, we're talking only to the immediate parts of the
`@friends` object -- in this case, the elements of a collection. I believe there's
room for debate, within discussions of the law itself, on whether or not
collection elements count as *immediate parts* of an object. But here it seems a
good fit. Again, keep in mind that this is just an observation of what I would
call a Demeter-ish way of thinking about code. The Rails controller/view
relation is not the same as the relation between and among classes and methods
that the Law of Demeter directly addresses. And the object whose immediate
parts I'm restricting myself to is not the self object; it is, itself, an
instance variable object. Still, I think we could do worse in a situation like
this than to be inspired to think of a motto like "talk only to your friends",
understanding "friends" to be objects that lie one method call away from the
original ActiveRecord objects handed off by the controller. 

That's the extent of my metademeter musings. Meanwhile I hope you'll continue to
study and contemplate the Law of Demeter, and explore the many writings and
discussions and debates that you'll find surrounding it. I've presented no more
than a subset of what has been or can be said; but I hope that this trip back
to the original statements on the law has been engaging and worthwhile.

## REFERENCES AND FURTHER READING

* [The 1988 article (in special OOPSLA issue of SIGPLAN Notices)](http://www.ccs.neu.edu/research/demeter/papers/law-of-demeter/oopsla88-law-of-demeter.pdf)

* [The 1989 article, available through IEEE](http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=35588&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D35588)

* [A somewhat different version of the 1989 article, in PostScript form](ftp://ftp.ccs.neu.edu/pub/research/demeter/documents/papers/LH89-law-of-demeter.ps)

* [Another 1988 document, with some further/interim reflections on the Law of Demeter, by Lieberherr and Holland](http://www.ccs.neu.edu/research/demeter/papers/law-of-demeter/law-formulations/ss.tex)

* [An excellent account of the Law of Demeter and its practical uses, by David Bock](
http://www.ccs.neu.edu/research/demeter/demeter-method/LawOfDemeter/paper-boy/demeter.pdf)

* [Practicing Ruby Issue 5.2: Rocket Science and the Law of Demeter](https://practicingruby.com/articles/shared/gulrqynwlywm)


================================================
FILE: articles/v6/008-procedural-to-oop.md
================================================
> **NOTE:** This issue of Practicing Ruby was one of several content experiments 
that were run in Volume 6. It uses a cookbook format (e.g. problem -> solution -> discussion)
instead of the traditional long-form article format we use in most Practicing Ruby articles.

**Problem: An ad-hoc script has devolved into an unmaintainable mess**

Imagine that you're working on a shipping cost estimation program for a small
business that uses a courier service for regional deliveries. Part of the task
for building that tool would involve importing pricing information from
some data source, such as this CSV file:

```
06770,$12.00
06512,$14.00
06510,$15.30
06701,$12.15
```

A real dataset would be more complex, but this minimal example exposes the
information we're interested in: what it costs to ship something from our
facility to somewhere else, based on the destination's zip code.

Now suppose that we want to build a simple data store which will be updated
daily with the latest pricing information. We then could easily write a script
using a few of Ruby's standard libraries (`PStore`, `BigDecimal`, and `CSV`),
which would normalize the data in a way that could be used by the user-facing
cost estimation program. If we could assume the source CSV data was validated
before we processed it, the program could be as simple as what you see below:

```ruby
require "csv"
require "pstore"
require "bigdecimal"

# Imports zip code / shipping rate pairs from a CSV file into a PStore.
# The CSV path comes from the first command-line argument and falls back
# to "rates.csv".
store = PStore.new("shipping_rates.store")

store.transaction do
  CSV.foreach(ARGV[0] || "rates.csv") do |r|
    zip    = r[0]
    # Drop the first character of the price field (the "$" in the sample
    # data) before converting. Kernel#BigDecimal is used because
    # BigDecimal.new was removed in bigdecimal 2.0 (Ruby 2.7).
    amount = BigDecimal(r[1][1..-1])

    store[zip] = amount
  end
end
```

But in reality, most business environments do not make things like this easy
for you. You'd probably quickly discover that the source data could have
any number of problems with it, ranging from duplicate entries to inconsistently
formatted fields. Because this kind of data often originates from people who are
entering information into Excel by hand, it can even be littered with typos!

To help mitigate these issues somewhat, you need a combination of
sanity-checking validations and basic logging so that when something goes wrong
you know why it happened. After adding those features, your simple script might
collapse into the mess you see below:

```ruby
require "csv"
require "pstore"
require "bigdecimal"

# Same import as the simple script, with validation, duplicate detection,
# and change logging all written inline.
store = PStore.new("shipping_rates.store")

store.transaction do
  # Zip codes already seen during this run, used to reject duplicate rows.
  processed_zipcodes  = []
  
  CSV.foreach(ARGV[0] || "rates.csv") do |r|
    # Sanity checks: a five-digit zip code and a price shaped like "$12.00".
    raise unless r[0][/\A\d{5}\z/]
    raise unless r[1][/\A\$\d+\.\d{2}\z/]
    
    zip    = r[0]
    # Kernel#BigDecimal is used because BigDecimal.new was removed in
    # bigdecimal 2.0 (Ruby 2.7).
    amount = BigDecimal(r[1][1..-1])

    raise "duplicate entry: #{zip}" if processed_zipcodes.include?(zip)
    processed_zipcodes << zip
    
    # Nothing to log or write when the stored rate is already current.
    next if store[zip] == amount

    if store[zip].nil?
      STDERR.puts("Adding new entry for #{zip}: #{'%.2f' % amount}")
    elsif store[zip] != amount
      STDERR.puts("Updating entry for #{zip}: "+
                  "was #{'%.2f' % store[zip]}, now #{'%.2f' % amount}")
    end
    
    # amount is already a BigDecimal, so it is stored as-is.
    store[zip] = amount
  end
end
```

Once your code ends up like this, it becomes increasingly difficult to 
add new features or make any sort of change without breaking 
something. Because this style of program is fairly difficult to test,
the maintenance problems can be made even worse by the fact that bugs may 
end up not being discovered until long after they're introduced.

Procedural scripts are great when you can throw away the code once you've
completed your task, or when you are solving simple problems whose requirements
you are reasonably sure will never change. For everything else,
more structure pays off in the long run. It's clear that this program
is in the latter category, so how do we fix it?

---

**Solution: Redesign the script as an object-oriented program**

The thing that makes ad-hoc scripts complicated to reason about
as they grow is that they blend all their concerns together -- both 
logically and conceptually. For that reason, it is worthwhile to
start thinking in terms of functions and objects as soon as your
program exceeds a paragraph or two of code.

Imagine that the script portion of your importer tool was reduced
to the following code:

```ruby
require "csv"

# Reads each row of the CSV named by the first command-line argument
# (falling back to "rates.csv"), wraps it in a PriceInformation, and assigns
# the rate to the object yielded by Importer.update, keyed by zip code.
Importer.update("shipping_rates.store") do |store|
  CSV.foreach(ARGV[0] || "rates.csv") do |r|
    info = PriceInformation.new(zipcode: r[0], shipping_rate: r[1])
    
    store[info.zipcode] = info.shipping_rate
  end
end
```

This brings us back to about the same level of detail expressed in the
naïve implementation of the importer script, albeit with a few custom classes
thrown into the mix. It hides a lot of detail
from the reader, but its core purpose is obvious: it iterates over a CSV file
to create a mapping of zipcodes to shipping rates in a datastore. 

To see where the real work is being done, we need to look at the
`PriceInformation` and `Importer` class definitions. We'll start by taking a
look at the former, because it has fewer moving parts to consider:

```ruby
require "bigdecimal"

# A validated zip code / shipping rate pair.
#
# Construction either succeeds with well-formed data or raises, so an
# instance never holds invalid values.
class PriceInformation
  ZIPCODE_MATCHER = /\A\d{5}\z/         # exactly five digits
  PRICE_MATCHER   = /\A\$\d+\.\d{2}\z/  # "$", digits, ".", two digits

  # zipcode::       String that must match ZIPCODE_MATCHER.
  # shipping_rate:: String that must match PRICE_MATCHER, e.g. "$12.00".
  #
  # Both keywords are effectively required: omitting one evaluates its
  # +raise+ default. Raises RuntimeError when either value fails validation.
  def initialize(zipcode: raise, shipping_rate: raise)
    raise "Zipcode validation failed"       unless zipcode[ZIPCODE_MATCHER]
    raise "Shipping rate validation failed" unless shipping_rate[PRICE_MATCHER]

    @zipcode       = zipcode
    # Strip the leading "$" before converting. Kernel#BigDecimal is used
    # because BigDecimal.new was removed in bigdecimal 2.0 (Ruby 2.7).
    @shipping_rate = BigDecimal(shipping_rate[1..-1])
  end

  attr_reader :zipcode, :shipping_rate
end
```

Here we see that `PriceInformation` applies the same validations and
transformations as shown in the script version of this program, but
encapsulates them in its constructor. This makes sure that a `PriceInformation`
object will either represent valid data or not be instantiated at all, 
which makes it so that the main script does not need to concern itself 
with these issues. Even if these validations or transformations become
more complex over time, the calling code should not need to change.

In a similar vein, the `Importer` class attempts to encapsulate the details
about some lower level concepts at a higher level of abstraction. Its
functionality is a bit more involved than the `PriceInformation` class,
so take a few minutes to study it before moving on:

```ruby
require "pstore"

# Write-once wrapper around a key/value store, used while importing data.
#
# Each key may be assigned at most once per Importer instance. Assignments
# that add or change a value are reported to ChangeLog before being written.
class Importer
  # Opens the PStore at +filename+, starts a transaction on it, and yields an
  # Importer wrapping that store to the caller's block.
  def self.update(filename)
    pstore = PStore.new(filename)

    pstore.transaction { yield new(pstore) }
  end

  # store:: object responding to #[] and #[]=.
  def initialize(store)
    @store    = store
    @imported = []
  end

  # Assigns +new_value+ to +key+ in the underlying store.
  #
  # Raises RuntimeError if +key+ was already assigned through this instance.
  # When the stored value already equals +new_value+, nothing is logged or
  # written.
  def []=(key, new_value)
    raise_if_duplicate(key)

    previous = store[key]

    unless previous == new_value
      if previous.nil?
        ChangeLog.new_record(key, new_value)
      else
        ChangeLog.updated_record(key, previous, new_value)
      end

      store[key] = new_value
    end
  end

  private

  attr_accessor :store, :imported

  # Records +key+ as seen, raising if it was seen before.
  def raise_if_duplicate(key)
    if imported.include?(key)
      raise "Duplicate key in import data: #{key}"
    end

    imported.push(key)
  end
end
```

Despite the complexity of its implementation, this class presents a very minimal
user interface, consisting of only `Importer.update` and `Importer#[]=`. The
`Importer.update` method is responsible for instantiating a `PStore` object,
initiating a transaction, and then wrapping it in an `Importer` instance to
limit access to its internals. From there, the only method available to the user
is `Importer#[]=`, which wraps `PStore#[]=` with two important features:

1. Single-assignment semantics: once a key has been set to a particular value, it
cannot be reset from within the same `Importer` instance. This is because we
want to raise an exception whenever we encounter duplicate keys in the data
we're importing.

2. Update notifications: For debugging purposes, we want to know whether a
record is introducing a new key, or updating the value associated with
an old one. Rather than cluttering up this class with the particular log
messages associated with those events, we delegate to a `ChangeLog` helper
object, which is shown below:

```ruby
# Singleton helper that writes import change notifications to STDERR.
ChangeLog = Object.new

class << ChangeLog
  # Reports that +key+ was added with +value+.
  def new_record(key, value)
    STDERR.puts("Adding #{key}: #{two_places(value)}")
  end

  # Reports that the value stored under +key+ changed.
  def updated_record(key, old_value, new_value)
    before = two_places(old_value)
    after  = two_places(new_value)

    STDERR.puts("Updating #{key}: Was #{before}, Now #{after}")
  end

  private

  # Formats a number with exactly two digits after the decimal point.
  def two_places(number)
    '%.2f' % number
  end
end
```

With this last detail exposed, you've walked through the complete 
object-oriented solution to this problem. It is much longer than the
script version, but also much more organized. Before we wrap things up, 
let's talk a bit more about the costs and benefits involved in introducing
more structure into your programs.

---

**Discussion**

The best thing about unstructured code is that nothing is hidden from view. 
To understand a script, you start at the top of the file and read downwards, 
mentally evaluating the state changes and iterators you encounter along the way.

Object-oriented programs are much more logically complex, because they 
represent a network of collaborators rather than a linear set of instructions.
For example, whenever we make a call to `Importer#[]=`, messages are sent to the
`ChangeLog` helper object as well as to an instance of `PStore`, but these
details are not at all visible when you read the caller code. The more objects
that exist within a system, the more complex their interactions get, and so
it is not uncommon to end up with call graphs that are both wide and deep.

But when it comes to visibility, the strength of scripted solutions is also their 
weakness, and the weakness of object-oriented programs is also their strength:

* In an ad-hoc script, you cannot make simple decisions about your code
without considering the entire program. Even something as straightforward
as renaming a variable used for temporary storage must be carefully considered,
because everything exists within a single namespace; anything more involved
than that is simply inviting trouble unless you can keep the entire program
in your head at once.

* In an object-oriented program, the walls erected between different objects give
you freedom to make sweeping changes to internal structures, as long as their
interfaces are preserved. You can even rewire entire subnetworks of functionality
from your programs, as long as you know what features depend on them. When
done well, the fact that you cannot keep an entire object-oriented program
in your head is not much of a concern, because the layered abstractions
make it so you don't have to.

The real challenge involved in writing object-oriented programs is that they'll
only be as useful as the mental model they represent. This is why it can
actually be helpful to start off with less structure (even none at all!), and
gradually work your way towards something more organized. After all,
there is nothing worse than an abstract solution in search of a concrete problem!


================================================
FILE: articles/v6/009-isolate-responsibilities.md
================================================
> **NOTE:** This issue of Practicing Ruby was one of several content experiments 
that were run in Volume 6. It uses a cookbook format (e.g. problem -> solution -> discussion)
instead of the traditional long-form article format we use in most Practicing Ruby articles.

**Problem: Code for data munging projects can easily become brittle.**

Whenever you work on a project that involves a significant amount of [data
munging](http://en.wikipedia.org/wiki/Data_munging), you can expect to get some mud on your boots. Even if the individual
aggregation and transformation steps are simple, complexity arises from
the messy process of assembling a useful data processing pipeline. With each
new change in requirements, this problem can easily be compounded in
brittle systems that have not been designed with malleability in mind.

As an example, imagine that you are implementing a tool that
delivers auto-generated email newsletters by aggregating and 
filtering links from Reddit. The following workflow provides
a rough outline of what that sort of program would need to
do in order to complete its task:

1. Map the raw JSON data from Reddit's API to an intermediate format that can be
used throughout the rest of the program.

2. Apply filters to ignore links that have already been included in a previous
newsletter, or that fall below a minimum score threshold.

3. Convert the curated list of links into a human readable format.

4. Send out the formatted list via email using GMail's SMTP servers.

Some will look at this set of steps and see a standalone script as the right
tool for the job: the individual steps are simple, and the time investment is
small enough that you could throw the entire script away and start again if you
end up facing significant changes in requirements.

Others will see this as a perfect opportunity to put together an elegant domain
model that supports a classic object-oriented design style. By encapsulating all
of these ideas in generalized abstractions, endless changes would be possible in
the future, thus justifying the upfront design cost.

Both of these perspectives have merit, but it would be unwise to set up a
false dichotomy between formal design and skipping the design process entirely. 
Interesting solutions to this problem also exist in the space between these two extremes,
and so we'll take a look at one of them now.

---

**Solution: Reduce the cost of rework by organizing your codebase into
isolated single-purpose components.**

Unlike the typical web application which has a wide range of end-points serving 
orthogonal concerns, the workflow for data munging projects often more closely 
resembles a flow-chart, with clearly defined beginning and end points. The
step-by-step nature of data munging projects makes them most naturally fit the 
procedural programming paradigm. This is a source of tension in Ruby,
because of its heavy object-oriented bias at the language level.

A reasonable compromise is to embrace "procedural programming with
objects". Rather than discussing this technique in the abstract,
we will instead explore what it looks like in practice by seeing
how it can be used to build the Reddit curation tool we
discussed earlier.

Let's start with the script that implements the core workflow of the program:

```ruby
require_relative "../lib/spyglass/actions/load_history_file"
require_relative "../lib/spyglass/actions/fetch_links"
require_relative "../lib/spyglass/actions/format_message"
require_relative "../lib/spyglass/actions/deliver_message"

# The history store and the message template are looked up in the same
# directory as this script.
basedir = File.dirname(__FILE__)

history   = Spyglass.load_history_file("#{basedir}/history.store")     #1
min_score = 20

selected_links = Spyglass.fetch_links("ruby").select do |link|         #2
  link.score >= min_score && history.new?(link)                        #3
end

# NOTE(review): the history is updated before the message is delivered, so a
# failed delivery would still leave these links marked as already seen.
history.update(selected_links)                                         #4

message = Spyglass.format_message(links: selected_links, 
                                  template: "#{basedir}/message.erb")  #5

Spyglass.deliver_message(subject: "Links for you!!!!!!",               #6
                         message: message)
```

This code looks a bit different than the typical Ruby snippet, because rather
than instantiating objects directly and then calling methods on them, it is
simply calling methods on the `Spyglass` module. It is obvious from the
`require_relative` calls that these features have been individually enabled,
which is also a non-standard way of doing things.

If you set aside the quirks of this code for a moment, it should still be fairly
easy to read. Here's a rough English translation of what's going on:

1. A history log is being loaded from a file.
2. Links in the "ruby" sub-reddit are being fetched and filtered
3. Links are filtered out if they're below a score threshold or have been
selected in a previous run of the program.
4. The history log is updated with the newly selected links
5. The selected links are formatted into a human readable message
6. The message is delivered with the subject "Links for you!!!!!!"

Because the six steps above pretty much directly line up with the high-level
requirements of the project, it is safe to say that this code is sufficiently
expressive. But to properly evaluate the overall design, we'll need to dig into
the code that implements these features. Let's proceed by walking through the
features in the order that they are used.

First up is the `Spyglass.load_history_file` procedure:

```ruby
require_relative "../data/history"

module Spyglass
  # Builds the history object backed by the store file at +filename+.
  #
  # Returns a Data::History instance.
  def self.load_history_file(filename)
    Data::History.new(filename)
  end
end
```

This method is a trivial stub that creates an instance of the
`Spyglass::Data::History` class shown below:

```ruby
require "pstore"

module Spyglass
  module Data
    # Remembers which link URLs have already been selected, using a PStore
    # file as the backing store.
    class History
      # filename - path of the PStore file that holds the recorded URLs.
      def initialize(filename)
        @store = PStore.new(filename)
      end

      # Returns true when +link+ (any object responding to #url) has not
      # been recorded yet, false otherwise.
      def new?(link)
        # A lookup never writes, so a read-only transaction is sufficient:
        # it does not need write access to the store and does not create
        # the file as a side effect of merely asking a question.
        @store.transaction(true) { @store[link.url].nil? }
      end

      # Records the URL of every link in +links+ within a single
      # transaction.
      def update(links)
        @store.transaction do
          links.each { |link| @store[link.url] = true }
        end
      end
    end
  end
end
```

From this definition, we can infer that the job of the `History` 
object is to keep track of which URLs have been selected in 
previous runs of the program. It uses a `PStore` object as
its persistence method, but that is mostly an implementation detail.

With an understanding of how `Spyglass.load_history_file` works and 
what type of object it returns, we can now move on to investigating
the `Spyglass.fetch_links` procedure:

```ruby
require "json"
require "open-uri"

require_relative "../data/link"

module Spyglass
  # Requests the listing for the +category+ subreddit from the Reddit API
  # (with limit=100) and maps each returned entry to a Data::Link.
  #
  # Returns an Array of Data::Link objects.
  def self.fetch_links(category)
    url = "http://api.reddit.com/r/#{category}?limit=100"

    # The block form closes the handle returned by open-uri as soon as the
    # body has been read, instead of leaving it for the garbage collector.
    # NOTE: on Ruby 3.0+ open-uri no longer extends Kernel#open; URI.open is
    # the equivalent call there.
    document = open(url) { |response| response.read }

    JSON.parse(document)["data"]["children"].map do |child|
      attributes = child["data"]

      Data::Link.new(url:   attributes["url"],
                     score: attributes["score"],
                     title: attributes["title"])
    end
  end
end
```

This method is responsible for making an HTTP request to the Reddit API to
capture a JSON document representing the raw data about links in a particular
subreddit. It then parses that document and transforms it into `Data::Link`
objects. A quick look at the class definition for `Data::Link` reveals that it
is a straightforward value object with no interesting business logic:

```ruby
module Spyglass
  module Data
    # Read-only value object describing a single link by its URL, score,
    # and title.
    class Link
      attr_reader :url, :score, :title

      # All three keywords are mandatory: each one defaults to a bare
      # `raise`, which fires (as a RuntimeError) when the keyword is omitted.
      def initialize(url: raise, score: raise, title: raise)
        @url, @score, @title = url, score, title
      end
    end
  end
end
```

As simple as it is, the `Data::Link` object is a very important part of this 
program, because every other feature that refers to links assumes that
they conform to this interface. In other words, we've set in stone here
that the data our program is interested in when it comes to links are
its `score`, `url`, and `title`. Any changes to this interface would
require widespread changes throughout our program.

Based on what you've seen so far, you should be able to understand exactly how
this program works up to step #4 in its main script. Only two steps remain:
formatting the list of curated links into a human readable message, and
delivering it to someone.

The formatting procedure (i.e. `Spyglass.format_message`) is extremely basic, 
as it is nothing more than a minimal wrapper around the `ERB` standard 
library:

```ruby
require "erb"

module Spyglass
  # Renders the ERB template stored in the file at +template+ and returns
  # the resulting String.
  #
  # +links+ is not referenced directly in this method; it reaches the
  # template through `binding`, which exposes this method's local variables
  # to the template code. Both keywords are required (the bare `raise`
  # default fires when one is omitted).
  def self.format_message(links: raise, template: raise)
    # Third argument "-" is ERB's trim mode that honors `-%>` tags; the nil
    # before it fills the positional safe_level slot.
    ERB.new(File.read(template), nil, "-").result(binding)
  end
end
```

This code is somewhat generalized, allowing an arbitrary template to
present the list of links. In the case of this particular script, we use a
simple text-based template that looks like this:

```
Here are some links you might enjoy!
<% links.each do |link| -%>

  <%= link.title %>:
  <%= link.url   %>
<% end %>
Have fun!
-greg
```

When evaluated, this template spits out plain-text output that looks similar to
what you see below:

```
Here are some links you might enjoy!

  _why updated his site:
  http://whytheluckystiff.net

  Teabag: A Javascript test runner built on top of Rails:
  https://github.com/modeset/teabag

  Ruby 2.0 Works Hard So You Can Be Lazy:
  http://patshaughnessy.net/2013/4/3/ruby-2-0-works-hard-so-you-can-be-lazy

Have fun!
-greg
```

From here, all that remains is to fire this message out via email, which is
handled by the `Spyglass.deliver_message` procedure:

```ruby
require "mail"

# Default delivery settings for the Mail gem: SMTP via smtp.gmail.com on
# port 587, authenticating with the GMAIL_USER and GMAIL_PASSWORD
# environment variables.
Mail.defaults do
  delivery_method :smtp, {
    :address              => 'smtp.gmail.com',
    :port                 => '587',
    :user_name            => ENV["GMAIL_USER"],
    :password             => ENV["GMAIL_PASSWORD"],
    :authentication       => :plain,
    :enable_starttls_auto => true
  }
end

module Spyglass  
  # Sends an email whose body is +message+ and whose subject is +subject+.
  #
  # The sender address is ENV["GMAIL_USER"] (the same value used as the SMTP
  # user name above) and the recipient is ENV["SPYGLASS_RECIPIENT"]. Both
  # keywords are required (the bare `raise` default fires when one is
  # omitted).
  def self.deliver_message(message: raise, subject: raise)
    mail = Mail.new

    mail.from = ENV["GMAIL_USER"]
    mail.to   = ENV["SPYGLASS_RECIPIENT"]

    mail.subject = subject
    mail.body    = message 

    mail.deliver!
  end
end
```

This is not as easy to read as many of the previous procedures, because it
involves some configuration code. However, on closer investigation we can easily
see that this is a thin wrapper around the `Mail` gem, and that it uses three
environment variables for its settings: `GMAIL_USER`, `GMAIL_PASSWORD`, and
`SPYGLASS_RECIPIENT`. This means that the main script for this program needs to
have these values set before it can be run, as in the example below:

```console
$ GMAIL_USER="test@gmail.com" GMAIL_PASSWORD="password" \
  SPYGLASS_RECIPIENT="test@test.com" ruby examples/reddit.rb
```

If you have a GMail account, you can actually give this a try by cloning
the [practicing-ruby-examples
repository](https://github.com/elm-city-craftworks/practicing-ruby-examples/tree/master/v6/009)
and running something similar to the line shown above in the *v6/009* folder.
But as long as you understand the general idea behind this program, don't worry
if you can't test it for yourself right now.

Assuming that you have been able to understand this walk-through, you may already
have some sense of why this solution is a reasonable middle ground between ad
hoc scripting and formal object-oriented design. However, we should discuss the
benefits and costs in more detail before we wrap things up here.

---

**Discussion**

The primary difference between object-oriented programming and "procedural
programming with objects" is that the former binds certain behaviors to
encapsulated data, and the latter decouples its data from its behavior.

Object-oriented design is best suited for problems where most of the interesting
details exist in the messages that are passed between objects. In other words,
when you have a complex set of interactions between a network of communicating
objects, it makes good sense to tightly bind together state and behavior.
However, this comes at the cost of indirection, and so it becomes hard to keep a
mental model in your mind of what the call graph looks like for even a single
request.

By contrast, data munging projects are procedural in nature, and so you have a
good sense of what needs to happen at each step in the process. The final
program represents a chain of transformations and filters on relatively simple
data structures, with some side effects thrown in along the way. Because each
step tends to be a very concrete action, the abstraction benefit that objects
can offer is negated by the fact that so much is subject to change in the whole
system.

If you go back and read through the [codebase we discussed in this
article](https://github.com/elm-city-craftworks/practicing-ruby-examples/tree/master/v6/009),
you will find that the data objects are trivially understandable, and the
actions are context-independent. Although they are not pure functions, each of
the actions can be fully understood in terms of its inputs, outputs, and
external dependencies. This makes it possible to make changes to the internals
without thinking about their impact on the overall program, as long as the
return values do not change.

Another interesting benefit of "procedural programming with objects" is that a
lack of internal behavioral dependencies makes it so that you can easily change
the signature of a single action without requiring a cascade of changes
throughout the system. The main script might need to be updated, but such
revisions would be trivial.

However, it is important to remember that all of these benefits come from the
fact that data munging projects occupy a special domain where certain benefits
of object-oriented programming are not especially important. You may want to
consider adopting a traditional object-oriented design if any of the following
conditions apply:

* You have actions that need to store data in instance variables, rather than
simply returning value objects or using repository objects like the `History`
object in this example.

* You have actions that need to call other actions in order to get their job
done, rather than relying on simple data objects that rarely change.

* You have actions that need to operate as a multi-step state machine, rather
than a single-purpose procedure that you can fire and forget.

All of the above are symptoms that the benefits of object-oriented design will
outweigh its costs, and Ruby *is* a deeply object-oriented language, so you
won't lose out by heading in that direction. However, if you are stuck in the
place between a throwaway script and a full object-oriented program, the example
shown in this article might help you find a nice compromise.


================================================
FILE: articles/v6/010-concept-before-code.md
================================================
> **NOTE:** This issue of Practicing Ruby was one of several content experiments 
that were run in Volume 6. It uses a cookbook format (e.g. problem -> solution -> discussion)
instead of the traditional long-form article format we use in most Practicing Ruby articles.

**Problem: It is hard to work on side projects without obsessing over technical
details and infrastructure decisions.**

There are lots of reasons to work on projects in your spare time, but there are 
two that stand out above the rest: scratching a personal itch by solving a 
real problem, and gaining a better understanding of various programming tools 
and techniques. Because these two motivating factors are competing interests, 
it pays to set explicit goals before working on a new side project.

That said, remembering this lesson is always a constant struggle for me. 
Whenever I'm brainstorming about a new project while taking a walk or sketching
something on a white board, I tend to develop big dreams that extend far
beyond what I can realistically accomplish in my available free time. To show
you exactly what I mean, I can share the back story on what led me to 
write the article you're reading now:

> Because I have a toddler to take care of at home,
meal planning can be a major source of stress for me. My wife and I are 
often too distracted to do planning in advance -- so we often need to make a 
decision on what to eat, put together a shopping list, go to the grocery 
store, and then come home and cook all in a single afternoon. 
Whenever this proves to be too much of a challenge for us, we order 
takeout or heat up some frozen junk food. Unsurprisingly,
this happens far more often than we'd like it to.

> To make matters worse, our family cookbook has historically consisted of a 
collection of haphazardly formatted recipes from various different sources. Over time, we've
made changes to the way we cook these recipes, but these revisions almost
never get written down. So for the most part, our recipes are inaccurate, 
hard to read, and can only be cooked by whichever one of us knows its quirks.
Most of them aren't even labeled with the name of the dish, so you need to
skim the instructions to find out what kind of dish it is!

> On one of my afternoon walks, I decided I wanted to build a program
that would help us solve some of these problems, so that we could make fewer
trips to the grocery store each week, while reducing the friction and cognitive
load involved in preparing a decent home cooked meal. It all seemed so simple in
my head, until I started writing out my ideas!

By the time I got done with my brain dump, the following items were on the 
wish list of things I wanted to accomplish in this side project:

* I figured this would be a great time to try out Rails 4, because this project
would obviously need to be a web application of some sort.

* It would be another opportunity for me to play around with Bootstrap.
I am weak at frontend development, but I am also bothered by poor visual 
design and usability, so it seems to be a toolset that's worth learning for
someone like me.

* I had been meaning to explore using the Pandoc toolchain from within Ruby programs
to produce HTML and PDF output from Markdown files, so this would be a perfect 
chance to try that out. This would allow me to have recipes look nice both
on the web and in print.

* It would be really cool if the meal planner would look for patterns in our
eating habits and generate recommendations for us once it had enough data to
draw some interesting conclusions.

* It would be nice to have a way of standardizing units of measures so that we
could trivially scale recipes and combine multiple recipes into a shopping list
automatically.

* It would be neat to support revision control and variations on recipes within
the web application, in addition to basic CRUD functionality and search.

* It would be great to be able to input a list of ingredients we have on hand
and get back the recipes that match them.

I won't lie to you, the system described above still sounds awesome to
me. Building it would involve lots of fun technological challenges, and
it'd be amazing to have such a powerful tool available to me. But it also
represents a completely unreasonable set of goals for someone who has so little
productive free time that even cooking dinner seems like too much work.
Sadly, it's easy to forget that sometimes.

To make a long story short: my initial brainstorming session proved to be 
a pleasant day dream, but it wasn't a real solution to my problems. Instead, 
what I needed was an approach that could deliver modest results in fractions 
of an hour rather than expecting to put in weeks of hard work. To do that, 
I'd have to radically scale back my expectations and set out in search of 
some low hanging fruit.

---

**Solution: Build a single useful feature and see how well it works in practice 
before attempting to design a full-scale application or library.**

When I catch myself getting caught up in *architectural astronaut* mode, 
I tend to bring myself back down to earth by completely inverting my approach. 
I drop the notion of building a perfect system, and instead focus on
building a single useful feature as quickly as possible without
any concern for elegance.

Like paratroopers in the night, the goal is not to find the exact right
place to start from, but instead to dive head first into unknown territory
and try to secure a foothold. Although there were many possible starting
points for working on my meal planner, I decided to start with the one that
seemed most simple in my mind: generating randomized selections of dishes 
to cook over a three day timespan.

Because this feature involved automating a small part of what was originally 
a completely manual process, the first step was to do a bit of tedious
data entry work. I thumbed through our binder of recipes and pulled out 16 
of them that we had cooked recently. I then used a felt-tipped pen to
number each recipe in ascending order, which yielded a rudimentary
way of looking up recipes by number.

This may seem like an ugly way of doing things, but I did it to save myself 
the trouble of figuring out how to convert my haphazardly printed recipes
into text-based source files. I also wanted to defer the decision of what to 
"officially" name each dish, and this way of labeling things allowed me to 
do that in the same way that an autoincrementing primary key does for 
database records.

Once I finished manually indexing my recipes, I compiled a CSV file 
that looked something like what you see below:

```
name,label
"Veggie Cassarole w. Swiss Chard + Baguette",1
"Stuffed Mushroom w. Leeks + Shallots",2
"Lentil Soup w. Leeks + Kale",3
"Spinach + White Bean Soup",4
```

This dataset introduces some human-readable names for the dishes, because 
I didn't want to have to thumb through the recipe book to see what 
"Dish #4" is actually made out of. This system also has an advantage of 
being truly arbitrary, unlike alphabetical order in which 
"Spinach + White Bean Soup" is just as reasonable a label as 
"White Bean + Spinach Soup", but the two would appear in totally 
different positions in the book. Although this may have been premature 
optimization, it came at a low cost and gave me some peace of mind, 
so that made it worthwhile to me.

Before writing any code, I manually tested the index to see how easy it
would be to look up a recipe by number. It proved to be no more complicated
than flipping to a particular page of a book, so it turned out to be a 
good enough system to start with. After that quick usability test,
I hacked together the following script to give me randomized meal selections:


```ruby
require "csv"

# Every row of recipes.csv (looked up relative to the current working
# directory) is collected here; the header row supplies the column names.
candidates = []

CSV.foreach("recipes.csv", :headers => true) { |row| candidates << row }

# Print three randomly chosen dishes as a bulleted list, each followed by
# its label in parentheses.
puts "How about this menu?\n\n" + candidates
  .sample(3)
  .map { |e| "* #{e['name']} (#{e['label']})" }
  .join("\n")
```

When run, this script produces the following output:

```
How about this menu?

* Tunisian Chickpea Stew (10)
* Tomato + Feta w. Green Bean Salad (13)
* Stuffed Mushroom w. Leeks + Shallots (2)
```

The first time I used this new tool, I had to run it a couple times 
in order to come up with three dishes that appealed to me at that moment.
However, this was still far less daunting than trying to choose three
dishes directly from our disorganized cookbook. With only about 30 minutes 
of work invested into this project (not counting the ridiculously 
ambitious brainstorming session), I already had a tool that
was doing something useful for me. Content with my progress for the day,
I plucked my chosen recipes from the binder we keep them in and headed off 
to the grocery store.

While shopping for ingredients and cooking the meals, I was reminded how 
terribly organized most of our recipes truly were. Some even made it hard
to see exactly what ingredients were needed, and nearly all of them listed
steps in a semi-arbitrary sequence of muddled paragraphs. Almost none of the
recipes were at the scale we tended to cook them at, so we'd need to do mental
math both when cooking and when shopping, which occasionally led us to make
mistakes.

I knew I didn't have the available free time to build a full-blown content management 
system for our recipes, but I wondered whether I could apply the lesson learned
from earlier that day to improve things in a low cost way. I eventually realized
that my idea of using Pandoc to convert markdown formatted recipes into PDFs 
wouldn't be so bad if I didn't need to build a whole system around it, so I 
decided to take a few recipes and manually format them in a way that was 
appealing to me. 

My personal preferences for organizing recipes are not
especially important here, but if you're curious, 
[check out this sample document](http://notes.practicingruby.com/barley_risotto.pdf).
The main goal I had was to limit the amount of information I needed to keep in my
mind at any given point in time, and to make the different transition points in
the cooking process explicitly clear.

The process of formatting the recipes this way was time consuming, 
and actually took longer than writing the randomizer program and preparing 
its data. With that in mind, I decided that I would work on this as I found 
time for it, rather than trying to get everything normalized into this
format all at once. The improved formatting definitely made a difference,
but I had to consider whether my time might be better used elsewhere.

Despite the mixed results, the lesson I learned from this experiment is that if
I had focused on solving the content management problem first, I may have spent
a good chunk of time building a complex system without
gaining an appreciation for the actual data entry costs. I also came to
realize that markdown files in a git repository seemed to be every bit
as comfortable for me as a web application could be, and I didn't need
to build anything in order to use them. This would be a terrible UI for
a general purpose application, but it worked great for me.

Over the course of a couple weeks, I kept using the meal randomizer with 
some degree of success, finding small opportunities to improve it along 
the way. Two main issues surfaced fairly early on in my use of the program:

1. Without some way of filtering recommended meals based on how much effort they 
required, I had to mentally ignore our more time-consuming dishes most
of the time.

2. Sixteen dishes is too small of a selection to get enough variety to avoid
duplicate suggestions and repeatedly seeing dishes you've eaten recently.

For the first issue, I decided that I didn't need something as precise as preparation 
time in minutes, but instead could use a simple subjective rating system from 1-5 
where the low end represents dishes that can be made almost instantaneously 
(like a grilled cheese sandwich), the middle represents a dish we'd cook on a regular
evening, and the high end represents an all-day cooking session. I'd set up
the program to select dishes with an effort score of 3 or lower by default, but allow
for the limit to be set via an argument.

But it's easy to see that fixing the first problem would only make the second
issue worse. I briefly thought through some clever solutions to the variety problem, like
keeping track of a history or doing other things to make the selection process smarter,
but eventually decided that simply increasing the number of dishes in the data set
would be easiest. So I dug back into some of our other recipes that we had online,
and also added things like sandwiches and other quick meals that we don't cook
from a recipe. Most of these didn't have a printout in our cookbook, so I just
labeled them with an "X", to indicate that they'd need to be imported later.

Thanks to my active laziness, the script only required very minor changes. The
updated version is shown below:

```ruby
require "csv"

# Every row of recipes.csv (looked up relative to the current working
# directory) is collected here; the header row supplies the column names.
candidates = []
# Highest effort rating to allow: the first command line argument when one
# is given (Integer() raises on a non-numeric value), otherwise 3.
effort     = ARGV[0] ? Integer(ARGV[0]) : 3

CSV.foreach("recipes.csv", :headers => true) { |row| candidates << row }

# Keep only the dishes at or below the effort limit, then print three of
# them chosen at random, each followed by its label in parentheses.
puts "How about this menu?\n\n" + candidates
  .select { |e| Integer(e['effort']) <= effort }
  .sample(3)
  .map { |e| "* #{e['name']} (#{e['label']})" }
  .join("\n")
```

Similarly, the CSV file only required a tiny bit of rework to add the effort
ratings:

```
name,label,effort
"Veggie Cassarole w. Swiss Chard + Baguette",1,3
"Stuffed Mushroom w. Leeks + Shallots",2,3
"Lentil Soup w. Leeks + Kale",3,3
"Spinach + White Bean Soup",4,2
...
```

With a dataset including over 30 dishes, and a filter that removed the most 
complex ones by default, the variety of the recommendations got a lot 
better. This greatly reduced the number of times I needed to run the script
before I could put together a meal plan. A smarter selection algorithm
could definitely make the tool even more helpful, but these small changes 
made a huge difference on their own.

Another week passed, and I eventually realized that I don't particularly
like having to pop open a terminal and run a command line program simply
to decide what I want to have for dinner. After another half hour of work, 
I wrapped the script in a minimal web interface using Sinatra. Throwing 
that app up onto Heroku allowed me to do my meal planning via the web 
browser. The UI is nothing special, but it gets the job done:

![](http://i.imgur.com/Y1C3sxt.png)

As you might expect, the code that implements this UI isn't
especially exciting, it's just basic glue code and an ERB 
template:

```ruby
require "sinatra"
require "csv"

# Builds an HTML unordered list of up to three randomly chosen dishes whose
# 'effort' rating does not exceed +effort+.
#
# candidates - rows responding to [] with 'name', 'label' and 'effort' keys.
# effort     - Integer upper bound for a dish's effort rating.
#
# Returns the list markup as a String ("<ul></ul>" when nothing qualifies).
def meal_list(candidates, effort)
  eligible = candidates.select { |dish| Integer(dish['effort']) <= effort }
  items    = eligible.sample(3).map do |dish|
    "<li>#{dish['name']} (#{dish['label']})</li>"
  end

  "<ul>#{items.join}</ul>"
end

# GET / -- renders the :index template with three suggested dishes. An
# optional "effort" query parameter overrides the default effort limit of 3.
get "/" do
  candidates   = []
  effort       = Integer(params.fetch("effort", 3))
  # Deliberately not called `meal_list`, so the path cannot be confused with
  # the meal_list helper method invoked below.
  recipes_file = "#{File.dirname(__FILE__)}/../recipes.csv"

  CSV.foreach(recipes_file, :headers => true) do |row|
    candidates << row
  end

  @selected = meal_list(candidates, effort)

  erb :index
end

__END__

@@index
<html>
  <body>
    <h1>How about these meals?</h1>
    <%= @selected %>
  </body>
</html>
```

It's worth noting that the code above is about at the level of complexity
where more formal development practices start to pay off. But since I 
managed to squeeze three weeks of active use out of the tool before
getting to this point, I definitely won't mind doing some cleanup work 
if and when I decide to add more features to it.

**Discussion**

The main thing I hope you will take away from this article is that "keeping
things simple" is a term that we say often but rarely practice. This can have a
painful effect on our daily work, but is disastrous for our side projects,
because we often work on them with a tight time budget.

Speaking from personal experience, I've lost count of how many Rails application
skeletons I've built that started with big dreams and ended up with nothing more 
than a couple database models, a few half-finished CRUD forms, and an
authentication system, but no actual features to speak of. I guess I am
just extremely good at overestimating how much time and motivation I'll
have for building the things I think of day to day.

Increasingly, I've been trying to think of software as a support system for
solving human problems, rather than some sort of artifact that holds intrinsic
value. Software is extremely expensive to build and maintain, so it pays to
write as little code as possible. This does not just mean writing terse
programs: it means spending more time and creativity on practical problem
solving, so that you can focus your energy on making people's lives
easier rather than obsessing over technical issues. Adopting this mindset
can lead you to being more thoughtful when you build software for others,
but it also serves as a reminder that you can and should enjoy the fruits 
of your own labor.

Even though programming can be fun in its own right, you don't need to view
every software project as an opportunity to solve interesting coding puzzles.
When measured in terms of functional value rather than implementation details,
sometimes the most elegant solution is a script that you cobbled together during
a lunch break, because it cost you almost nothing but still managed to do
something useful for you. These opportunities appear around every corner, you
just need to be prepared to take advantage of them when they arise. I hope
the story I've shared in this article has helped you learn what to look out for.

*Do you have an example of some code you wrote that took very little effort but
still ended up being very useful for you? If so, please share your story in
the comments section below.*


================================================
FILE: articles/v6/README.md
================================================
The articles in this folder are from Practicing Ruby's sixth volume, which ran from
January 2013 to May 2013.

You can also read them for free online at practicingruby.com.


================================================
FILE: articles/v7/001-simulating-tiny-worlds.md
================================================
As programmers we are very comfortable with the idea of using
software to solve concrete problems. However, it is easy to
underestimate the role that code can play in problem-solving itself, and that blindspot can hinder our creativity in a number of ways.

In this article, I will walk you through some fun examples that demonstrate how code can be used as an exploratory thinking tool, and then reflect upon how that kind of workflow might be applied to everyday programming tasks.

## Setting the stage

The source of my motivation for writing on this topic is the [StarLogo](http://education.mit.edu/starlogo/) programming environment and Mitchel Resnick's excellent book "Turtles, Termites, and Traffic Jams", both of which illustrate the potential for software to be used as a mind-expanding thinking tool.

As the title of the book implies, StarLogo is an environment that facilitates simplistic modeling of scenarios that occur in the natural world. The purpose of the tool is not to create environments that closely emulate reality, but instead, to encourage exploration and discovery in simple, tightly constrained microcosms. Apart from being an intellectual curiosity, this sort of toolset provides a powerful way to intuitively experience deep concepts that range from self-organization and emergent behavior to massive parallelism.

In the spirit of exploration, I won't attempt to make a case for those claims by way of a top-down explanation. Instead, we'll now walk through a few scenarios that are easily implemented using StarLogo-style modeling. The examples I've chosen are based on ideas from StarLogo and Resnick's book, but I have ported them to JRuby to allow you to explore the concepts without having to familiarize yourself with a new environment first. The engine I built is called [Terrarium](https://github.com/sandal/terrarium), and it is very much a rough prototype, but it should still be good enough to introduce you to these ideas with minimal friction.

## Scenario I: Forest fires

The environment in the StarLogo model consists of a two-dimensional grid of "patches", which are in some ways similar to cellular automata models such as Conway's Game of Life. 

Using only patch color to represent state, we could apply the following ruleset to simulate a rough sketch of a forest fire:

1. Start by building a forest. For the sake of simplicity, we can begin with an empty grid and then randomly paint some of its patches green.

2. To ignite our fire, we can pick a random patch in the grid and paint it red.

3. Each green patch then needs to repeatedly check to see if any of its neighbors are red, and if so, it becomes red itself, spreading the fire.

Applying these three trivial rules results in the following behavior:

![](http://i.imgur.com/MtAbXPF.gif)

Although this animation should be fairly straightforward to understand, it is worth pointing out one small detail about the geometry of a StarLogo-style world: rather than being an infinite grid like Conway's Game of Life, it is a torus, where the left side is connected to the right, and the top is connected to the bottom. This explains why the fire (which starts at the bottom of the screen) quickly overflows onto the top of the screen in this animation.

The code that was used to generate this visualization is shown below, and is nearly a direct translation of the rules shown above:

```ruby
# Forest fire scenario: builds a random forest, ignites a single patch,
# and lets the fire spread between neighboring patches.
Terrarium::Scenario.define do
  # Rule 1: Build the forest (each patch is painted green with
  # probability 0.5)
  patches do
    with_probability(0.5) { set_color :green } 
  end

  # Rule 2: Start the fire by painting one randomly chosen patch red
  random_patch { set_color :red }

  # Rule 3: Spread the fire. A green patch turns red when any of its
  # neighbors is red.
  # NOTE(review): `patches!` (with a bang) presumably registers a rule that
  # is re-run on every step, unlike the one-time `patches` setup above --
  # confirm against the Terrarium engine.
  patches! do
    if color == :green && neighbors.any? { |e| e.color == :red }
      set_color :red
    end
  end
end
```

It is here where you can catch the first glimpse of what I meant by "code as a thinking tool". With Terrarium as our engine and StarLogo-style data modeling, we don't need to think at all about the structure or inner workings of our program, but instead can immediately turn our ideas into code. This takes what would cost us hours in upfront modeling and reduces it to minutes of effort.

Being able to work at this very high level of abstraction allows us to try variations and experiments as soon as we think of them. A simple idea to try out with this model is to see how the fire spreads at various levels of tree density. You will find that at 50% (which is what is shown above), the fire will pretty much always spread across the forest, but at 30%, the opposite is true. Is there a critical tipping point between those two bounds? If so, why is it there? These are the kinds of thoughts that arise when you can focus on ideas rather than code.

## Scenario II: Infectious disease

As you may have guessed from the name of the language, StarLogo also implements the *turtle graphics* model found in the Logo programming language. Both languages were developed by the same research group at MIT, and so if you are familiar with Logo turtles, you will find that StarLogo's creatures move around the world in a similar way to their classical ancestors.

However, that is where the similarities end. While the average Logo turtle lives a solitary life, StarLogo's creatures can be commanded en-masse, in groups of hundreds or thousands. Where the Logo turtle is mostly used for drawing lines (albeit in some very clever ways), the StarLogo creature is capable of having much more complex interactions with its world, including the other creatures in it.

Take for example the problem of modeling the spread of a contagious disease through a population of creatures. If we allow ourselves to paper over any inconsistencies with reality by using a bit of imagination, the following rules are sufficient for emulating this scenario:

1. Arrange a group of healthy creatures into a crowd
2. Infect some of the creatures with the disease
3. Allow the creatures to slowly move about their world
4. The disease will spread from sick to healthy creatures whenever they come into contact with each other.
5. After a set period of "sick time", the creature will either die or recover, based on probability. (Recovered creatures can be re-infected if they come into contact with sick creatures, dead creatures simply disappear.)

When applied to a population of 200 StarLogo creatures, these rules produce a pattern similar to what is shown in the following animation:

![](http://i.imgur.com/dZ6czuf.gif)

Here we see the disease quickly spreading from a few infected individuals to the majority of the population. However, the rate of infection then dampens due to the following factors:

* As the creatures wander around, they become less densely packed together, which reduces the frequency at which they transmit disease to one another.

* If a creature eventually dies from an infection, that stops it from continuing to spread the disease, because it gets removed from the world upon its death.

* If the creature recovers, it can be reinfected, but by then the creatures have already spread out enough to prevent rapid chain reactions from occurring.

All of these conditions are affected by a number of variables, including population size, population density, duration of sick time, number of initially infected creatures, speed of movement of the creatures, and the probability of death vs. recovery in the infected population. In addition to this, the whole system is subject to some degree of fluctuation due to the randomness in both the movement and initial layout of the population.

Taking a purely analytical approach towards thinking through the relationships between all of these variables would be a challenging task to say the least. However, it does not take much specialized knowledge at all to model this problem using StarLogo-style creatures. In fact, the code below is all you need to implement this scenario. Try reading it one rule at a time while looking at the animation, and you should be able to piece together the main concepts even if you've never heard of StarLogo before reading this article:

```ruby
# Infectious disease scenario: a crowd of creatures wanders around while
# a disease passes from sick (yellow) to healthy (cyan) creatures.
Terrarium::Scenario.define do
  # A creature's color doubles as its health state
  healthy_color = :cyan
  sick_color    = :yellow

  # Tunable parameters:
  #   initial_population - number of creatures created at startup
  #   crowd_range        - range of the random forward move used in rule 1
  #   sick_time          - starting value of a sick creature's countdown
  #                        (decremented in rule 5)
  #   infection_density  - probability that a creature starts out sick (rule 2)
  #   movement_speed     - distance moved forward by rule 3
  initial_population = 200
  crowd_range        = 5..15
  sick_time          = 5
  infection_density  = 0.02
  movement_speed     = 0.2
  
  create_creatures(initial_population)
  
  # rule 1: arrange a group of healthy creatures into a crowd by turning
  #         each one to a random heading and moving it forward a random
  #         distance taken from crowd_range
  creatures do 
    lt rand(0..359)
    fd rand(crowd_range)

    data[:sick_time] = 0
    set_color healthy_color
  end

  # rule 2: infect some creatures
  creatures do
    with_probability(infection_density) do
      set_color sick_color
      
      data[:sick_time] = sick_time
    end
  end

  # rule 3: allow the creatures to move about randomly
  # NOTE(review): `creatures!` (with a bang) presumably registers a rule that
  # is re-run on every step, unlike the one-time `creatures` setup blocks
  # above -- confirm against the Terrarium engine.
  creatures! do
    lt rand(1..40)
    rt rand(1..40)

    fd movement_speed
  end

  # rule 4: spread disease on contact (only healthy creatures can catch it)
  creatures! do
    next unless color == healthy_color

    if nearby_creatures.any? { |e| e.color == sick_color }
      set_color sick_color
      
      data[:sick_time] = sick_time
    end
  end

  # rule 5: recover or die based on probability 
  # Sick creatures count their sick time down to zero first.
  # NOTE(review): the meaning of the argument to `creatures!(1)` is not
  # visible here (presumably an interval or delay) -- TODO confirm.
  # NOTE(review): `coinflip` presumably returns true or false with equal
  # probability -- confirm.
  creatures!(1) do
    next unless color == sick_color

    if data[:sick_time] > 0
      data[:sick_time] -= 1 
    else
      coinflip ? set_color(healthy_color) : destroy
    end
  end
end
```

Because it's the live interactions in this system that are complex and not its rules, you cannot easily predict the patterns that will emerge from this program by simply reading its source code. However, by repeatedly running the program and testing various assumptions you have about the system, you can rapidly gain an intuitive sense for the patterns that arise. In that sense, exploratory programming environments can have an effect similar to that of plotting a mathematical formula: although they can't give you a precise answer to your question, they can very quickly communicate the main points of a story.

## Scenario III: Rabbits in a cabbage patch

As you may have already guessed, StarLogo's data model doesn't just give you creatures and patches, but it also supports interactions between the two. Because both the creatures and patches can encapsulate arbitrarily complex data, and because StarLogo provides a solid API for various kinds of common tasks, the richness of behavior that can be expressed through these interactions is mind boggling.

The full StarLogo environment can tackle problems like ant foraging behavior with ease, a problem that I labored with for weeks and spent two issues of Practicing Ruby on ([Issue 5.8](https://practicingruby.com/articles/92) and [Issue 5.9](https://practicingruby.com/articles/93)). However, the features I've ported from StarLogo into the Terrarium project are somewhat limited, so we'll tackle a more basic scenario that will still give you a sense of how creatures and patches can interact with one another.

We'll now take a stab at implementing a simple ecosystem in which hungry rabbits wander around doing what rabbits tend to do: eating, procreating, and dying. This is the sort of predator/prey modeling problem that you might find on a school math test, but we'll approach it informally rather than brushing up on our differential equations.

Here are the rules that will get our ecosystem up and running:

1. Create a cabbage patch by randomly coloring some patches green
2. On each iteration of the simulation, give each patch a small chance to sprout cabbage, facilitating regrowth.
3. Arrange a crowd of rabbits in the cabbage patch.
4. Allow the rabbits to wander randomly around the cabbage patch
5. Rabbits eat any cabbage they encounter. This sets the patch color back to black, and increases the energy of the rabbits.
6. Rabbits gradually lose energy over time. If their energy is fully depleted, they die.
7. Rabbits also breed (asexually!) when they have enough energy. The parent's energy is reduced, and then it produces an exact clone of itself at its current location.

Once set into action, these constraints give rise to the dynamic system you see in the animation below. To make sense of what's going on, ignore the rabbits and focus on the oscillating growing and shrinking of the cabbage patch:

![](http://i.imgur.com/3iese1f.gif)

What you're seeing happen here is a basic cycle that tends to proceed in the following fashion:

* Whenever the rabbits have plenty of cabbage to eat, they breed, and their population numbers rise.

* As the rabbit population rises, the cabbage gets eaten more rapidly, reducing the amount of total food available to the rabbits in the cabbage patch.

* As food sources dwindle, rabbits tend to stop breeding and some also die of starvation, causing their population levels to drop.

* A smaller rabbit population leads to slower cabbage consumption, which results in rapid regrowth and plenty of cabbage for the rabbits to eat.

* This in turn leads the rabbits to stop dying from starvation and start breeding again, starting the cycle all over again.

The fact that we've reproduced this cycle is not a particularly profound result: you could have guessed it without ever bothering to create a simulation. However, if you treat the basic problem as a starting point and then continue your explorations from there, many more surprising results can be found. 

In my casual experiments I found that the system is surprisingly tolerant to singular catastrophic events (such as killing off 90% of the rabbits or the cabbage), because the two populations naturally force each other into balance. However, very small changes to the rate of cabbage regrowth, or to the amount of energy the rabbits gain from eating the cabbage can have disastrous effects that lead to extinction. I found these patterns interesting, because they were opposite to my intuition. 

Perhaps a more significant point though is that I doubt I would have even thought to try out those ideas if I were working with a formal equation rather than a dynamic and lively visualization. Because I'm not a visually-oriented learner, this really surprised me!

The full source code for this scenario is shown below, and you should at least skim it, but you don't need to get bogged down in the details unless you plan to play around with StarLogo or my Terrarium engine after you're done reading this article. If you're feeling a bit tired by now, you can skip right past it to the next section without losing too much.  

```ruby
# Rabbits-in-a-cabbage-patch scenario: rabbits wander around a patch of
# regrowing cabbage, eating, breeding, and starving.
Terrarium::Scenario.define do
  cabbage_density    = 0.5   # probability that a patch starts out as cabbage
  regrowth_rate      = 0.02  # probability used by the regrowth rule for a patch
  initial_population = 200   # number of rabbits created at startup
  initial_energy     = 8     # energy each rabbit starts with
  food_energy        = 5     # energy gained by eating one cabbage
  hatch_threshold    = 10    # rabbits with more energy than this clone themselves
  hatched_energy     = 0.25  # factor applied to the parent's energy when it clones

  cabbage_color = :green
  rabbit_color  = :white
  soil_color    = :black

  # rule 1: create cabbage patch
  patches do
    set_color soil_color
    with_probability(cabbage_density) { set_color(cabbage_color) }
  end

  # rule 2: cabbage regrowth
  patches! do
    with_probability(regrowth_rate) { set_color(cabbage_color) }
  end

  # Use the configured population size rather than a hard-coded count,
  # so that changing initial_population above actually has an effect.
  create_creatures(initial_population)

  # rule 3: arrange a crowd of rabbits
  creatures do
    lt rand(0..359)
    fd rand(5..25)

    data[:energy] = initial_energy

    set_color rabbit_color
  end

  # rule 4: let the rabbits wander
  # (both turns draw from the same inclusive range, so the random walk
  #  is not biased toward one side)
  creatures! { rt(rand(1..40)); lt(rand(1..40)); fd(1) }

  # rule 5: rabbits eat any cabbage they encounter, gaining energy
  creatures! do
    update_patch do |patch|
      if patch.color == cabbage_color
        patch.set_color soil_color
        data[:energy] += food_energy
      end
    end
  end

  # rule 6: the rabbits are always losing energy
  creatures! { data[:energy] -= 1 }

  # rule 7: when the rabbits run out of energy, they die
  creatures! { destroy if data[:energy] < 1 }

  # rule 8: when rabbits have enough energy, they clone themselves
  #         (but it costs them some energy)
  creatures! do
    if data[:energy] > hatch_threshold
      # The parent's energy is reduced before the clone is produced
      data[:energy] *= hatched_energy

      hatch
    end
  end
end
```

## Exploratory programming as a first-class paradigm?

Even though we've managed to pack a lot of interesting behavior into a small amount of code, the examples I've shown here barely scratch the surface of StarLogo and its capabilities. While my Terrarium engine is nothing more than a poor man's implementation of a few of StarLogo's features, the full StarLogo language is elegantly designed and carefully thought out.

But the goal of this article was not to introduce you to a shiny piece of technological infrastructure; it was meant to get you thinking about a different kind of workflow than what we tend to use day to day.  Even through the smudged window I've had you look through, it should be clear to see that the style of programming used in StarLogo has several powerful benefits:

1. Thoughts can be expressed directly
2. Feedback is given continuously 
3. Failure comes at a very low cost
4. The problem domain is well constrained
5. Objects can be directly acted upon

While most of the tools I use when I'm programming have at least some of these positive traits, it's rare to experience the effect of all of them simultaneously. However, a few positive examples do come 
to mind. In particular, the various web browser development tools (like Firebug or the tools that ship with Chrome) support this kind of workflow.

When it comes to frontend web development tools, I've always been amazed at how much it is possible to incrementally evolve a design by tweaking various page elements until you're happy with them. I think that much of the effectiveness of this technique is due to the benefits listed above. Here is a specific example to illustrate that point:

1. If you want to change a font size of a given block of text, it's as easy as clicking that text and editing a single attribute.

2. You see the results immediately on your screen. 

3. If you don't like the results, you can easily revert your changes. And if you made a mistake when you were editing things, it should be immediately obvious based on what does (or doesn't) get displayed on the screen.

4. Although the environment is very sophisticated, the scope is constrained enough where the available actions are fairly clear at any given point in time. 

5. Finally, because you are often looking at things within the scope of a single element that you are working with directly, you can use extremely localized thinking without harmful consequences.

Unfortunately, I can't easily come up with similar examples when it comes to backend web frameworks. If you narrow the scope, similar workflows can be applied to very simple HTTP services running on Sinatra, but once you need anything more complex than that it becomes much too broad of a problem to solve.

To be fair, Rails has some elements baked into it that facilitate a certain amount of exploratory programming (the console, scaffolding, etc.). However, these features have always felt to me as if they were not taken nearly far enough, and that there is still room for a much higher level toolkit, even if it would only be useful for rapid prototyping. 

In an ideal world, I would love to be able to describe a useful full-stack feature in a web application in a dozen lines or less, but I've never seen anything that gets me even close to that level of abstraction. Of course, web architecture is sufficiently obtuse to make this a genuinely hard problem to solve, so I'm not surprised that there isn't an obvious solution out there just yet.

But web programming (particularly general-purpose web programming) is really at a lower level than where this paradigm really could shine. It seems to me that there is nearly infinite possibility for what one might call "domain-specific development environments". For example, could we build programmable tools for book publishers that sit somewhere between a WYSIWYG editor and DocBook XML? Could we build drop-in management panels for business metrics that can be programmed at a high enough level that an analyst could use them with minimal help from their programming team? Is there hope that we can put these kinds of high-powered but easy-to-use tools into the hands of musicians, artists, teachers, and charity volunteers?

Perhaps the best use of a general purpose programming language is to build domain-specific environments that help cross a bridge from low-level infrastructure to high-level ideas. But because this is all just a pie-in-the-sky dream that may never end up becoming a reality, I will let you be the judge! Please share your thoughts in the comments below.


================================================
FILE: articles/v7/002-http-server.md
================================================
*This article was written by Luke Francl, a Ruby developer living in
San Francisco. He is a developer at [Swiftype](https://swiftype.com) where he
works on everything from web crawling to answering support requests.*

Implementing a simpler version of a technology that you use every day can
help you understand it better. In this article, we will apply this
technique by building a simple HTTP server in Ruby.

By the time you're done reading, you will know how to serve files from your
computer to a web browser with no dependencies other than a few standard
libraries that ship with Ruby. Although the server
we build will not be robust or anywhere near feature complete,
it will allow you to look under the hood of one of the most fundamental
pieces of technology that we all use on a regular basis.

## A (very) brief introduction to HTTP

We all use web applications daily and many of us build
them for a living, but much of our work is done far above the HTTP level.
We'll need to come down from the clouds a bit in order to explore
what happens at the protocol level when someone clicks a 
link to *http://example.com/file.txt* in their web browser. 

The following steps roughly cover the typical HTTP request/response lifecycle:

1) The browser issues an HTTP request by opening a TCP socket connection to
`example.com` on port 80. The server accepts the connection, opening a
socket for bi-directional communication.

2) When the connection has been made, the HTTP client sends an HTTP request:

```
GET /file.txt HTTP/1.1
User-Agent: ExampleBrowser/1.0
Host: example.com
Accept: */*
```

3) The server then parses the request. The first line is the Request-Line which contains
the HTTP method (`GET`), Request-URI (`/file.txt`), and HTTP version (`1.1`).
Subsequent lines are headers, which consist of key-value pairs delimited by `:`. 
After the headers is a blank line followed by an optional message body (not shown in
this example).

4) Using the same connection, the server responds with the contents of the file:

```
HTTP/1.1 200 OK
Content-Type: text/plain
Content-Length: 13
Connection: close

hello world
```

5) After finishing the response, the server closes the socket to terminate the connection.

The basic workflow shown above is one of HTTP's most simple use cases,
but it is also one of the most common interactions handled by web servers.
Let's jump right into implementing it!

## Writing the "Hello World" HTTP server

To begin, let's build the simplest thing that could possibly work: a web server
that always responds "Hello World" with HTTP 200 to any request. The following
code mostly follows the process outlined in the previous section, but is
commented line-by-line to help you understand its implementation details:

```ruby
require 'socket' # Provides TCPServer and TCPSocket classes

# Bind a TCPServer to localhost:2345. It will hand us one
# TCPSocket for each incoming connection.
server = TCPServer.new('localhost', 2345)

# Serve forever, handling a single connection per iteration.
loop do

  # Block until a client connects. The TCPSocket that comes back
  # behaves like any other Ruby I/O object (TCPSocket is in fact
  # a subclass of IO).
  socket = server.accept

  # The Request-Line is the first line that the client sends
  request = socket.gets

  # Echo the request to the console to aid debugging
  STDERR.puts request

  response = "Hello World!\n"

  # The Content-Type and Content-Length headers tell the client what
  # kind of data the response contains and how large it is. HTTP is
  # whitespace sensitive, so every header line has to end with CRLF
  # (i.e. "\r\n").
  headers = [
    "HTTP/1.1 200 OK\r\n",
    "Content-Type: text/plain\r\n",
    "Content-Length: #{response.bytesize}\r\n",
    "Connection: close\r\n"
  ].join

  socket.print headers

  # The protocol requires a blank line between the headers
  # and the response body.
  socket.print "\r\n"

  # Send the response body itself, which is just "Hello World!\n"
  socket.print response

  # Hang up, terminating the connection
  socket.close
end
```
To test your server, run this code and then try opening `http://localhost:2345/anything`
in a browser. You should see the "Hello World!" message. Meanwhile, in the output for
the HTTP server, you should see the request being logged:

```
GET /anything HTTP/1.1
```

Next, open another shell and test it with `curl`:

```
curl --verbose -XGET http://localhost:2345/anything
```

You'll see the detailed request and response headers:

```
* About to connect() to localhost port 2345 (#0)
*   Trying 127.0.0.1... connected
* Connected to localhost (127.0.0.1) port 2345 (#0)
> GET /anything HTTP/1.1
> User-Agent: curl/7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7
              OpenSSL/0.9.8r zlib/1.2.3
> Host: localhost:2345
> Accept: */*
>
< HTTP/1.1 200 OK
< Content-Type: text/plain
< Content-Length: 13
< Connection: close
<
Hello World!
* Closing connection #0
```

Congratulations, you've written a simple HTTP server! Now we'll 
build a more useful one.

## Serving files over HTTP

We're about to build a more realistic program that is capable of 
serving files over HTTP, rather than simply responding to any request
with "Hello World". In order to do that, we'll need to make a few 
changes to the way our server works.

For each incoming request, we'll extract the `Request-URI` from the Request-Line and translate it into
a path to a file within the server's public folder. If we're able to find a match, we'll
respond with its contents, using the file's size to determine the `Content-Length`,
and its extension to determine the `Content-Type`. If no matching file can be found,
we'll respond with a `404 Not Found` error status.

Most of these changes are fairly straightforward to implement, but mapping the
`Request-URI` to a path on the server's filesystem is a bit more complicated due
to security issues. To simplify things a bit, let's assume for the moment that a
`requested_file` function has been implemented for us already that can handle
this task safely. Then we could build a rudimentary HTTP file server in the following way:

```ruby
require 'socket'
require 'uri'

# Files will be served from this directory. Because the path is relative,
# it is resolved against the server process's current working directory.
WEB_ROOT = './public'

# Map file extensions (without the leading dot) to their content type
CONTENT_TYPE_MAPPING = {
  'html' => 'text/html',
  'txt' => 'text/plain',
  'png' => 'image/png',
  'jpg' => 'image/jpeg'
}

# Treat as binary data if content type cannot be found
DEFAULT_CONTENT_TYPE = 'application/octet-stream'

# This helper function parses the extension of the
# requested file and then looks up its content type.
#
# path - a String path (or any object File.extname accepts,
#        such as an open File)
#
# The lookup ignores case, so "photo.JPG" and "photo.jpg" both map to
# "image/jpeg". Returns DEFAULT_CONTENT_TYPE when the path has no
# extension or its extension is not listed in CONTENT_TYPE_MAPPING.

def content_type(path)
  # File.extname returns "" when there is no extension; that simply
  # misses the mapping and falls through to the default.
  ext = File.extname(path).sub(/\A\./, "").downcase
  CONTENT_TYPE_MAPPING.fetch(ext, DEFAULT_CONTENT_TYPE)
end

# This helper function parses the Request-Line and
# generates a path to a file on the server.
#
# Placeholder only: with its body left empty, this method returns nil.

def requested_file(request_line)
  # ... implementation details to be discussed later ...
end

# Except where noted below, the general approach of
# handling requests and generating responses is
# similar to that of the "Hello World" example
# shown earlier.

server = TCPServer.new('localhost', 2345)

loop do
  socket       = server.accept
  request_line = socket.gets

  # A client may connect and then hang up without sending anything,
  # in which case gets returns nil. There is nothing to serve, so
  # drop the connection instead of crashing while parsing the request.
  if request_line.nil?
    socket.close
    next
  end

  STDERR.puts request_line

  path = requested_file(request_line)

  # Make sure the file exists and is not a directory
  # before attempting to open it.
  if File.exist?(path) && !File.directory?(path)
    File.open(path, "rb") do |file|
      socket.print "HTTP/1.1 200 OK\r\n" +
                   "Content-Type: #{content_type(path)}\r\n" +
                   "Content-Length: #{file.size}\r\n" +
                   "Connection: close\r\n"

      socket.print "\r\n"

      # write the contents of the file to the socket
      IO.copy_stream(file, socket)
    end
  else
    message = "File not found\n"

    # respond with a 404 error code to indicate the file does not exist.
    # Content-Length is measured in bytes, hence bytesize rather than size.
    socket.print "HTTP/1.1 404 Not Found\r\n" +
                 "Content-Type: text/plain\r\n" +
                 "Content-Length: #{message.bytesize}\r\n" +
                 "Connection: close\r\n"

    socket.print "\r\n"

    socket.print message
  end

  socket.close
end
```

Although there is a lot more code here than what we saw in the
"Hello World" example, most of it is routine file manipulation
similar to the kind we'd encounter in everyday code. Now there
is only one more feature left to implement before we can serve
files over HTTP: the `requested_file` method.

## Safely converting a URI into a file path

Practically speaking, mapping the Request-Line to a file on the 
server's filesystem is easy: you extract the Request-URI, scrub 
out any parameters and URI-encoding, and then finally turn that 
into a path to a file in the server's public folder:

```ruby
# Takes a request line (e.g. "GET /path?foo=bar HTTP/1.1")
# and extracts the path from it, scrubbing out parameters
# and unescaping URI-encoding.
#
# This cleaned up path (e.g. "/path") is then converted into
# a relative path to a file in the server's public folder
# by joining it with the WEB_ROOT.
#
# WARNING: ".." path components are passed through untouched, so the
# returned path can point outside of WEB_ROOT.
def requested_file(request_line)
  request_uri  = request_line.split(" ")[1]

  # URI.unescape was removed in Ruby 3.0. The default parser's
  # unescape method performs the same percent-decoding.
  path         = URI::DEFAULT_PARSER.unescape(URI(request_uri).path)

  File.join(WEB_ROOT, path)
end
```

However, this implementation has a very bad security problem that has affected
many, many web servers and CGI scripts over the years: the server will happily
serve up any file, even if it's outside the `WEB_ROOT`.

Consider a request like this:

```
GET /../../../../etc/passwd HTTP/1.1
```

On my system, when `File.join` is called on this path, the ".." path components
will cause it to escape the `WEB_ROOT` directory and serve the `/etc/passwd` file.
Yikes! We'll need to sanitize the path before use in order to prevent this
kind of problem.

> **Note:** If you want to try to reproduce this issue on your own machine,
you may need to use a low level tool like *curl* to demonstrate it. Some browsers change the path to remove the ".." before sending a request to the server.

Because security code is notoriously difficult to get right, we will borrow our
implementation from [Rack::File](https://github.com/rack/rack/blob/master/lib/rack/file.rb).
The approach shown below was actually added to `Rack::File` in response to a [similar
security vulnerability](http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2013-0262) that
was disclosed in early 2013:

```ruby
# Takes a request line (e.g. "GET /a/../b%20c?x=1 HTTP/1.1") and
# converts it into a path to a file inside the WEB_ROOT folder.
#
# Parameters are scrubbed out, URI-encoding is unescaped, and any
# "." or ".." components are resolved before joining, so the
# returned path can never point outside of the WEB_ROOT.
def requested_file(request_line)
  request_uri  = request_line.split(" ")[1]

  # URI.unescape was removed in Ruby 3.0; the default parser's
  # unescape method performs the same percent-decoding.
  path         = URI::DEFAULT_PARSER.unescape(URI(request_uri).path)

  clean = []

  # Split the path into components
  parts = path.split("/")

  parts.each do |part|
    # skip any empty or current directory (".") path components
    next if part.empty? || part == '.'
    # If the path component goes up one directory level (".."),
    # remove the last clean component.
    # Otherwise, add the component to the Array of clean components
    part == '..' ? clean.pop : clean << part
  end

  # return the web root joined to the clean path
  File.join(WEB_ROOT, *clean)
end
```

To test this implementation (and finally see your file server in action), 
replace the `requested_file` stub in the example from the previous section 
with the implementation shown above, and then create an `index.html` file 
in a `public/` folder that is contained within the same directory as your
server script. Upon running the script, you should be able to 
visit `http://localhost:2345/index.html` but NOT be able to reach any
files outside of the `public/` folder.

## Serving up index.html implicitly

If you visit `http://localhost:2345` in your web browser, you'll see a 404 Not
Found response, even though you've created an index.html file. Most real web
servers will serve an index file when the client requests a directory. Let's
implement that.

This change is simpler than it seems, and can be accomplished by adding
a single line of code to our server script:

```diff
# ...
path = requested_file(request_line)

+ path = File.join(path, 'index.html') if File.directory?(path)

if File.exist?(path) && !File.directory?(path)
# ...
```

Doing so will cause any path that refers to a directory to have "/index.html" appended to
the end of it. This way, `/` becomes `/index.html`, and `/path/to/dir` becomes
`/path/to/dir/index.html`.

Perhaps surprisingly, the validations in our response code do not need
to be changed. Let's recall what they look like and then examine why
that's the case:

```ruby
if File.exist?(path) && !File.directory?(path)
  # serve up the file...
else
  # respond with a 404
end
```

Suppose a request is received for `/somedir`. That request will automatically be converted by our server into `/somedir/index.html`. If the index.html exists within `/somedir`, then it will be served up without any problems. However, if `/somedir` does not contain an `index.html` file, the `File.exist?` check will fail, causing the server to respond with a 404 error code. This is exactly what we want!

It may be tempting to think that this small change would make it possible to remove the `File.directory?` check, and in normal circumstances you might be able to safely do away with it. However, because leaving it in prevents an error condition in the edge case where someone attempts to serve up a directory named `index.html`, we've decided to leave that validation as it is.

With this small improvement, our file server is now pretty much working as we'd expect it to. If you want to play with it some more, you can grab the [complete source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/tree/master/v7/002) from GitHub.

## Where to go from here

In this article, we reviewed how HTTP works, then built a simple web
server that can serve up files from a directory. We've also examined
one of the most common security problems with web applications and
fixed it. If you've made it this far, congratulations! That's a lot
to learn in one day.

However, it's obvious that the server we've built is extremely limited.
If you want to continue in your studies, here are a few recommendations
for how to go about improving the server:

* According to the HTTP 1.1 specification, a server must minimally
respond to GET and HEAD to be compliant. Implement the HEAD response.
* Add error handling that returns a 500 response to the client
if something goes wrong with the request.
* Make the web root directory and port configurable.
* Add support for POST requests. You could implement CGI by executing
a script when it matches the path, or implement 
the [Rack spec](http://rack.rubyforge.org/doc/SPEC.html) to
let the server serve Rack apps with `call`.
* Reimplement the request loop using [GServer](http://www.ruby-doc.org/stdlib-2.0/libdoc/gserver/rdoc/GServer.html)
(Ruby's generic threaded server) to handle multiple connections.

Please do share your experiences and code if you decide to try any of
these ideas, or if you come up with some improvement ideas of your own.
Happy hacking!

*We'd like to thank Eric Hodel, Magnus Holm, Piotr Szotkowski, and 
Mathias Lafeldt for reviewing this article and providing feedback 
before we published it.*

> NOTE: If you'd like to learn more about this topic, consider doing the Practicing Ruby self-guided course on [Streams, Files, and Sockets](https://practicingruby.com/articles/study-guide-1?u=dc2ab0f9bb). You've already completed one of its reading exercises by working through this article!

> SEE ALSO: A [similar HTTP server](https://github.com/emilyhorsman/practicing-ruby-examples/blob/v7_002python/v7/002python27/http_server.py) written in Python, contributed by [Emily Horsman](http://emilyhorsman.com/).


================================================
FILE: articles/v7/003-stats.md
================================================
> This issue was a collaboration with my wife, Jia Wu. Jia is an associate scientist at the Yale Child Study Center, where she spends a good portion of her time analyzing brainwave data from various EEG experiments. Although this article focuses on very basic concepts, her background in statistical programming was very helpful whenever I got stuck on something. That said, if you find any mistakes in this article, you can blame me, not her.

One human quirk that fascinates me is the huge disparity between our moment-to-moment experiences and our perception  of past events. This is something that I've read about a lot in pop-psych books, and also is one of the main reasons that I practice insight meditation. However, it wasn't until I read Daniel Kahneman's book "Thinking, Fast and Slow" that I realized just how strongly separated our *experiencing self* is from our *remembering self*. 

In both Kahneman's book and [his talk at TED
2010](http://www.ted.com/talks/daniel_kahneman_the_riddle_of_experience_vs_memory.html),
he uses a striking example comparing two colonoscopy patients who recorded their
pain levels periodically throughout their procedure. Although modern sedation
techniques have made this a much less painful procedure, no anesthesia was used
during this study, which pretty much guaranteed that both patients would
be in for an unpleasant experience.

From the data Kahneman shows, the first patient had a much shorter procedure 
and reported much less overall pain than the 
second patient. However, when asked later about how painful their colonoscopies
were, the first patient remembered it to be much more unpleasant than 
the second patient did. How can that be?

As it turns out, how an event ends has a lot to do with how we will perceive the overall experience when we recall it down the line. In the colonoscopy study, the first patient reported a high pain spike immediately before the end of their procedure, where the second patient had pain that was gradually reduced before the procedure ended. This is the explanation Kahneman offers as to why the first patient remembered their colonoscopy to be far worse of an experience than the second patient remembered it to be. 

This disparity between experience and memory isn't just a one-off observation -- it's a robust finding, and it has been repeated in many different contexts. The lesson to be learned here is that we cannot trust our remembering mind to give a faithful account of the things we experience day-to-day. The unfortunate cost that comes along with this reality is that we're not as good about making judgements about our own well being as we could be if we did not have this cognitive limitation.

I thought about this idea for a long time, particularly as it related to my day-to-day happiness. Like most software developers (and probably *all* writers), my work has a lot of highs and lows to it -- so my gut feeling was that my days could be neatly divided into good days and bad days. But because Kahneman had taught me that my intuitions couldn't be trusted, I eventually set out to turn this psychological problem into an engineering problem by recording and analyzing my own mood ratings over time.

## Designing an informal experiment

I wanted my mood study to be rigorous enough to be meaningful on a personal level, but I had no intentions of conducting a tightly controlled scientific study. What I really wanted was to build a simple breadcrumb trail of mood ratings so that I didn't need to rely on memory alone to gauge my overall sense of well-being over time.

After thinking through various data collection strategies, I eventually settled on SMS messages as my delivery mechanism. The main reason for going this route was that I needed a polling device that could follow me everywhere, but one that wouldn't badly disrupt whatever I was currently doing. Because I use a terrible phone that pretty much can only be used for phone calls and texting, this approach made it possible for me to regularly update my mood rating without getting sucked into all the things that would distract me on a computer.

To make data entry easy, I used a simple numerical scale for tracking my mood:

* Very Happy (9): No desire to change anything about my current experience.
* Happy (7-8):  Pleased by the current experience, but may still be slightly tired, distracted, or anxious.
* Neutral (5-6): Not bothered by my current experience, but not necessarily enjoying it.
* Unhappy (3-4): My negative feelings are getting in the way of me doing what I want to do.
* Very Unhappy (1-2): Unable to do what I want because I am overwhelmed with negative feelings.

Originally I had intended to collect these mood updates over the course of several weeks without any specific questions in mind. However, Jia convinced me that having at least a general sense of what questions I was interested in would help me organize the study better -- so I started to think about what I might be able to observe from this seemingly trivial dataset.

After a short brainstorming session, we settled on the following general questions:

* How stable is my mood in general? In other words, how much variance is there over a given time period?
* Are there any patterns in the high and low points that I experience each day? How far apart are the two?
* Does day of the week and time of day have any effect on my mood?

These questions helped me ensure that the data I intended to collect was sufficient. Once we confirmed that was the case, we were ready to start writing some code!

## Building the necessary tools

To run this study, I used two small toolchains: one for data collection, and one for reporting.

The job of the data collection toolchain was primarily to deal with sending and receiving text messages at randomized intervals. It stored my responses into database records similar to what you see below:

```
[{:id=>485, :message=>"8", :recorded_at=>1375470054},
 {:id=>484, :message=>"8", :recorded_at=>1375465032},
 {:id=>483, :message=>"8", :recorded_at=>1375457397},
 {:id=>482, :message=>"9", :recorded_at=>1375450750},
 {:id=>481, :message=>"8", :recorded_at=>1375411347}, ...]
```

To support this workflow, I relied almost entirely on external services, including Twilio and Heroku. As a result, the whole data collection toolchain I built consisted of around 80 lines of code spread across two simple [rake tasks](https://github.com/sandal/dwbh/blob/pr-7.3/Rakefile) and a small Sinatra-based [web service](https://github.com/sandal/dwbh/blob/pr-7.3/dwbh.rb). Here's the basic storyline that describes how these two little programs work:

1. Every ten minutes between 8:00am and 11:00pm each day, the randomizer in the `app:remind` task gets run. It has a 1:6 chance of triggering a mood update reminder.

2. Whenever the randomizer sends a reminder, it does so by hitting the `/send-reminder` route on my web service, which causes Twilio to deliver a SMS message to my phone.

3. I respond to those messages with a mood rating. This causes Twilio to fire a webhook that hits the `/record-mood` route on the Sinatra app with the message data as GET parameters. The response data along with a timestamp are then stored in a database for later processing.

4. Some time later, the reporting toolchain will hit the `/mood-logs.csv` route to download a dump of the whole dataset, which includes the raw data shown above along with a few other computed fields that make reporting easier.

After a bit of hollywood magic involving a menagerie of R scripts, some more rake tasks, and a bit of Prawn-based PDF generation code, the reporting toolchain ends up spitting out a [two-page PDF report](http://notes.practicingruby.com/docs/7.3-mood-report.pdf) that looks like what you see below:

[![](http://i.imgur.com/Ersv9fw.png)](http://notes.practicingruby.com/docs/7.3-mood-report.pdf)

We'll be discussing some of the details about how the various graphs get generated and the challenges involved in implementing them later on in this article, but if you want to get a sense of what the Ruby glue code looks like in the reporting toolchain, I'd recommend looking at its [Rakefile](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/Rakefile). The tasks it provides allow me to type `rake generate-report` in a console and cause the following chain of events to happen:

1. The latest mood data get downloaded from the Sinatra app in CSV format.

2. All of the R-based graphing scripts are run, outputting a bunch of image files.

3. A PDF is generated to cleanly present those images in a single document.

4. The CSV data and image files are then deleted, because they're no longer needed.

Between this reporting code and the data aggregation toolchain, I ended up with a system that has been very easy to work with for the many weeks that I have been running this study. The whole user experience boils down to pressing a couple buttons on my phone when I'm prompted to do so, and then typing a single command to generate reports whenever I want to take a look at them.

At a first glance, the way this system is implemented may look a bit like it's hung together with shoestrings and glue, but the very loose coupling between its components has made it easy to both work on individual pieces in isolation, and to make significant changes without a ton of rework. It seems like the [worse is better](http://en.wikipedia.org/wiki/Worse_is_better) mantra applies well to this sort of project.

I'd be happy to discuss the design of these two toolchains with you once you've finished this article, but for now let's look at what all those graphs are saying about my mood.

## Analyzing the results

The full report for my mood study consists of four different graphs generated via the R stats language, each of which gives us a different way of looking at the data:

* Figure 1 provides a view of the average mood ratings across the whole time period
* Figure 2 tracks the daily minimum and maximums for the whole time period.
* Figure 3 shows the average mood rating and variance broken out by day of week
* Figure 4 plots the distribution of the different mood ratings at various times of day.

The order above is the same as that of the PDF report, and it is essentially sorted by the largest time scales down to the shortest ones. Since that is a fairly natural way to look at this data, we'll discuss it in the same order in this article.

---

**Figure 1 ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/moving-summary.R)):**

![Summary](http://i.imgur.com/KtdTzkI.jpg)

I knew as soon as I started working on this study that I'd want to somehow capture the general trend of the entire data series, but I didn't anticipate how noisy it would be to [plot nearly 500 data points](http://i.imgur.com/NlIlgMI.png), many of which were too close together to visually distinguish from one another. To lessen the noise, I decided to plot a moving average instead of the individual ratings over time, which is what you see in **Figure 1** above.

It's important to understand the tradeoffs here: by smoothing out the data, I lost the ability to see what the individual ratings were at any given time. However, I gained the ability to easily discern the following bits of useful information:

* How my experiences over a period of a couple days compare to the global average (green horizontal line), and to the global standard deviation (gray horizontal lines). This information could tell me whether my day-to-day experience has been improving or getting worse over time, and also how stable the swings in my mood have been recently compared to what might be considered "typical" for me across a large time span.

* Whether my recent mood updates indicated that my mood was trending upward or downward, and roughly how long I could expect that to last.

Without rigorous statistical analysis and a far less corruptible means of studying myself, these bits of information could never truly predict the future or even be used as the primary basis for decision making. However, the extra information has been helping me put my mind in a historical perspective that isn't purely based on my remembered experiences, and that alone has turned out to be extremely useful to me.

> **Implementation notes ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/moving-summary.R)):**
>
> I chose to use an exponentially-smoothed weighted average here, mostly because I wanted to see the trend line change direction as quickly as possible whenever new points of data hinted that my mood was getting better or worse over time. There are lots of different techniques for doing weighted averages, and this one is actually a little more complicated than some of the other options out there. If I had to implement the computations myself I may have chosen a more simple method. But since an exponential moving average function already existed in the [TTR package](http://rss.acs.unt.edu/Rdoc/library/TTR/html/MovingAverages.html), it didn't really cost me any extra effort to model things this way.

>I had first seen this technique used in [The Hacker's Diet](http://www.fourmilab.ch/hackdiet/www/subsection1_2_4_0_4.html#SECTION0240400000000000000), where it proved to be a useful means of cancelling out the noise of daily weight fluctuations so that you could see if you were actually gaining or losing weight. I was hoping it would have the same effect for me with my mood monitoring, and so far it has worked as well as I expected it would.

>
>It's also worth noting that in this graph, the curve represents something close to a continuous time scale. To accomplish this, I converted the UNIX timestamps into fractional days from the moment the study had started. It's not perfect, but it has the neat effect of making visible changes to the graph after even a single new data point has been recorded.

---

**Figure 2 ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/daily-min-max.R)):**


![Min Max](http://i.imgur.com/hv9X1vA.jpg)

In a purely statistical sense, the highest and lowest values reported for each day might not be especially relevant. However, the nature of this particular study made me feel it was important to track them. After all, even if the "average" mood for two days were both around 7, a day where the lowest mood rating was a 1 will certainly be a different sort of day than one where the lowest rating was a 5! For this reason, **Figure 2** shows the extreme high and low for each day in the study. This information is useful for the following purposes:

* Determining what my daily peak experiences are like on average. For example, we can see from this data that there was only one day where I didn't report at least a single rating of 7 or higher, and that most days my high point was either an 8 or 9. 

* Determining what my daily low points are like on average. Reading the data shown above, we can see that there were only three days in the entire study that I reported a low rating of 1, but that about one in five days had a low rating of 4 or less. 

* Visualizing the range between high and low points on a daily basis. This can be seen by looking at the space between the two lines: the smaller the distance, the smaller the range of the mood swing for that day.

A somewhat obvious limitation of this visualization is that the range of moods recorded in a day does not necessarily reflect the range of moods actually experienced throughout that day. In most of the other ways I've sliced up the dataset, we can hope that averaging will smooth out some of the ill effects of missing information, but this view in particular can be easily corrupted by a single "missed event" per day. The key point here is that **Figure 2** can only be viewed as a rough sketch of the overall trend, and not a precise picture of day-to-day experience.

> **Implementation notes ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/daily-min-max.R)):**
>
> This was an extremely straightforward graph to produce using standard R functions, so there isn't too much to discuss about it. However, it's worth pointing out for folks who are unfamiliar with R that the support for data aggregation built into the language is excellent. Here is the code that takes the raw mood log entries and rolls them up by daily minimum and maximum:
>
> `data_max <- aggregate(rating ~ day, data, max)`
>
> `data_min <- aggregate(rating ~ day, data, min)`
>
> Because R is such a special-purpose language, it includes many neat data manipulation features similar to this one.

---

**Figure 3 ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/day-of-week.R)):**


![Day of week](http://i.imgur.com/yTSuSLW.jpg)

This visualization shows the mean and standard deviation for all mood updates broken out by day of week. Looking at my mood data in this way provides the following information:

* Whether or not certain days of the week have better mood ratings on average than others.
* Whether or not certain days of the week have more consistent mood ratings than others.
* What the general ups-and-downs look like in a typical week in my life

If you look at the data points shown in **Figure 3** above, you'll see that the high points (Monday and Friday) stand out noticeably from the low points (Wednesday and Saturday). However, to see whether that difference is significant or not, we need to be confident that what we're observing isn't simply a result of random fluctuations and noise. This is where some basic statistical tests are needed.

To test for difference in the averages between days, we ran a one-way ANOVA test, and then did a pairwise test with FDR correction. Based on these tests we were able to show a significant difference (p < 0.01) between Monday+Wednesday, Monday+Saturday, and Friday+Saturday. The difference between Wednesday+Friday was not significant, but was close (p = 0.0547). I don't want to get into a long and distracting stats tangent here, but if you are curious about what the raw results of the computations ended up looking like, take a look at [this gist](https://gist.github.com/sandal/6147469).

> **Implementation notes ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/day-of-week.R)):**

> An annoying thing about R is that despite having very powerful graphing functionality built into the language, it does not have a standard feature for drawing error bars. We use a small [helper function](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/helpers.R#L2-L5) to handle this work, which is based on code we found in [this blog post](http://bmscblog.wordpress.com/2013/01/23/error-bars-with-r).

> Apart from the errorbars issue and the calls to various statistical reporting functions, this code is otherwise functionally similar to what is used to generate **Figure 2**.

---

**Figure 4 ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/frequency.R)):**


![Frequency](http://i.imgur.com/cbJxa8K.png)

The final view of the data shows the distribution of mood ratings broken out by time of day. Because the number of mood ratings recorded in each time period weren't evenly distributed, I decided to plot the frequency of the mood rating values by percentage rather than total count for each. Presenting the data this way allows the five individual graphs to be directly compared to one another, because it ensures that they all use the same scale.

Whenever I look at this figure, it provides me with the following information:

* How common various rating values are, broken out by time of day.
* How stable my mood is at a given time of day
* What parts of the day are more or less enjoyable than others on average

The most striking pattern I saw from the data shown above was that the percentage of negative and negative-leaning ratings gradually increased throughout the day, up until 8pm, and then they rapidly dropped back down to similar levels as the early morning. In the 8am-11am time period, mood ratings of five or under account for about 7% of the overall distribution, but in the 5pm to 8pm slot, they account for about 20% of the ratings in that time period. Finally, the whole thing falls off a cliff in the 8pm-11pm slot and the ratings of five or lower drop back down to under 7%. It will be interesting to see whether or not this pattern holds up over time.

> **Implementation notes ([view source code](https://github.com/elm-city-craftworks/practicing-ruby-examples/blob/pr-7.3/v7/003/frequency.R)):**

> Building this particular visualization turned out to be more complicated than I had hoped for it to be. It may be simply due to my relative inexperience with R, but I found the `hist()` function to be cumbersome to work with due to a bunch of awkward defaults. For example, the default settings caused the mood ratings of 1 and 2 to be grouped together, for reasons I still only vaguely understand. Also, the way that I implemented grouping by time period can probably be improved greatly.

> Feedback on how to clean up this code is welcome!

## Mapping a story to the data

Because this was a very personal study, and because the data itself has very low scientific validity, I shouldn't embellish the patterns I observed with wild guesses about their root causes. However, I can't resist, so here are some terrible narrations for you to enjoy!

*I learned that although genuine bad days are actually somewhat rare in my life, when they're bad, they can be really bad:*

![](http://i.imgur.com/j0p6Nie.png)

*I learned that I probably need to get better at relaxing during my days off:* 

![](http://i.imgur.com/ktCRWsC.png)

*I learned that like most people, as I get tired it's easier for me to get into a bad mood, and that rest helps recharge my batteries:*

![](http://i.imgur.com/F3nfsHf.gif)

Although these lessons may not be especially profound, it is fun to see even rudimentary evidence for them in the data I collected. If I keep doing this study, I can use these observations to try out some different things in the hopes of optimizing my day-to-day sense of well being.

## Reflections

Given that this article started with a story about a colonoscopy and ended with an animated GIF, I think it's best to leave it up to you to draw your own conclusions about what you can take away from it. But I would definitely love to hear your thoughts on any part of this project, so please do share them!


================================================
FILE: articles/v7/004-incremental.md
================================================
When you look at this photograph of highway construction, what do you see?

![](http://i.imgur.com/eej11xZ.jpg)

If your answer was "ugly urban decay", then you are absolutely right! But because this construction project is only a few miles away from my house, I can tell you a few things about it that reveal a far more interesting story:

* On the far left side of the photo, you can see the first half of a newly constructed suspension bridge. At the time this picture was taken, it was serving five lanes of northbound traffic.

* Directly next to that bridge, cars are driving southbound on what was formerly the northbound side of our old bridge, serving 3 lanes of traffic.

* Dominating the rest of the photograph is the mostly deconstructed southbound side of our old bridge, a result of several months of active work.

So with those points in mind, what you are looking at here is an *incremental improvement* to a critical traffic bottleneck along the main route between New York City and Boston. This work was accomplished with hardly any service interruptions, despite the incredibly tight constraints on the project. This is legacy systems work at the highest level, and there is much we can learn from it that applies equally well to code as it does to concrete.

## Case study: Improving one of Practicing Ruby's oldest features

Now that we've set the scene with a colorful metaphor, it is time to see how these ideas can influence the way we work on software projects. To do that, I will walk you through a major change we made to practicingruby.com that involved a fair amount of legacy coding headaches. You will definitely see some ugly code along the way, but hopefully a bit of cleverness will shine through as well.

The improvement that we will discuss is a complete overhaul of Practicing Ruby's content sharing features. Although I've encouraged our readers to share our articles openly since our earliest days, several awkward implementation details made this a confusing process:

* You couldn't just copy-paste links to articles. You needed to explicitly click a share button that would generate a public share link for you.

* If you did copy-paste an internal link from the website rather than explicitly generating a share link, those who clicked on that link would be immediately asked for registration information without warning. This behavior was a side-effect of how we did authorization and not an intentional "feature", but it was super annoying to folks who encountered it.

* If you visited a public share link while logged in, you'd see the guest view rather than the subscriber view, and you'd need to click a "log in" button to see the comments, navbar, etc.

* Both internal paths and share paths were completely opaque (e.g. "/articles/101" and "/articles/shared/zmkztdzucsgv"), making it hard to know what a URL pointed to without
visiting it.
 
Despite these flaws, subscribers did use Practicing Ruby's article sharing mechanism. They also made use of the feature in ways we didn't anticipate -- for example, it became the standard workaround for using Instapaper to read our content offline. As time went on, we used this feature for internal needs as well, whether it was to give away free samples, or to release old content to the public. To make a long story short, one of our most awkward features eventually also became one of the most important.

We avoided changing this system for quite a long while because we always had something else to work on that seemed more urgent. But after enough time had passed, we decided to pay down our debts. In particular, we wanted to make the following changes:

* We wanted to switch to subscriber-based share tokens rather than generating a new share token for each and every article. As long as a token was associated with an active subscriber, it could then be used to view any of our articles.

* We wanted to clean up and unify our URL scheme. Rather than having an internal path like "/articles/101" and a share path like "/articles/shared/zmkztdzucsgv", we would have a single path for both purposes that looked like this:

```
/articles/improving-legacy-systems?u=dc2ab0f9bb
```

* We wanted to make sure to be smart about authorization. Guests who visited a link with a valid share key would always see the "guest view" of that article, and logged in subscribers would always see the "subscriber view". If a key was invalid or missing, the guest would be explicitly told that the page was protected, rather than dropped into our registration process without warning.

* We wanted to make sure to make our links easy to share by copy-paste, whether it was from anywhere within our web interface, from the browser location bar, or even in the emails we send to subscribers. This meant making sure we put your share token pretty much anywhere you might click on an article link.

Laying out this set of requirements helped us figure out where the destination was, but we knew intuitively that the path to get there would be a long and winding road. The system we initially built for sharing articles did not take any of these concepts into account, and so we would need to find a way to shoehorn them in without breaking old behavior in any significant way. We also would need to find a way to do this *incrementally*, to avoid releasing a ton of changes to our system at once that could be difficult to debug and maintain. The rest of this article describes how we went on to do exactly that, one pull request at a time.

> **NOTE:** Throughout this article, I link to the "files changed" view of pull requests to give you a complete picture of what changed in the code, but understanding every last detail is not important. It's fine to dig deep into some pull requests while skimming or skipping others.

## Step 1: Deal with authorization failures gracefully

When we first started working on practicingruby.com, we thought it would be convenient to automatically handle Github authentication behind the scenes so that subscribers rarely needed to explicitly click a "sign in" button in order to read articles. This is a good design idea, but we only really considered the happy path while building and testing it.

Many months down the line, we realized that people would occasionally share internal links to our articles by accident, rather than explicitly generating public links. Whenever that happened, the visitor would be put through our entire registration process without warning, including:

* Approving our use of Github to authorize their account
* Going through an email confirmation process
* Getting prompted for credit card information

Most would understandably abandon this process part of the way through. In the best case scenario, our application's behavior would be seen as very confusing, though I'm sure for many it felt downright rude and unpleasant. It's a shame that such a bad experience could emerge from what were actually good intentions both on our part and on whoever shared a link to our content in the first place. Think of what a different experience it might have been if the visitor had been redirected to our landing page where they could see the following message:

![](http://i.imgur.com/kA3ePJI.png)

Although that wouldn't be quite as nice as getting free access to an article that someone wanted to share with them, it would at least avoid any confusion about what had just happened. My first attempt at introducing this kind of behavior into the system looked like what you see below:

```ruby
class ApplicationController < ApplicationController::Base
  # ...
  
  def authenticate
    return if current_authorization 
   
    flash[:notice] = 
      "That page is protected. Please sign in or sign up to continue"
      
    store_location
    redirect_to(root_path)
  end
end 
```

We deployed this code and for a few days, it seemed to be a good enough stop-gap measure for resolving this bug, even if it meant that subscribers might need to click a "sign in" button a little more often. However, I realized that it was a bit too naive of a solution when I received an email asking why it was necessary to click "sign in" in order to make the "subscribe" button work. My quick fix had broken our registration system. :cry:

Upon hearing that bad news, I immediately pulled this code out of production after writing a test that proved this problem existed on my feature branch but not in master. A few days later, I put together a quick fix that got my tests passing. My solution was to extract a helper method that decided how to handle authorization failures. The default behavior would be to redirect to the root page and display an error message as we did above, but during registrations, we would automatically initiate a Github authentication as we had done in the past:

```ruby
class ApplicationController < ApplicationController::Base
  # ...
  
  def authenticate
    return if current_authorization 
   
    store_location
    redirect_on_auth_failure
  end
  
  def redirect_on_auth_failure
    flash[:notice] = 
      "That page is protected. Please sign in or sign up to continue"
      
    redirect_to(root_path)
 end
end

class RegistrationController < ApplicationController
  # ...
  
  def redirect_on_auth_failure
    redirect_to login_path 
  end
end 
```

This code, though not especially well designed, seemed to get the job done without too much trouble. It also served as a useful reminder that I should be on the lookout for holes in the test suite, which in retrospect should have been obvious given the awkward behavior of the original code. As they say, hindsight is 20/20!


> HISTORY: Deployed 2013-07-26 and then reverted a few days later due to the registration bug mentioned above. Redeployed on 2013-08-06, then merged three days later. 
>
>[View complete diff](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/145/files)

## Step 2: Add article slugs

When we first started working on practicingruby.com, we didn't put much thought into what our URLs looked like. In the first few weeks, we were rushing to get features like syntax highlighting and commenting out the door while keeping up with the publication schedule, and so we didn't have much energy to think about the minor details.

Even if it made sense at the time, this is one decision I came to regret. In particular, I  disliked the notion that the paths that subscribers saw (e.g. "/articles/101") were completely different than the ones we generated for public viewing (e.g. "/articles/shared/zmkztdzucsgv"), with no direct way to associate the two. When you add in the fact that both of these URL schemes are opaque, it definitely stood out as a poor design decision on our part.

Technically speaking, it would be possible to unify the two different schemes using subscriber tokens without worrying about the descriptiveness of the URLs, perhaps using paths like "/articles/101?u=dc20f9bb". However, since we would need to be messing around with article path generation as it was, it seemed like a good idea to make those paths much more attractive by adding slugs. The goal was to have a path like: "/articles/improving-legacy-systems?u=dc2ab0f9bb". 

Because we knew article slugs would be easy to implement, we decided to build and ship them before moving on to the more complicated changes we had planned to make. The pair of methods below are the most interesting implementation details from this changeset:

```ruby
class Article < ActiveRecord::Base
  # ...

  def self.[](key)
    find_by_slug(key) || find_by_id(key)
  end

  def to_param
    if slug.present?
      slug
    else
      id.to_s
    end
  end
end
```

The `Article[]` method is a drop-in replacement for `Article.find` that allows lookup by slug or by id. This means that both `Article[101]` and `Article['improving-legacy-code']` are valid calls, each of them returning an `Article` object. Because we only call `Article.find()` in a few places in our codebase, it was easy to swap those calls out to use `Article[]` instead.

The `Article#to_param` method is used internally by Rails to generate paths. So wherever `article_url` or `article_path` get called with an `Article` object, this method will be called to determine what gets returned. If the article has a slug associated, it'll return something like "/articles/improving-legacy-code". If it doesn't have a slug set yet, it will return the familiar opaque database ids, i.e. "/articles/101".

There is a bit of an inconsistency in this design worth noting: I chose to override the `to_param` method, but not the `find` method on my model. However, since the former is a method that is designed to be overridden and the latter might be surprising to override, I felt somewhat comfortable with this design decision.

Although it's not worth showing the code for it, I also added a redirect to the new style URLs whenever a slug existed for an article. By doing this, I was able to effectively deprecate the old URL style without breaking existing links. While we won't ever disable lookup by database ID, this at least preserves some consistency at the surface level of the application.

> HISTORY: Deployed 2013-08-16 and then merged the next day. Adding slugs to articles was a manual process that I completed a few days after the feature shipped.
>
> [View complete diff](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/155/files)

## Step 3: Add subscriber share tokens

In theory it should have been nearly trivial to implement subscriber-based share tokens. After all, we were simply generating a random string for each subscriber and then appending it to the end of article URLs as a GET parameter (e.g. "u=dc20f9bb"). In practice, there were many edge cases that would complicate our implementation.

The ideal situation would be to override the `article_path` and `article_url` methods to add the currently logged in user's share token to any article links throughout the application. However, we weren't able to find a single place within the Rails call chain where such a global override would make sense. It would be easy enough to get this kind of behavior in both our views and controllers by putting the methods in a helper and then mixing that helper into our ApplicationController, but it wasn't easy to take the same approach in our tests and mailers. To make matters worse, some of the places we wanted to use these path helpers would have access to the ones Rails provided by default, but would not include our overrides, and so we'd silently lose the behavior we wanted to add.

We were unable to find an elegant solution to this problem, but eventually settled on a compromise. We built a low level object for generating the URLs with subscriber tokens, as shown below:

```ruby
class ArticleLink
  include Rails.application.routes.url_helpers

  def initialize(article, params)
    self.article = article
    self.params = params
  end

  def path(token)
    article_path(article, params_with_token(token))
  end

  def url(token)
    article_url(article, params_with_token(token))
  end

  private

  attr_accessor :params, :article

  def params_with_token(token)
    {:u => token}.merge(params)
  end
end
```

Then in our `ApplicationHelper`, we added the following bits of glue code:

```ruby
module ApplicationHelper
  def article_url(article, params={})
    return super unless current_user

    ArticleLink.new(article, params).url(current_user.share_token)
  end

  def article_path(article, params={})
    return super unless current_user

    ArticleLink.new(article, params).path(current_user.share_token)
  end
end
```

Adding these simple shims made it so that we got the behavior we wanted in the ordinary use cases of `article_url` and `article_path`, which were in our controllers and views. In our mailers and tests, we opted to use the `ArticleLink` object directly, because we needed to explicitly pass in tokens in those areas anyway. Because it was impossible for us to make this code completely DRY, this convention-based design was the best we could come up with.

As part of this changeset, I modified the redirection code that I wrote when we were introducing slugs to also take tokens into account. If a subscriber visited a link that didn't include a share token, it would rewrite the URL to include their token. This was yet another attempt at introducing a bit of consistency where there previously was none.

> HISTORY: Deployed code to add tokens upon visiting an article on 2013-08-20, then did a second deploy to update the archives and library links the next day, merged on 2013-08-23.
>
> [View complete diff](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/158/files)

## Step 4: Redesign and improve broadcast mailer

I use a very basic web form in our admin panel to send email announcements out to Practicing Ruby subscribers. Originally, this feature relied on sending messages in batches, which was the simple thing to do when we assumed we'd be sending an identical message to everyone:

```ruby
class BroadcastMailer < ActionMailer::Base
  def deliver_broadcast(message={})
    @body = message[:body]

    user_batches(message) do |users|
      mail(
        :to => "gregory@practicingruby.com",
        :bcc => users,
        :subject => message[:subject]
      ).deliver
    end
  end

  private

  def user_batches(message)
    yield(message[:to]) && return if message[:commit] == "Test"

    User.where(:notify_updates => true).to_notify.
      find_in_batches(:batch_size => 25) do |group|
        yield group.map(&:contact_email)
    end
  end
end
```

Despite being a bit of a hack, this code served us well enough for a fairly long time. It even supported a basic "test mode" that allowed me to send a broadcast email to myself before sending it out to everyone. However, the design would need to change greatly if we wanted to include share tokens in the article links we emailed to subscribers. We'd need to send out individual emails rather than sending batched messages, and we'd also need to implement some sort of basic mail merge functionality to handle article link generation.

I don't want to get too bogged down in details here, but this changeset turned out to be far more complicated than I expected. For starters, the way we were using `ActionMailer` in our original code was incorrect, and we were relying on undefined behavior without realizing it. Because the `BroadcastMailer` had been working fine for us in production and its (admittedly mediocre) tests were passing, we didn't notice the problem until we attempted to change its behavior. After attempting to introduce code that looked like this, I started to get all sorts of confusing test failures:

```ruby
class BroadcastMailer < ActionMailer::Base
  # NOTE: this is an approximation, but it captures the basic idea...
  def deliver_broadcast(message={})
    @body = message[:body]

    User.where(:notify_updates => true).to_notify.each do |user|
      mail(:to => user.contact_email, :subject => message[:subject]).deliver
    end
  end
end
```

Even though this code appeared to work as expected in development (sending individual emails to each recipient), in my tests, `ActionMailer::Base.deliveries` was returning N copies of the first email sent in this loop. After some more playing around with ActionMailer and semi-fruitless internet searches, I concluded that this was because we weren't using the mailers in the officially sanctioned way. We'd need to change our code so that the mailer returned a `Mail` object, rather than handling the delivery for us.

Because I didn't want that logic to trickle up into the controller, and because I expected things might get more complicated as we kept adding more features to this object, I decided to introduce an intermediate service object to handle some of the work for us, and then greatly simplify the mailer object. I also wanted to make the distinction between sending a test message and sending a message to everyone more explicit, so I took the opportunity to do that as well. The resulting code ended up looking something similar to what you see below:

```ruby
class Broadcaster
  def self.notify_subscribers(params)
    BroadcastMailer.recipients.each do |email|
      BroadcastMailer.broadcast(params, email).deliver
    end
  end

  def self.notify_testers(params)
    BroadcastMailer.broadcast(params, params[:to]).deliver
  end
end

class BroadcastMailer < ActionMailer::Base
  def self.recipients
    User.where(:notify_updates => true).to_notify.map(&:contact_email)
  end

  def broadcast(message, email)
    mail(:to => email,
         :subject => message[:subject])
  end
end
```

With this code in place, I had successfully converted the batch email delivery to individual emails. It was time to move on to adding a bit of code that would give me mail-merge functionality. I decided to use Mustache for this purpose, which would allow me to write emails that look like this:

```
Here is an awesome article I wrote:

{{#article}}improving-legacy-systems{{/article}}
```

Mustache would then run some code behind the scenes and turn that message body into the following output:

```
Here is an awesome article I wrote:

http://practicingruby.com/articles/improving-legacy-systems?u=dc20f9bb
```

As a proof of concept, I wrote a bit of code that handled the article link expansion, but didn't handle share tokens yet. It only took two extra lines in `BroadcastMailer#broadcast` to add this support:

```ruby
class BroadcastMailer < ActionMailer::Base
  # ...
  
  def broadcast(message, email)
    article_finder = ->(e) { article_url(Article[e]) }

    @body = Mustache.render(message[:body], :article => article_finder)

    mail(:to => email,
         :subject => message[:subject])
  end
end
```

I deployed this code in production and sent myself a couple test emails, verifying that the article links were getting expanded as I expected them to. I had planned to work on adding the user tokens immediately after running those live tests, but at that moment realized that I had overlooked an important issue related to performance.

Previous to this changeset, the `BroadcastMailer` was responsible for sending about 16 emails at a time (25 people per email). But now, it would be sending about 400 of them! Even though we use a DelayedJob worker to handle the actual delivery of the messages, it might take some significant amount of time to insert 400 custom-generated emails into the queue. Rather than investigating that problem right away, I decided to get myself some rest and tackle it the next day with Jordan.    

> HISTORY: Deployed on 2013-08-22, and then merged the next day.
>
> [View complete diff](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/162/files)

## Step 5: Test broadcast mailer's performance

Before we could go any farther with our work on the broadcast mailer, we needed to check the performance implications of switching to non-batched emails. We didn't need to do a very scientific test -- we just needed to see how severe the slowdown was. Because our previous code ran without a noticeable delay, pretty much anything longer than a second or two would be concerning to us.

To conduct our test, we first populated our development environment with 2000 users (about 5x as many active users as we had on Practicing Ruby at the time). Then, we posted a realistic email in the broadcast mailer form, and kept an eye on the messages that were getting queued up via the Rails console. After several seconds we hadn't even queued up 100 jobs, so it became clear that performance very well could be a concern.

To double check our estimates, and to form a more realistic test, we temporarily disabled our DelayedJob worker on the server and then ran the broadcast mailer in our live environment. Although the mailer did finish up queuing its messages without the request timing out, it took about half a minute to do so. With this information in hand, we cleared out the test jobs so that they wouldn't actually be delivered, and then spent a bit of time lost in thought.

Ultimately, we learned several important things from this little experiment:

1. The mail building and queuing process was definitely slow enough to worry us.
2. In the worst case scenario, I would be able to deal with a 30 second delay in delivering broadcasts, but we would need to fix this problem if we wanted to unbatch other emails of ours, such as comment notifications.
3. The most straightforward way to deal with this problem would be to run the entire mail building and queuing process in the background.

The first two points were not especially surprising to us, but the third concerned us a bit. While we have had good luck using DelayedJob in conjunction with the MailHopper gem to send email, we had some problems in the past with trying to handle arbitrary jobs with it. We suspected this had to do with some of our dependencies being outdated, but never had time to investigate properly. With our fingers crossed, we decided to hope for the best and plan for the worst.

## Step 6: Process broadcast mails using DelayedJob

Our first stab at backgrounding the work done by
`Broadcaster.notify_subscribers`  was to simply change the call to
`Broadcaster.delay.notify_subscribers`. 

In theory, this small change should have done the
trick: the method is conceptually nothing more than a "fire and forget"
function that did not need to interact in any way with its caller. But after
spending a long time staring at an incredibly confusing error log, we
realized that it wasn't safe to assume that DelayedJob would cleanly serialize
a Rails `params` hash. Constructing our own hash to pass into the
`Broadcaster.notify_subscribers` method resolved those issues, and we ended up
with the following code in `BroadcastsController`:

```ruby
module Admin
  class BroadcastsController < ApplicationController
    def create
      # ...

      # build our own hash to avoid DelayedJob serialization issues
      message = { :subject => params[:subject],
                  :body    => params[:body] } 

      if params[:commit] == "Test"
        message[:to] = params[:to]

        Broadcaster.notify_testers(message)
      else
        Broadcaster.delay.notify_subscribers(message)
      end

      # ...
    end
  end
end
```

After tweaking our test suite slightly to take this change into account, we
were back to green fairly quickly. We experimented with the delayed broadcasts
locally and found that it resolved our slowness issue in the UI. The worker
would still take a little while to build all those mails and get them queued
up, but since it was being done in the background it no longer was much of a
concern to us.

We were cautiously optimistic that this small change might fix our issues, so
we deployed the code to production and did another live test. Unfortunately,
this led us to a new error condition, and so we had to go back to the drawing board.
Eventually we came across [this Github issue](https://github.com/collectiveidea/delayed_job/issues/350), which hinted (indirectly) that we might be running into one of the many issues with YAML parsing on Ruby 1.9.2.

We could have attempted to do yet another workaround to avoid updating our
Ruby version, but we knew that this was not the first, second, or even third time that we had been bitten by the fact that we were still running an ancient
and poorly supported version of Ruby. In fact, we realized that wiping the
slate clean and provisioning a whole new VPS might be the way to go, because
that way we could upgrade all of our platform dependencies at once.

So with that in mind, Jordan went off to work on getting us a new production
environment set up, and we temporarily put this particular changeset on hold.
There was still plenty of work for me to do that didn't rely on upgrading our
production environment, so I kept working against our old server while he tried to spin up a new one.

> HISTORY: Deployed for live testing on 2013-08-23 but then immediately pulled
> from production upon failure. Redeployed to our new server on 2013-08-30,
> then merged the following day.
>
> [View complete diff](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/164)

## Step 7: Support share tokens in broadcast mailer

Now that we had investigated the performance issues with the mailer and had a plan in place to fix them, it was time for me to finish what I had planned to work on in the first place: adding share tokens to article links in emails.

The changes to `BroadcastMailer` were fairly straightforward: pass a `User` rather than an email address into the `broadcast` method, and then use `ArticleLink` to generate a customized link based on the subscriber's share token: 

```ruby
class BroadcastMailer < ActionMailer::Base
  def self.recipients
    User.where(:notify_updates => true).to_notify
  end

  def broadcast(message, subscriber)
    article_finder = ->(e) { 
      ArticleLink.new(Article[e]).url(subscriber.share_token) 
    }

    @body = Mustache.render(message[:body], :article => article_finder)

    mail(:to => subscriber.contact_email,
         :subject => message[:subject])
  end
end
```

The only complication of rewiring `BroadcastMailer` this way is that it broke our test mailer functionality. Because the test mailer could send a message to any email address (whether there was an account associated with it or not), we wouldn't be able to look up a valid `User` record to pass to the `BroadcastMailer`. The code below shows my temporary solution to this API compatibility problem:

```ruby
class Broadcaster
   # ...
   
  def self.notify_testers(params)
    subscriber = Struct.new(:contact_email, :share_token)
                       .new(params[:to], "testtoken")

    BroadcastMailer.broadcast(params, subscriber).deliver
  end
end
```

Using a `Struct` object to generate an interface shim as I've done here is not the most elegant solution, but it gets the job done. A better solution would be to create a container object that could be used by both `notify_subscribers` and `notify_testers`, but I wasn't ready to make that design decision yet.

With these changes in place, I was able to do some live testing to verify that we had managed to get share tokens into our article links. Now all that remained was to add the logic that would allow these share tokens to permit guest access to articles.

> HISTORY: Deployed 2013-08-24, then merged on 2013-08-29.
>
> [View complete diff](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/165)

## Step 8: Allow guest access to articles via share tokens

With all the necessary underplumbing in place, I was finally ready to model the new sharing mechanism. The end goal was to support the following behavior:

- Subscribers see the full article with comments whenever they are logged in
- With a valid token in the URL, guests see our "shared article" view.
- Without a valid token, guests see the "protected page" error
- Links that use a token from an expired account are disabled
- Old-style share links redirect to the new-style subscriber token links

The main challenge was that there wasn't an easy way to separate these concepts from each other, at least not in a meaningful way. However, we were able to reuse large chunks of existing code to do this, so most of the work was just tedious rewiring of controller actions while layering in a few more tests here and there.

The changes that needed to be made to support these behaviors were not that hard to make, but I did feel concerned about how complicated our `ArticlesController#show` action was getting. Including the relevant filters, here is what it looked like after all the changes were made (skim it, but don't bother trying to understand it!):

```ruby
# Shows articles to logged-in subscribers and, through a share token passed
# in params[:u], to guests.
class ArticlesController < ApplicationController
  # For :show, the filters below look up the article, redirect when the slug
  # or token is missing from the URL, and then check the share token.
  before_filter :find_article, :only => [:show, :edit, :update, :share]
  before_filter :update_url, :only => [:show]
  before_filter :validate_token, :only => [:show]

  # The usual login filters are skipped for these actions; validate_token
  # falls back to them (via attempt_user_login) when no valid token is given.
  skip_before_filter :authenticate, :only => [:show, :shared, :samples]
  skip_before_filter :authenticate_user, :only => [:show, :shared, :samples]

  # Logged-in users get the article along with its comments (ordered by
  # created_at). Everyone else gets the "shared" template, and the visit is
  # attributed to the owner of the share token.
  def show
    store_location
    decorate_article

    if current_user
      mixpanel.track("Article Visit", :title => @article.subject,
                                      :user_id => current_user.hashed_id)

      @comments = CommentDecorator.decorate(@article.comments
                                                    .order("created_at"))
    else
      # NOTE(review): assumes validate_token has already guaranteed that
      # params[:u] matches a user; otherwise find_by_share_token would return
      # nil here. Confirm that attempt_user_login halts the request.
      shared_by = User.find_by_share_token(params[:u]).hashed_id

      mixpanel.track("Shared Article Visit", :title => @article.subject,
                                            :shared_by => shared_by)

      render "shared"
    end
  end

  private

  # Loads the article identified by params[:id]; renders a 404 error if
  # no article is found.
  def find_article
    @article = Article[params[:id]]

    render_http_error(404) unless @article
  end

  # Redirects to article_path(@article) when the id in the URL differs from
  # the article's slug, or when a logged-in user is viewing the page without
  # a `u` param.
  def update_url
    slug_needs_updating = @article.slug.present? && params[:id] != @article.slug
    missing_token = current_user && params[:u].blank?

    redirect_to(article_path(@article)) if slug_needs_updating || missing_token
  end

  # Lets active logged-in users through. Otherwise params[:u] must match a
  # user whose status is "active"; if it does not, the normal login flow
  # is attempted instead.
  def validate_token
    return if current_user.try(:active?)

    unless params[:u].present? && 
           User.find_by_share_token_and_status(params[:u], "active")
      attempt_user_login # helper that calls authenticate + authenticate_user
    end
  end
end
```

This is clearly not a portrait of healthy code! In fact, it looks suspiciously similar to the code samples that "lost the plot" in Avdi Grimm's contributed article on [confident coding](https://practicingruby.com/articles/confident-ruby). That said, it's probably more fair to say that there wasn't much of a well-defined plot when this code was written in the first place, and my attempts to modify it only muddied things further.

It was hard for me to determine whether or not I should attempt to refactor this code right away or wait until later. From a purely technical perspective, the answer was obvious that this code needed to be cleaned up. But looking at it from another angle, I wanted to make sure that the external behavior of the system was what I actually wanted before I invested more time into optimizing its implementation. I didn't have insight at this point in time to answer that question, so I decided to leave the code messy for the time being until I had a chance to see how well the new sharing mechanism performed in production. 

> HISTORY: Deployed on 2013-08-26 and then merged on 2013-08-29.
>
> [View complete diff](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/173)

## Step 9: Get practicingruby.com running on our new VPS

While I kept working on the sharing mechanism, Jordan was busy setting up a new production environment for us. We set up a temporary subdomain new.practicingruby.com for this purpose, and that allowed us to do some live testing while he got everything up and running.

Technically speaking, we only needed to upgrade our Ruby version in order to fix the problems we encountered with DelayedJob, so spinning up a new VPS instance purely for that purpose might sound like a bit of overkill at first glance. However, starting with a blank slate environment allowed us to upgrade the rest of our serverside dependencies at a relaxed pace, without worrying about potentially causing large amounts of site downtime. Spinning up the new environment in parallel before decommissioning the old one also meant that we could always switch back to our old environment if we encountered any problems during the migration to the new server.

On 2013-08-30, we decided to migrate to the new environment. The first step was to pull in the delayed broadcast mailer code and do live tests similar to the ones we had done earlier. After we found that those went smoothly, we decided to do a complete end-to-end test by using the new system to deliver an announcement about the planned maintenance downtime. That worked without any issues, and so at that time we were ready to perform the cut over.

We made sure to copy over all the data from our old environment immediately after putting it into maintenance mode, and then we updated our DNS entries to point practicingruby.com at the new server. After the DNS records propagated, we used a combination of watching our logs and our analytics dashboard (Mixpanel) to see how things were going. There were only two minor hiccups before everything got back to normal:

* We had a small issue with our OAuth configuration on GitHub, but resolving it was trivial after we realized that it was not in fact a DNS-related problem, but an issue on our end.

* We realized as soon as we spun up our cron jobs on the new server that our integration with Mailchimp's API had broken. The gem we were using was not Ruby 2.0 compatible, but we never realized this in development because we use it only in a tiny cleanup script that runs behind the scenes on the server. Thankfully because we had isolated this dependency from our application code, [changing it was very easy](https://github.com/elm-city-craftworks/practicing-ruby-web/pull/177/files).

These two issues were the only problems we needed to debug under pressure throughout all the work we described in this article. Given how much we changed under the hood, I am quite proud of that fact.

## Reflections

The state of practicingruby.com immediately after our server migration was roughly comparable to that of the highway construction photograph that you saw at the beginning of this article: some improvements had been made, but there was still plenty of old cruft left around, and lots of work left to be done before things could be considered finished. My goal in writing this article was not to show a beautiful end result, but instead to illustrate a process that is seldom discussed.

In the name of preserving realism, I dragged you through some of our oldest and worst code, and also showed you some newer code that isn't much nicer looking than our old stuff. Along the way, we used countless techniques that feel more like plumbing work than interesting programming work. Each step along the way, we used a different technique to glue one bit of code to another bit of code without breaking old behaviors, because there was no good one-size-fits-all solution to turn to. We got the job done, but we definitely got our hands dirty in the process.

I feel fairly confident that some of the changes I showed in this article are ones that I will be thankful for in the long haul, while others I will come to regret. The trouble of course is knowing which will be which, and only time and experience can get me there. But hopefully by sharing my own experiences with you, you can learn something from my mistakes, too!

> Special thanks goes to Jordan Byron (the maintainer of practicingruby.com) for collaborating with me on this article, and for helping Practicing Ruby run smoothly over the years.


================================================
FILE: articles/v7/005-low-level.md
================================================
> This issue of Practicing Ruby was directly inspired by Nick Morgan's
> [Easy 6502](http://skilldrick.github.io/easy6502/) tutorial. While
> the Ruby code in this article is my own, the bytecode for the
> Snake6502 game was shamelessly stolen from Nick. Be sure to check
> out [Easy 6502](http://skilldrick.github.io/easy6502/) if this topic 
> interests you; it's one of the best programming tutorials I've ever seen.


The sea of numbers you see below is about as close to the metal as programming gets:

```
0600: 20 06 06 20 38 06 20 0d 06 20 2a 06 60 a9 02 85 
0610: 02 a9 04 85 03 a9 11 85 10 a9 10 85 12 a9 0f 85 
0620: 14 a9 04 85 11 85 13 85 15 60 a5 fe 85 00 a5 fe 
0630: 29 03 18 69 02 85 01 60 20 4d 06 20 8d 06 20 c3 
0640: 06 20 19 07 20 20 07 20 2d 07 4c 38 06 a5 ff c9 
0650: 77 f0 0d c9 64 f0 14 c9 73 f0 1b c9 61 f0 22 60 
0660: a9 04 24 02 d0 26 a9 01 85 02 60 a9 08 24 02 d0 
0670: 1b a9 02 85 02 60 a9 01 24 02 d0 10 a9 04 85 02 
0680: 60 a9 02 24 02 d0 05 a9 08 85 02 60 60 20 94 06 
0690: 20 a8 06 60 a5 00 c5 10 d0 0d a5 01 c5 11 d0 07 
06a0: e6 03 e6 03 20 2a 06 60 a2 02 b5 10 c5 10 d0 06 
06b0: b5 11 c5 11 f0 09 e8 e8 e4 03 f0 06 4c aa 06 4c 
06c0: 35 07 60 a6 03 ca 8a b5 10 95 12 ca 10 f9 a5 02 
06d0: 4a b0 09 4a b0 19 4a b0 1f 4a b0 2f a5 10 38 e9 
06e0: 20 85 10 90 01 60 c6 11 a9 01 c5 11 f0 28 60 e6 
06f0: 10 a9 1f 24 10 f0 1f 60 a5 10 18 69 20 85 10 b0 
0700: 01 60 e6 11 a9 06 c5 11 f0 0c 60 c6 10 a5 10 29 
0710: 1f c9 1f f0 01 60 4c 35 07 a0 00 a5 fe 91 00 60 
0720: a2 00 a9 01 81 10 a6 03 a9 00 81 10 60 a2 00 ea 
0730: ea ca d0 fb 60 
```

Although you probably can't tell by looking at it, what you see here
is assembled machine code for the venerable 6502 processor that powered 
many of the classic video games of the 1980s. When executed in a simulated
environment, this small set of cryptic instructions produces a minimal
version of the Snake arcade game, as shown below:

![](http://i.imgur.com/0DsKeoy.gif)

In this article, we will build a stripped down 6502 simulator 
in JRuby that is complete enough to run this game. If you haven't done much 
low-level programming before, don't worry! Most of what follows is 
just ordinary Ruby code. I will also be showing you a ton of examples 
along the way, and those should help keep you on track. You might also
want to grab [full source code](https://github.com/sandal/vintage) for 
the simulator, so that you can experiment with it while 
reading through this article.

## Warmup exercise: Reverse engineering Snake6502

An interesting property of machine code is that if you know its structure,
you can convert it back into assembly language. Among other things,
the ability to disassemble machine code is useful for debugging and
exploration purposes. Let's try this out on Snake6502! 

The output below shows memory locations, machine code, and assembly code for the
first 28 instructions of the game. These instructions are responsible for
initializing the state of the snake and the apple before the main event 
loop kicks off. You don't need to understand exactly how they work right
now, just try to get a feel for how the code in the `hexdump` column corresponds
to the code in the `assembly` column:

```
address  hexdump     assembly
------------------------------
$0600    20 06 06    JSR $0606
$0603    20 38 06    JSR $0638
$0606    20 0d 06    JSR $060d
$0609    20 2a 06    JSR $062a
$060c    60          RTS
$060d    a9 02       LDA #$02
$060f    85 02       STA $02
$0611    a9 04       LDA #$04
$0613    85 03       STA $03
$0615    a9 11       LDA #$11
$0617    85 10       STA $10
$0619    a9 10       LDA #$10
$061b    85 12       STA $12
$061d    a9 0f       LDA #$0f
$061f    85 14       STA $14
$0621    a9 04       LDA #$04
$0623    85 11       STA $11
$0625    85 13       STA $13
$0627    85 15       STA $15
$0629    60          RTS
$062a    a5 fe       LDA $fe
$062c    85 00       STA $00
$062e    a5 fe       LDA $fe
$0630    29 03       AND #$03
$0632    18          CLC
$0633    69 02       ADC #$02
$0635    85 01       STA $01
$0637    60          RTS
```

If you look at the output carefully, you'll be able to notice some patterns even
if you don't understand what the instructions themselves are meant to do. For
example, each instruction is made up of 1-3 bytes of machine code. The
first byte in each instruction tells us what operation it is, and the remaining
bytes (if any) form its operand.

If you take a look at the first four instructions, it is easy to see that the
opcode `20` corresponds to the `JSR` instruction. Forming its operand is
similarly straightforward, because it's the same number in both places, 
just with opposite byte order:

```
20 06 06 -> JSR $0606  
20 38 06 -> JSR $0638
20 0d 06 -> JSR $060d
20 2a 06 -> JSR $062a
```

If you ignore the symbols in front of the numbers for the moment, mapping single
byte operands is even easier, because they're represented the same way in both
the machine code and the assembly code. Knowing that the `85` opcode maps
to the `STA` operation, it should be easy to see how `11, 13, 15` map to
`$11, $13, $15` in the following example:

```
85 11  -> STA $11
85 13  -> STA $13
85 15  -> STA $15
```

But the symbols in front of the numbers in assembly language obviously mean
something. If you carefully look at the machine code, you'll be able to find
that the same operation can have multiple different opcodes, each of which
identifies a particular kind of operand:

```
a9 0f -> LDA #$0f
a5 fe -> LDA $fe
```

Without getting into too much detail here, the example above shows us that both
`a9` and `a5` correspond to the `LDA` instruction. The difference between the
two opcodes is that `a9` treats its operand as an immediate value, and `a5` 
interprets it as a memory address. In assembly code, this difference is
represented syntactically (`#$xx` vs. `$xx`), but in the machine code we must
rely on numbers alone.

The various ways of interpreting operands (called "addressing modes") are
probably the most confusing part of working with 6502 code. There are
about a dozen of them, and to get Snake6502 running, we need to implement
most of them. The good news is that every addressing mode is just a
roundabout way of converting an operand into a particular address in memory, and once you have that
address, the operations themselves do not care about how you computed it.
Once you sweep all that stuff under the rug, you can end up with clean
operation definitions like this:

```ruby
# NOTE: 'e' refers to the address that was computed from the instruction's
# operand and addressing mode.

LDA { cpu[:a] = mem[e]  } 
STA { mem[e]  = cpu[:a] }
```

This realization also tells us that the memory module will not need to take
addressing modes into account as long as they're precomputed elsewhere. With
that in mind, let's get started building a storage model for our simulator.
We'll deal with the hairy problem of addressing modes later.

## Memory

Except for a few registers that are used to store intermediate
computations, the 6502 processor relies on its memory for pretty much
everything. Program code, data, and the system stack all reside in 
the same 16-bit addressing space. Even flow control is entirely
dependent on memory: the program counter itself is nothing more
than an address that is used to look up the next instruction to run.

This "all in one bucket" approach is a double-edged sword. It makes it harder to
write safe programs, but the tradeoff is that the storage model itself is very
simple. Conceptually, the memory module is nothing more than a mapping 
between 16-bit addresses and 8-bit values:

```ruby
describe "Storage" do
  let(:mem) { Vintage::Storage.new }

  it "can get and set values" do
    mem[0x1337] = 0xAE

    mem[0x1337].must_equal(0xAE)
  end

  # ...
end
```

But because the program counter keeps track of a 'current location' 
in memory at any point in time, there is a lot more we can do with 
this simple structure. Let's walk through the remaining tests 
for `Vintage::Storage` to see what else it implements.

**Program loading**

When a program is loaded into memory, there is nothing special about the 
way it is stored; it's just like any other data. In a real 6502 processor,
a register is used to store the address of the 
next instruction to be run, and that address is used to read an opcode
from memory. In our simulator, we can let the `Storage` class keep track 
of this number for us, incrementing it whenever we call 
the `Storage#next` method.

The following test shows how to load a program and then walk its code one byte at a time:

```ruby
it "can load a bytecode sequence into memory and traverse it" do
  bytes = [0x20, 0x06, 0x06]

  mem.load(bytes)
  mem.pc.must_equal(program_offset) # load() does not increment counter

  bytes.each { |b| mem.next.must_equal(b) }

  mem.pc.must_equal(program_offset + 3)
end
```

The starting position of the program can be an arbitrary location, but
to maintain compatibility with the simulator from the Easy6502 tutorial, we
initialize the program counter to `0x600`:

```ruby
let(:program_offset) { Vintage::Storage::PROGRAM_OFFSET }

it "sets an initial position of $0600" do
  program_offset.must_equal(0x0600)

  mem.pc.must_equal(program_offset)
end
```

**Flow control + branching**

Very rudimentary flow control is supported by setting the 
program counter to a particular address, which causes the 
processor to `jump` to the instruction at that address:

```ruby
it "implements jump" do
  mem.jump(program_offset + 0xAB)

  mem.pc.must_equal(program_offset + 0xAB)
end
```

Branching can be implemented by only calling `jump` when a
condition is met:

```ruby
it "implements conditional branching" do
  big   = 0xAB
  small = 0x01

  # a false condition does not affect mem.pc
  mem.branch(small > big, program_offset + 5)
  mem.pc.must_equal(program_offset)

  # true condition jumps to the provided address
  mem.branch(big > small, program_offset + 5)
  mem.pc.must_equal(program_offset + 5)
end
```

This test case is a bit contrived, so let's take a look at 
some real Snake6502 code that illustrates how branching is meant to be used:

```
$064d    a5 ff     LDA $ff      # read the last key pressed on the keyboard
$064f    c9 77     CMP #$77     # check if the key was "w" (ASCII code 0x77)
$0651    f0 0d     BEQ $0660    # if so, jump forward to $0660 
$0653    c9 64     CMP #$64     # check if the key was "d" (ASCII code 0x64)
$0655    f0 14     BEQ $066b    # if so, jump forward to $066b
$0657    c9 73     CMP #$73     # check if the key was "s" (ASCII code 0x73)
$0659    f0 1b     BEQ $0676    # if so, jump forward to $0676
$065b    c9 61     CMP #$61     # check if the key was "a" (ASCII code 0x61)
$065d    f0 22     BEQ $0681    # if so, jump forward to $0681
```

Presumably, the code at `$0660` starts a procedure that moves the snake's
head up, the code at `$066b` moves it to the right, and so on. In other words,
if one of these `BEQ` instructions finds a match, it will jump to the right place 
in the code to handle the relevant condition. But if no match is found, the 
processor will happily continue on to whatever code comes after this set of 
instructions in the program.

The tricky thing about using instructions that rely on `jump` (and consequently,
`branch`) is that they are essentially GOTO statements. When you see one of
these statements in the code, you know exactly what instruction will be executed
next, but there's no way of telling if it will ever return to the location
it was called from. To get around this problem, we need support for subroutines
that know how to return to where they've been called from. And to implement
*those*, we need a system stack.

**Stack operations**

Here are the tests for how we'd like our stack to behave:

```ruby
let(:stack_origin) { Vintage::Storage::STACK_ORIGIN }
let(:stack_offset) { Vintage::Storage::STACK_OFFSET }

it "has a 256 element stack between 0x0100-0x01ff" do
  stack_offset.must_equal(0x0100)
  stack_origin.must_equal(0xff) # this value gets added to the offset
end

it "implements stack-like behavior" do
  mem.sp.must_equal(stack_origin)

  mem.push(0x01)
  mem.push(0x03)
  mem.push(0x05)

  mem.sp.must_equal(stack_origin - 3)

  mem.pull.must_equal(0x05)
  mem.pull.must_equal(0x03)
  mem.pull.must_equal(0x01)

  mem.sp.must_equal(stack_origin)
end
```

As the tests indirectly suggest, the stack is a region in memory 
between `$0100` and `$01ff`, indexed by a stack pointer (`sp`).
Each time a value is pushed onto the stack, the value of the 
stack pointer is decremented, and each time a value is pulled, 
the pointer is incremented. This makes it so that the stack
pointer always tells you where the "top of the stack" is.

**Subroutines**

With a stack in place, we'll have most of what we need to implement
"Jump to subroutine" (`jsr`) and "Return from subroutine" (`rts`)
functionality. The behavior of these features will end up 
looking something like this:

```ruby
it "implements jsr/rts" do
  mem.jsr(0x0606)
  mem.jsr(0x060d)

  mem.pc.must_equal(0x060d)

  mem.rts
  mem.pc.must_equal(0x0606)

  mem.rts
  mem.pc.must_equal(program_offset)
end
```

To make the above test pass, `jsr` needs to `push` the current 
program counter onto the stack before executing a `jump` to the 
specified address. Later when `rts` is called, the address is
pulled out of the stack, and then another `jump` is executed
to bring you back to where the last `jsr` command was executed.
This works fine even in nested subroutine calls, due to the
nature of how stacks work.

The only tricky part is that addresses are 16-bit values, but 
stack entries are limited to single byte values. To get around
this problem, we need a couple helper functions to convert
a 16-bit number into two bytes, and vice-versa:

```ruby
it "can convert two bytes into a 16 bit integer" do
  mem.int16([0x37, 0x13]).must_equal(0x1337)
end

it "can convert a 16 bit integer into two bytes" do
  mem.bytes(0x1337).must_equal([0x37, 0x13])
end
```

These helpers will also come in handy later, when we need to deal with
addressing modes.

**Implementation**

Behavior-wise, there is a lot of functionality here. In a high level
environment it would feel a lot like we were mixing distinct concerns,
but at the low level we're working at it's understandable that nearly
infinite flexibility is desirable.

Despite the conceptual complexity, the `Storage` class is extremely easy to 
implement. In fact, it takes less than 80 lines of code if you don't
worry about validations and robustness:

```ruby
module Vintage
  # A flat mapping from 16-bit addresses to 8-bit values that also keeps
  # track of the program counter (pc) and the stack pointer (sp).
  class Storage
    PROGRAM_OFFSET = 0x0600 # where load() places programs; initial pc
    STACK_OFFSET   = 0x0100 # base address of the stack region
    STACK_ORIGIN   = 0xff   # initial sp; added to STACK_OFFSET when indexing

    def initialize
      @memory = Hash.new(0) # addresses that were never written read as zero
      @pc     = PROGRAM_OFFSET
      @sp     = STACK_ORIGIN
    end

    attr_reader :pc, :sp

    # Copies bytes into memory starting at PROGRAM_OFFSET. Does not move pc.
    def load(bytes)
      index = PROGRAM_OFFSET

      bytes.each_with_index { |c,i| @memory[index+i] = c }
    end

    # Returns the value stored at address.
    def [](address)
      @memory[address]
    end

    # Stores value at address, truncated to a single byte.
    def []=(address, value)
      @memory[address] = (value & 0xff)
    end

    # Returns the byte at pc, then advances pc by one.
    def next
      @memory[@pc].tap { @pc += 1 }
    end

    # Unconditionally sets pc to address.
    def jump(address)
      @pc = address
    end

    # Sets pc to address only when test is truthy.
    def branch(test, address)
      return unless test

      @pc = address
    end

    # Saves the current pc on the stack (low byte first), then jumps.
    def jsr(address)
      low, high = bytes(@pc)

      push(low)
      push(high)

      jump(address)
    end

    # Restores the pc most recently saved by jsr.
    def rts
      h = pull
      l = pull

      @pc = int16([l, h])
    end

    # Writes value to the top of the stack and moves sp down by one.
    # The value is truncated to one byte, just as in #[]=.
    def push(value)
      @memory[STACK_OFFSET + @sp] = (value & 0xff)
      @sp -= 1
    end

    # Moves sp up by one and returns the value found there.
    def pull
      @sp += 1

      @memory[STACK_OFFSET + @sp]
    end

    # Combines [low, high] bytes into a 16-bit integer (little-endian).
    def int16(bytes)
      bytes.pack("C*").unpack("v").first
    end

    # Splits a 16-bit integer into [low, high] bytes. The unsigned "C"
    # directive is used so that bytes >= 0x80 do not come back negative.
    def bytes(num)
      [num].pack("v").unpack("C*")
    end
  end
end
```

For such boring code, it's a bit surprising to think that it can be a fundamental
building block for generic computing. Keep in mind of course that we're building
a simulation and not a real piece of hardware, and we're doing it in one of the
highest level languages you can use.

If it already feels like we're cheating, just wait until you see the next trick!

## Memory-mapped I/O

To implement Snake6502, our simulator needs to be able to generate random
numbers, read keyboard input, and also display graphics on the screen. None of
these features are directly supported by the 6502 instruction set, so that means
that every individual system had to come up with its own way of doing things.
This is one of many things that causes machine code (especially old-school
machine code) to not be directly portable from one system to another.

Because we're trying to get Snake6502 to run in our simulator without modifying
its bytecode, we're more-or-less constrained to following the approach used by
the Easy6502 simulator: memory-mapped I/O.

This approach is actually very easy to implement in a simulated environment: you
add hooks around certain memory addresses so that when they are accessed, they
execute some custom code rather than directly reading or writing a 
value to memory. In the case of Snake6502, we expect the following behaviors:

* Reading from `$fe`  returns a random 8-bit integer.
* Reading from `$ff` retrieves the ASCII code of the last key 
pressed on the keyboard.
* Writing to addresses from `$0200` to `$05ff` will render
pixels to the screen. (`$0200` is the top-left corner
of the 32x32 display, and `$05ff` is the bottom-right corner.)

These features could be added directly to the `Storage` class,  but it would
feel a bit awkward to clutter up a generic module with some very specific edge
cases. For that reason, it is probably better to implement them as a module
mixin: 

```ruby
module Vintage
  # Mixin that layers memory-mapped I/O on top of an object that already
  # implements #[] and #[]= (such as Vintage::Storage).
  #
  # The object assigned to `ui` must respond to `last_keypress` and
  # `update(x, y, color)`.
  module MemoryMap
    RANDOMIZER  = 0xfe             # reads return a random byte
    KEY_PRESS   = 0xff             # reads return ui.last_keypress
    PIXEL_ARRAY = (0x0200..0x05ff) # writes are forwarded to ui.update

    attr_accessor :ui

    # Reads address, intercepting the two special input locations.
    def [](address)
      case address
      when RANDOMIZER
        # rand(n) excludes n itself, so 0x100 is needed to cover 0x00..0xff
        rand(0x100)
      when KEY_PRESS
        ui.last_keypress
      else
        super
      end
    end

    # Writes v to address k as usual, then repaints the matching pixel if
    # k falls inside the display region.
    def []=(k, v)
      super

      if PIXEL_ARRAY.include?(k)
        offset = k - PIXEL_ARRAY.first

        # The display is 32 pixels wide, and colors are limited to 0..15
        ui.update(offset % 32, offset / 32, v % 16)
      end
    end
  end
end
```

You probably already have a good idea of how `MemoryMap` works from seeing
its implementation, but it wouldn't hurt to see an example of how it is
used before we move on. Here's how to display a single pixel on the 
screen, randomly varying its color until the spacebar (ASCII code 0x20) 
is pressed:

```ruby
mem = Vintage::Storage.new
mem.extend(Vintage::MemoryMap)

mem.ui = Vintage::Display.new 

(mem[0x0410] = mem[0xfe]) until mem[0xff] == 0x20 
```

It's worth noting that this is the only code in the entire simulator that
directly depends on a connection to some sort of user interface, and the
protocol consists of just two methods: `ui.update(x, y, color)` and
`ui.last_keypress`. In our case, we use a JRuby-based GUI, but anything
else could be substituted as long as it implemented these two methods.

At this point, our storage model is pretty much complete. We now can 
turn our attention to various number crunching features.

## Registers and Flags

In order to get Snake6502 to run, we need all six of
the programmable registers that the processor provides. We've handled two of
them already (the stack pointer and the program counter), so we just have four
more to implement: A, X, Y, and P. A few design constraints will help make this
work go a whole lot faster:

* Most of the operations that can be done on A are done the same way on X and Y,
so we can implement some generic functions that operate on all three of them.

* We can implement the status register (P) as a collection of individual
attributes, rather than seven 1-bit flags packed into a single byte.

* Because Snake6502 only relies on the (c)arry, (n)egative, and (z)ero flags
from the status register, we can skip implementing the other four status flags 
and still have a playable game.

With those limitations in mind, let's work through some specs to understand
how this model ought to behave. For starters, we'll be building a `Vintage::CPU` 
that implements three registers and three flags, initializing them all to 
zero by default:

```ruby
describe "CPU" do
  let(:cpu) { Vintage::CPU.new }

  let(:registers) { [:a, :x, :y] }
  let(:flags)     { [:c, :n, :z] }
  
  it "initializes registers and flags to zero" do
    (registers + flags).each { |e| cpu[e].must_equal(0) }
  end

   #...
end
```

It will be possible to directly set registers via the `#[]=` method, because
the behavior will be the same for all three registers:

```ruby
it "allows directly setting registers" do
  registers.each do |e|
    value  = rand(0xff)

    cpu[e] = value
    cpu[e].must_equal(value)
  end
end
```

However, because flags don't have the same update semantics as registers, we 
will not allow directly setting them via `#[]=`:

```ruby
it "does not allow directly setting flags" do
  flags.each do |e|
    value  = rand(0xff)

    err = -> { cpu[e] = value }.must_raise(ArgumentError)
    err.message.must_equal "#{e.inspect} is not a register"
  end
end
```

The carry flag (c) can be toggled via the `set_carry` and `clear_carry` 
methods. We'll need this later for getting the `CPU`  into
a clean state whenever we do addition and subtraction 
operations:

```ruby
it "allows setting the c flag via set_carry and clear_carry" do
  cpu.set_carry
  expect_flags(:c => 1)

  cpu.clear_carry
  expect_flags(:c => 0)
end
```

Some other instructions will require us to set the carry flag
based on arbitrary conditions, so we'll need support for that as well:

```ruby
it "allows conditionally setting the c flag via carry_if" do
  # true condition
  x = 3
  cpu.carry_if(x > 1)

  expect_flags(:c => 1)

  # false condition
  x = 0
  cpu.carry_if(x > 1)

  expect_flags(:c => 0)
end
```

The N and Z flags are set based on whatever result the `CPU` last processed:

```ruby
it "sets z=1 when a result is zero, sets z=0 otherwise" do
  cpu.result(0)
  expect_flags(:z => 1)

  cpu.result(0xcc)
  expect_flags(:z => 0)
end

it "sets n=1 when result is 0x80 or higher, n=0 otherwise" do
  cpu.result(rand(0x80..0xff))
  expect_flags(:n => 1)

  cpu.result(rand(0x00..0x7f))
  expect_flags(:n => 0)
end
```

The `result` method also returns a number truncated to fit in a single byte,
because pretty much every place we could store a number in this system
expects 8-bit integers:

```ruby
it "truncates results to fit in a single byte" do
  cpu.result(0x1337).must_equal(0x37)
end  
```

To help keep the `CPU` in a consistent state and to simplify the work
involved in many of the 6502 instructions, we automatically call `cpu.result`
whenever a register is set via `CPU#[]=`. The tests below show the 
effects of that behavior:

```ruby
  it "implicitly calls result() when registers are set" do
    registers.each do |e|
      cpu[e] = 0x100
      
      cpu[e].must_equal(0)
      expect_flags(:z => 1, :n => 0)

      cpu[e] -= 1
      
      cpu[e].must_equal(0xff)
      expect_flags(:z => 0, :n => 1)
    end
  end
```

Here's an implementation that satisfies all of the tests we've seen so far:

```ruby
module Vintage
  # Holds the A, X and Y registers along with the carry (c), negative (n)
  # and zero (z) status flags.
  class CPU
    def initialize
      @registers = { :a => 0, :x => 0, :y => 0 }
      @flags     = { :z => 0, :c => 0, :n => 0 }
    end

    # Looks up a register first, then a flag. Unknown keys raise KeyError.
    def [](key)
      @registers.fetch(key) { @flags.fetch(key) }
    end

    # Assigns a register, passing the value through #result so that it is
    # truncated to a byte and the n/z flags are refreshed. Flags cannot be
    # assigned this way; attempting to do so raises ArgumentError.
    def []=(key, value)
      if @registers.key?(key)
        @registers[key] = result(value)
      else
        raise ArgumentError, "#{key.inspect} is not a register"
      end
    end

    def set_carry
      @flags[:c] = 1
    end

    def clear_carry
      @flags[:c] = 0
    end

    # Sets the carry flag when test is truthy, and clears it otherwise.
    def carry_if(test)
      if test
        set_carry
      else
        clear_carry
      end
    end

    # Truncates number to eight bits, updates the z and n flags to describe
    # the truncated value, and returns it.
    def result(number)
      byte = number & 0xff

      @flags[:z] = byte.zero? ? 1 : 0
      @flags[:n] = byte >> 7 # the top bit of the byte

      byte
    end
  end
end
```
  
Putting it all together, the role of the `CPU` class is mostly just to do some
basic numerical housekeeping that will make implementing 6502 instructions
easier. Consider for example, the `CMP` and `BEQ` operations, which can
be used together to form a primitive sort of `if` statement. We saw these two
operations used together in the earlier example of keyboard input handling:

```
$064f    c9 77     CMP #$77     # check if the key was "w" (ASCII code 0x77)
$0651    f0 0d     BEQ $0660    # if so, jump forward to $0660 
```

Using a combination of the `CPU` and `Storage` objects we've already built, we'd
be able to define the `CMP` and `BEQ` operations as shown below:

```ruby
CMP do 
  cpu.carry_if(cpu[:a] >= mem[e])

  cpu.result( cpu[:a] - mem[e] )
end

BEQ { mem.branch(cpu[:z] == 1, e) }
```

Even if we ignore the `cpu.carry_if` call, we know from what we've seen
already that if `CPU#result` is called with a zero value, it will set the Z flag
to 1. We also know that when `Storage#branch` is called with a true value, it
will jump to the specified address, otherwise it will do nothing at all. Putting
those two facts together with the Snake6502 code shown above tells us that if the
value in the A register is `0x77`, execution will jump to `$0660`.

At this point, we're starting to see how 6502 instructions can be
mapped onto the objects we've already built, and that means we're 
close to the finish line.  Before we get there, we only have two obstacles
to clear: implementing addressing modes to handle operands, and building
a program runner that knows how to map raw 6502 code to the operation 
definitions shown above.

## Addressing Modes

> **NOTE:** The explanation that follows barely scrapes the surface of
this topic. If you want to really understand 6502 addressing modes, you should check
out the [relevant section](http://skilldrick.github.io/easy6502/#addressing)
in the Easy6502 tutorial.

In the very first exercise where we disassembled the first few instructions
of Snake6502, we discovered the presence of several addressing modes
that cause operands to be interpreted in various different ways. To get
the game running, we will need to handle a total of eight different 
addressing modes.

This is a lot of different ways to generate an address, and it's intimidating 
to realize we're only implementing an incomplete subset of what the 6502 processor 
provides. However, it's important to keep in mind that the only data structure 
we have to work with is a simple mapping from 16-bit integers to 8-bit 
integers. Among other things, clever indexing can give us the functionality we'd
expect from variables, references, and arrays -- all the stuff that doesn't have
a direct representation in machine code.

I'm going to show the definitions for all of the addressing modes used by
Snake6502 below, which probably won't make much sense at first glance. But try
to see if you can figure out what some of this code is doing:

```ruby
module Vintage
  # Turns the operand bytes that follow an opcode into the effective
  # address (`e`) that the instruction definition will work with.
  module Operand
    # Reads the operand for the current instruction.
    #
    # mem  - storage object; must respond to #next, #pc, #[] and #int16
    # mode - addressing mode code taken from the opcode table
    # x, y - current values of the X and Y index registers
    #
    # Returns the effective address as an Integer, or nil for implicit
    # instructions. Raises NotImplementedError for an unknown mode.
    def self.read(mem, mode, x, y)
      case mode
      when "#" # Implicit
        nil
      when "@" # Relative
        offset = mem.next

        # The offset is a signed (two's complement) byte: 0x00..0x7f jump
        # forward, 0x80..0xff jump backward by 128..1 bytes. Note that 0x80
        # itself is -128, not +128.
        mem.pc + (offset < 0x80 ? offset : offset - 0x100)
      when "IM" # Immediate
        # The operand *is* the data, so its own address is the result.
        mem.pc.tap { mem.next }
      when "ZP" # Zero Page
        mem.next
      when "ZX" # Zero Page, X
        # Indexing never leaves page zero; the sum wraps around at 0xff.
        (mem.next + x) & 0xff
      when "AB" # Absolute
        mem.int16([mem.next, mem.next])
      when "IX" # Indexed Indirect
        e = mem.next

        # Both bytes of the pointer are fetched from page zero (with wrap).
        lo = (e + x) & 0xff
        hi = (lo + 1) & 0xff

        mem.int16([mem[lo], mem[hi]])
      when "IY" # Indirect Indexed
        e = mem.next

        # The pointer lives in page zero, so its high byte wraps as well.
        mem.int16([mem[e], mem[(e + 1) & 0xff]]) + y
      else
        raise NotImplementedError, mode.inspect
      end
    end
  end
end
```

Now let's walk through them one-by-one. You can refer to the source code above as needed
to make sense of the following examples.

1) The implicit addressing mode is meant for instructions that either don't operate 
on a memory address at all, or can infer the address internally. An example
we've already seen is the `RTS` operation that is used to return from a subroutine --
it gets its data from the stack rather than from an operand, making it a single
byte instruction. 

2) The relative addressing mode is used by branches only. Consider
the following example:

```
$0651    f0 0d     BEQ $0660    # if Z=1, jump to $0660 
```

By the time the `$0d` operand is read, the program counter will be set to
`$0653`. If you add these two numbers together, you get the address to jump to
if Z=1: `$0660`.

3) Immediate addressing is used when you want to have an instruction work on the
operand itself. To do so, we return the operand's address, then increment the 
program counter as normal. In the example below, the computed address (`e`) 
is `0x0650`, and `mem[e] == 0x77`:

```
$064f    c9 77     CMP #$77
```

4) Zero page addressing is straightforward: it simply refers to any address
between `$00` and `$ff`. These are convenient for storing program data in, and
are faster to access because they do not require combining two bytes into a 16
bit integer. We've already seen copious use of this address mode throughout
the examples in this article, particularly when working with keyboard input
(`$ff`) and random number generation (`$fe`).

5) Zero page, X indexing is used for iterating over some simple sequences in
memory. For example, Snake6502 stores the position of each part of the snake's
body in byte pairs starting at memory location `$10`. Using this addressing
mode, it is possible to walk over the array by simply incrementing the X
register as you go.

6) We've also seen plenty of examples of absolute addressing, especially when
looking at `JSR` operations. The only complication involved in processing
these addresses is that two bytes need to be read and then assembled into
a 16-bit integer. But since we've had to do that in several places already,
it should be easy enough to understand.

7) Indexed indirect addressing gives us a way to dynamically compute an address
from other addresses that we've stored in memory. That sounds really confusing,
but the following example should help clear it up. The code below is responsible
for moving the snake by painting a white pixel at its updated head position, and
painting a black pixel at its old tail position:

```
$0720    a2 00       LDX #$00
$0722    a9 01       LDA #$01
$0724    81 10       STA ($10,X) 
$0726    a6 03       LDX $03
$0728    a9 00       LDA #$00
$072a    81 10       STA ($10,X) 
```

The first three lines are hardcoded to look at memory locations `$10` and `$11` 
to form an address in the pixel array that refers to the new head of the 
snake. The next three lines do something similar for the tail of the snake,
but with a twist: because the length of the snake is dynamic, it needs to
be looked up from memory. This value is stored in memory location `$03`.
So to unpack the whole thing, `STA ($10, X)` will take the address `$10`, add to
it the number of bytes in the whole snake array, and then look up the address
stored in the last position of that array. That address points to the snake's
tail in the pixel array, which ends up getting set to black by this instruction.

8) Indirect indexed addressing gives us yet another way to walk over multibyte
structures. In Snake6502, this addressing mode is only used for drawing the
apple on the screen. Its position is stored in a 16-bit value stored 
in `$00` and `$01`, and the following code is used to set its color to a 
random value:

```
$0719    a0 00       LDY #$00
$071b    a5 fe       LDA $fe
$071d    91 00       STA ($00),Y
```

There are bound to be more interesting uses of these addressing modes, but
we've certainly covered enough ground for now! Don't worry if you didn't
understand this section that well, it took me many times reading the Easy6502
tutorial and the source code for Snake6502 before I figured these out myself.

## 6502 Simulator (finally!)

We are now finally at the point where all the hard stuff is done, and all that
remains is to wire up the simulator itself. In other words, it's time for
the fun part of the project.

The input for the simulator will be a binary file containing the
assembled program code for Snake6502. The bytes in that file are not meant to
be read as printable characters, but they can be inspected using a hex editor:

```
$ hexdump examples/snake.rom
0000000 20 06 06 20 38 06 20 0d 06 20 2a 06 60 a9 02 85
0000010 02 a9 04 85 03 a9 11 85 10 a9 10 85 12 a9 0f 85
0000020 14 a9 04 85 11 85 13 85 15 60 a5 fe 85 00 a5 fe
0000030 29 03 18 69 02 85 01 60 20 4d 06 20 8d 06 20 c3
0000040 06 20 19 07 20 20 07 20 2d 07 4c 38 06 a5 ff c9
0000050 77 f0 0d c9 64 f0 14 c9 73 f0 1b c9 61 f0 22 60
0000060 a9 04 24 02 d0 26 a9 01 85 02 60 a9 08 24 02 d0
0000070 1b a9 02 85 02 60 a9 01 24 02 d0 10 a9 04 85 02
0000080 60 a9 02 24 02 d0 05 a9 08 85 02 60 60 20 94 06
0000090 20 a8 06 60 a5 00 c5 10 d0 0d a5 01 c5 11 d0 07
00000a0 e6 03 e6 03 20 2a 06 60 a2 02 b5 10 c5 10 d0 06
00000b0 b5 11 c5 11 f0 09 e8 e8 e4 03 f0 06 4c aa 06 4c
00000c0 35 07 60 a6 03 ca 8a b5 10 95 12 ca 10 f9 a5 02
00000d0 4a b0 09 4a b0 19 4a b0 1f 4a b0 2f a5 10 38 e9
00000e0 20 85 10 90 01 60 c6 11 a9 01 c5 11 f0 28 60 e6
00000f0 10 a9 1f 24 10 f0 1f 60 a5 10 18 69 20 85 10 b0
0000100 01 60 e6 11 a9 06 c5 11 f0 0c 60 c6 10 a5 10 29
0000110 1f c9 1f f0 01 60 4c 35 07 a0 00 a5 fe 91 00 60
0000120 a2 00 a9 01 81 10 a6 03 a9 00 81 10 60 a2 00 ea
0000130 ea ca d0 fb 60
0000135
```

The challenge that is left to be completed is to process
the opcodes and operands in this file and turn them into
a running program. To do that, we will make use of a CSV file 
that lists the operation name and addressing mode for each opcode 
found in the file:

```
00,BRK,#
10,BPL,@
18,CLC,#
20,JSR,AB
# ... rest of instructions go here ...
E6,INC,ZP
E8,INX,#
E9,SBC,IM
F0,BEQ,@
```

Once we know the addressing mode for a given operation, we can read its
operand and turn it into an address (denoted by `e`). And once we have *that*, 
we can execute the commands that are defined in the following DSL:

```ruby
# NOTE: This file contains definitions for every instruction used 
# by Snake6502. Most of the functionality here is a direct result
# of simple calls to Vintage::Storage and Vintage::CPU instances.
#
# Each block is evaluated with three names in scope: `mem` (the storage
# object), `cpu` (the registers and flags) and `e` (the effective address
# computed from the instruction's operand; nil for implicit instructions).
#
# NOTE(review): `cpu.result` and `cpu.carry_if` are defined in Vintage::CPU;
# the comments below assume they update the status flags -- confirm there.

# NOP does nothing; BRK raises StopIteration, which ends the simulator's `loop`.
NOP { }
BRK { raise StopIteration }

## Loads, transfers and stores

# Copy the byte stored at the effective address into a register.
LDA { cpu[:a] = mem[e] }
LDX { cpu[:x] = mem[e] }
LDY { cpu[:y] = mem[e] }

# Copy X into A.
TXA { cpu[:a] = cpu[:x] }

# Write A to the effective address.
STA { mem[e] = cpu[:a] }

## Counters

# Increment / decrement the X register.
INX { cpu[:x] += 1 }
DEX { cpu[:x] -= 1 }

# Increment / decrement the byte in memory; the new value is passed through
# cpu.result before it is written back.
DEC { mem[e] = cpu.result(mem[e] - 1) }
INC { mem[e] = cpu.result(mem[e] + 1) } 

## Flow control

JMP { mem.jump(e) }

JSR { mem.jsr(e) }
RTS { mem.rts }

# Conditional branches: jump to `e` only when the given flag test is true.
BNE { mem.branch(cpu[:z] == 0, e) }
BEQ { mem.branch(cpu[:z] == 1, e) }
BPL { mem.branch(cpu[:n] == 0, e) }
BCS { mem.branch(cpu[:c] == 1, e) }
BCC { mem.branch(cpu[:c] == 0, e) }

## Comparisons

# Compare X with memory: carry_if receives "register >= operand", and
# cpu.result receives the difference. The register itself is not modified.
CPX do 
  cpu.carry_if(cpu[:x] >= mem[e])

  cpu.result(cpu[:x] - mem[e]) 
end

# Same as CPX, but compares the A register.
CMP do 
  cpu.carry_if(cpu[:a] >= mem[e])

  cpu.result(cpu[:a] - mem[e]) 
end


## Bitwise operations

# AND stores A & operand back into A; BIT only hands the same value to
# cpu.result and leaves A untouched.
AND { cpu[:a] &= mem[e] }
BIT { cpu.result(cpu[:a] & mem[e]) }

# Shift A right by one bit. carry_if receives whether the bit that is
# shifted out (bit 0 of the old value) was set.
LSR do
  t = (cpu[:a] >> 1) & 0x7F
 
  cpu.carry_if(cpu[:a][0] == 1)
  cpu[:a] = t
end

## Arithmetic

SEC { cpu.set_carry   }
CLC { cpu.clear_carry }

# A + operand + carry flag. carry_if receives whether the sum exceeded 0xff.
# NOTE(review): `t` is assigned unmasked; presumably CPU#[]= truncates it to
# 8 bits -- confirm.
ADC do 
  t = cpu[:a] + mem[e] + cpu[:c]

  cpu.carry_if(t > 0xff)
  cpu[:a] = t
end

# A - operand, minus one more when the carry flag is clear. carry_if receives
# whether the difference is non-negative.
# NOTE(review): `t` may be negative here; presumably CPU#[]= normalizes it.
SBC do
  t  = cpu[:a] - mem[e] - (cpu[:c] == 0 ? 1 : 0)

  cpu.carry_if(t >= 0)
  cpu[:a] = t
end
```

We can treat both the opcode lookup CSV and the instruction definitions DSL 
as configuration files, to be loaded into the configuration object 
shown below:

```ruby
require "csv"

module Vintage
  # Loads the two configuration files that describe an instruction set:
  #
  #   <name>.csv - one row per opcode: hex code, operation name, addressing mode
  #   <name>.rb  - a DSL file with one block per operation name
  #
  # After construction, #codes maps Integer opcodes to [operation, mode]
  # pairs, and #definitions maps operation names (Symbols) to their blocks.
  class Config
    CONFIG_DIR = "#{File.dirname(__FILE__)}/../../config"

    # name - base name (without extension) of the files inside CONFIG_DIR
    def initialize(name)
      load_codes(name)
      load_definitions(name)
    end

    attr_reader :definitions, :codes

    private

    # Builds the { opcode => [operation, mode] } lookup table from the CSV.
    def load_codes(name)
      csv_data = CSV.read("#{CONFIG_DIR}/#{name}.csv")
                    .map { |r| [r[0].to_i(16), [r[1].to_sym, r[2]]] }

      @codes = Hash[csv_data]
    end

    # Evaluates the DSL file against this object. Every `NAME { ... }` call
    # in it ends up in #method_missing, which records the block.
    def load_definitions(name)
      @definitions = {}

      path = "#{CONFIG_DIR}/#{name}.rb"

      # Passing the path makes backtraces and Proc#source_location point at
      # the real DSL file rather than at "(eval)".
      instance_eval(File.read(path), path)
    end

    # Treats any all-uppercase method call as an instruction definition and
    # stores its block; everything else is a genuine NoMethodError.
    def method_missing(id, *a, &b)
      return super unless id == id.upcase

      @definitions[id] = b
    end

    # Keeps respond_to? in agreement with #method_missing.
    def respond_to_missing?(id, include_private = false)
      id == id.upcase || super
    end
  end
end
```

Then finally, we can tie everything together with a `Simulator` object that
instantiates all the objects we need, and kicks off a program execution loop:

```ruby
module Vintage
  # Wires together the configuration, CPU and memory, then executes a
  # program image one instruction at a time.
  class Simulator
    # The object instruction blocks are evaluated against; it exposes the
    # `mem`, `cpu` and `e` names used throughout the definitions DSL.
    EvaluationContext = Struct.new(:mem, :cpu, :e)

    # Loads the binary program in `file` and runs it until an instruction
    # raises StopIteration (which Kernel#loop treats as a normal exit).
    #
    # file - path to the assembled program
    # ui   - object assigned to the memory map via `mem.ui=`
    #
    # Raises LoadError when the program contains an opcode that is not
    # listed in the configuration.
    def self.run(file, ui)
      # Config#initialize requires the base name of the configuration files.
      # NOTE(review): "6502" assumes they are config/6502.csv and
      # config/6502.rb -- confirm against the project layout.
      config = Vintage::Config.new("6502")
      cpu    = Vintage::CPU.new
      mem    = Vintage::Storage.new

      mem.extend(MemoryMap)
      mem.ui = ui

      mem.load(File.binread(file).bytes)

      loop do
        code = mem.next

        op, mode = config.codes[code]

        # Test `op`, not `name`: inside a class method, `name` resolves to
        # Module#name ("Vintage::Simulator"), which is always truthy and
        # would make the error branch below unreachable.
        if op
          e = Operand.read(mem, mode, cpu[:x], cpu[:y])

          EvaluationContext.new(mem, cpu, e)
                           .instance_exec(&config.definitions[op])
        else
          raise LoadError, "No operation matches code: #{'%.2x' % code}"
        end
      end
    end
  end
end
```

At this point, you're ready to play Snake! Or if you've been following closely
along with this article all the way to the end, you're probably more likely to
have a cup of coffee or take a nap from information overload. Either way,
congratulations for making it all the way through this long and winding
issue of Practicing Ruby!

## Further Reading

This article and the [Vintage simulator](http://github.com/sandal/vintage) are built on top of a ton of other
people's ideas and learning resources. Here are some of the works I referred to
while researching this topic:

* [Easy 6502](http://skilldrick.github.io/easy6502/) by Nick Morgan
* [Mos Technology 6502](http://en.wikipedia.org/wiki/MOS_Technology_6502) @ Wikipedia
* [Rockwell 6502 Programmer's Manual](http://homepage.ntlworld.com/cyborgsystems/CS_Main/6502/6502.htm)  by Bluechip
* [NMos 6502 opcodes](http://www.6502.org/tutorials/6502opcodes.html) by John Pickens
* [r6502](https://github.com/joelanders/r6502) by Joe Landers


================================================
FILE: articles/v7/006-infrastructure.md
================================================
> This issue of Practicing Ruby was a collaboration with Mathias Lafeldt
([@mlafeldt](https://twitter.com/mlafeldt)), an Infrastructure
Developer living in Hamburg, Germany. If Mathias had to choose the one
Internet meme that best describes his work, it would certainly be
_Automate all the things!_ 

For at least as long as Ruby has been popular among web developers, it has also
been recognized as a useful tool for system administration work. Although it was
first used as a clean alternative to Perl for ad hoc scripting, Ruby quickly
evolved to the point where it became an excellent platform for large scale 
infrastructure automation projects. 

In this article, we'll explore realistic code that handles various system
automation tasks, and discuss what benefits the automated approach has over 
doing things the old-fashioned way. We'll also see first-hand what it means to treat
"infrastructure as code", and the impact it has on building maintainable systems. 

## Prologue: Why does infrastructure automation matter?

Two massive infrastructure automation systems have been built in 
Ruby ([Puppet][puppet] and [Chef][chef]), both of which have entire open-source
ecosystems supporting them. But because these frameworks were built by and for
system administrators, infrastructure automation is often viewed as a
specialized skillset by Ruby programmers, rather than something that everyone
should learn. This is probably an incorrect viewpoint, but it is one that is
easy to hold without realizing the consequences.

Speaking from my own experiences, I had always assumed that infrastructure
automation was a problem that mattered mostly for large-scale public web
applications, internet service providers, and very complicated enterprise
projects. In those kinds of environments, the cost of manually setting up
servers would obviously be high enough to justify using a 
sophisticated automation framework. But because I never encountered those
scenarios in my own work, I was content to do things the old-fashioned way:
reading lots of "works for me" instructions from blog posts, manually typing
commands on the console, and swearing loudly whenever I broke something. For
things that really matter or tasks that seemed too tough for me to do on my own,
I'd find someone else to take care of it for me.

The fundamental problem was that my system-administration related pain wasn't 
severe enough to motivate me to learn a whole new way of doing things. Because
I never got curious enough about the topic, I didn't realize that infrastructure 
automation has other benefits beyond eliminating the costs
of doing repetitive and error-prone manual configuration work. In particular,
I vastly underestimated the value of treating "infrastructure as code",
especially as it relates to creating systems that are abstract, modular,
testable, understandable, and utterly hackable. Narrowing the problem down to
the single issue of reducing repetitive labor, I had failed to see that
infrastructure automation has the potential to eliminate an entire class of
problems associated with manual system configuration.

To help me get unstuck from this particular viewpoint, Mathias Lafeldt offered
to demonstrate to me why infrastructure automation matters, even if you aren't
maintaining hundreds of servers or spending dozens of hours a week babysitting
production systems. To teach me this lesson, Mathias built a [Chef cookbook][pr-cookbook] to completely automate the process of building an environment suitable for running [Practicing Ruby's web application][pr-web], starting with nothing but a bare Ubuntu
Linux installation. The early stages of this process weren't easy: Jordan and I
had to answer more questions about our system setup than I
ever thought would be necessary. But as things fell into place and
recipes started getting written, the benefits of being able to conceptualize a
system as code rather than as an amorphous blob of configuration files and
interconnected processes began to reveal themselves.

The purpose of this article is not to teach you how to get up and running with
Chef, nor is it meant to explain every last detail of the cookbook that
Mathias built for us. Instead, it will help you learn about the core concepts of
infrastructure automation the same way I did: by tearing apart a handful of real
use cases and seeing what you can understand about them. If you've never used
an automated system administration workflow before, or if you've only ever run
cookbooks that other people have provided for you, this article will give you a
much better sense of why the idea of treating "infrastructure as code" matters.
If you already know the answer to that question, you may still benefit from
looking at the problem from a beginner's mindset. In either case, we have
a ton of code to work our way through, so let's get started!

## A recipe for setting up Ruby 

Let's take a look at how Chef can be used 
to manage a basic Ruby installation. As you can see below, Chef
uses a pure Ruby domain-specific language for defining its recipes,
so it should be easy to read even if you've never worked with
the framework before:

```ruby
# Chef recipe: installs Ruby under /usr/local, then RubyGems and Bundler.

# Pull in the default recipe of the ruby_build cookbook.
include_recipe "ruby_build"

# The version to install is read from node attributes, not hard-coded here.
ruby_version = node["practicingruby"]["ruby"]["version"]

ruby_build_ruby(ruby_version) { prefix_path "/usr/local" }

# Shell commands have no built-in "already done?" check, so the not_if
# guard skips the update when rubygems-update shows up in `gem list`.
bash "update-rubygems" do
  code   "gem update --system"
  not_if "gem list | grep -q rubygems-update"
end

gem_package "bundler"
```

At the high level, this recipe is responsible for handling the following tasks: 

1. Installing the `ruby-build` command line tool.
2. Using `ruby-build` to compile and install Ruby to `/usr/local`.
3. Updating RubyGems to the latest version.
4. Installing the bundler gem.

Under the hood, a lot more is happening. Let's take a closer look at each
step to understand a bit more about how Chef recipes work.

**Installing ruby-build**

```ruby
include_recipe "ruby_build"
```

Including the default recipe from the [ruby_build cookbook](https://github.com/fnichol/chef-ruby_build) 
in our own code takes care of installing the `ruby-build` command line utility, 
and also handles installing a bunch of low-level packages that are required to compile Ruby 
on an Ubuntu system. But all of this work happens behind the scenes -- we just need 
to make use of the `ruby_build_ruby` command this cookbook provides and the rest will be 
taken care of for us.

**Compiling and installing Ruby**

```ruby
ruby_version = node["practicingruby"]["ruby"]["version"]

ruby_build_ruby(ruby_version) { prefix_path "/usr/local" }
```

In our recipe, the version of Ruby we want to install is not specified
explicitly, but instead set elsewhere using Chef's attribute system.
In the cookbook's [default attributes file][pr-cookbook-attributes], you'll find an entry that
looks like this:

```ruby
default["practicingruby"]["ruby"]["version"] = "2.0.0-p247"
```

Chef has a very flexible and very complicated [attribute management system][chef-attributes], but its main purpose is the same as any configuration 
system: to keep source code as generic as possible by not hard-coding
application-specific values. By getting these values out of the
source file and into well-defined locations, it also makes it
easy to see all of our application-specific configuration 
data at once.

**Updating RubyGems**

```ruby
bash "update-rubygems" do
  code   "gem update --system"
  not_if "gem list | grep -q rubygems-update"
end
```

In this code we make use of a couple shell commands, the 
first of which is obviously responsible for updating RubyGems.
The second command is a guard that prevents the gem update
command from running more than once.

Most actions in Chef have similar logic baked into them to
make sure operations are only carried out when necessary. These 
guard clauses are handled internally whenever there is a well defined 
condition to check for, so you don't need to think about them often.
In the case of shell commands the operation is potentially arbitrary,
so a custom guard clause is necessary.

**Installing bundler**

```ruby
gem_package "bundler"
```

This command is roughly equivalent to typing `gem install bundler` on the
command line. Because we installed Ruby into `/usr/local`, it will be used as
our system Ruby, and so we can use `gem_package` without any additional
settings. More complicated system setups would involve a bit more
code than what you see above, but for our purposes we're able to keep 
things simple.

Putting all of these ideas together, we end up not just with an understanding of
how to go about installing Ruby using a Chef recipe, but also a glimpse
of a few of the benefits of treating "infrastructure as code". As we
continue to work through more complicated examples, those benefits
will become even more obvious.

## A recipe for setting up process monitoring 

Now that we've tackled a simple example of a Chef recipe, let's work through 
a more interesting one. The following code is what we use for installing
and configuring the [God][god] process monitoring framework:

```ruby
# Chef recipe: installs the God process monitor and runs it as a service.

# God is distributed as a gem, so the Ruby recipe is included first.
include_recipe "practicingruby::_ruby"

gem_package "god"

# Directory that holds God's master configuration file.
directory "/etc/god" do
  owner "root"
  group "root"
  mode  "0755"
end

# The master config is a one-line shim whose content is given inline. It
# loads the application's delayed_job.god file if that file exists, and
# God is told to restart whenever this resource changes.
file "/etc/god/master.conf" do
  owner    "root"
  group    "root"
  mode     "0644"
  notifies :restart, "service[god]"

  home     = node["practicingruby"]["deploy"]["home_dir"] 
  god_file = "#{home}/current/config/delayed_job.god"

  content "God.load('#{god_file}') if File.file?('#{god_file}')"
end

# Service definition, copied from the cookbook's "god.upstart" file.
cookbook_file "/etc/init/god.conf" do
  source "god.upstart"
  owner  "root"
  group  "root"
  mode   "0644"
end

# Enable the service and start it, using the Upstart service provider.
service "god" do
  provider Chef::Provider::Service::Upstart
  action   [:enable, :start]
end
```

The short story about this recipe is that it handles the following tasks:

1. Installing the `god` gem.
2. Setting up some configuration files for `god`.
3. Registering `god` as a service to run at system boot.
4. Starting the `god` service as soon as the recipe is run.

But that's just the 10,000 foot view -- let's get down in the weeds a bit.

**Installing god via RubyGems**

```ruby
include_recipe "practicingruby::_ruby"

gem_package "god"
```

God is distributed as a gem, so we need to make sure Ruby is installed
before we can make use of it. To do this, we include the Ruby installation
recipe that was shown earlier. If the Ruby recipe hasn't run yet, it will
be executed now, but if it has already run then `include_recipe` will
do nothing at all. In either case, we can be sure that we have a
working Ruby configuration by the time the `gem_package` command is called.

The `gem_package` command itself works exactly the same way as it did when we
used it to install Bundler in the Ruby recipe, so there's nothing new to say
about it.

**Setting up a master configuration file**

```ruby
directory "/etc/god" do
  owner "root"
  group "root"
  mode  "0755"
end

file "/etc/god/master.conf" do
  owner    "root"
  group    "root"
  mode     "0644"
  notifies :restart, "service[god]"

  home     = node["practicingruby"]["deploy"]["home_dir"] 
  god_file = "#{home}/current/config/delayed_job.god"

  content "God.load('#{god_file}') if File.file?('#{god_file}')"
end
```

A master configuration file is typically used with God to load
all of the process-specific configuration files for a whole system 
when God starts up. In our case, we only have one process to watch, 
so our master configuration is a simple one-line shim that points at the
[delayed_job.god][pr-web-dj] file that is deployed alongside our Rails 
application.

Because our `/etc/god/master.conf` file is so trivial, we directly specify 
its contents in the recipe itself rather than using one of Chef's more
complicated mechanisms for dealing with configuration files. In this
particular case, manually creating the file would certainly involve
less work, but we'd lose some of the benefits that Chef is providing here.

In particular, it's worth noticing that file permissions and ownership
are explicitly specified in the recipe, that the actual location
of the file is configurable, and that Chef will send a notification
to restart God whenever this file changes. All of these things
are the sort of minor details that are easily forgotten when
manually managing configuration files on servers.

**Running god as a system service**

God needs to be running at all times, so we want to make sure that it is started on
system reboot and cleanly terminated when the system is shut down. To accomplish this, we
can configure God to run as an Upstart service, which means we need to create
yet another configuration file:

```ruby
cookbook_file "/etc/init/god.conf" do
  source "god.upstart"
  owner  "root"
  group  "root"
  mode   "0644"
end
```

The `cookbook_file` command used here is similar to the `file` command, but has a
specialized purpose: To copy files from a cookbook's `files` directory to
some location on the system being automated. In this case, we're
using the `files/default/god.upstart` cookbook file as our source, and it
looks like this:

```
description "God is a monitoring framework written in Ruby"

start on runlevel [2345]
stop on runlevel [!2345]

pre-start exec god -c /etc/god/master.conf
post-stop exec god terminate
```

Here we can see exactly what commands are going to be used to start and 
shutdown God, as well as the runlevels that it will be started and
stopped on. We can also see that the `/etc/god/master.conf` file we
created earlier will be loaded by God whenever it starts up.

Now all that remains is to enable the service to run when the system
boots, and also tell it to start up right now:

```ruby
service "god" do
  provider Chef::Provider::Service::Upstart
  action   [:enable, :start]
end
```

It's worth mentioning here that if we didn't explicitly specify the
`Service::Upstart` provider, Chef would expect the service
configuration file to be written as a [System-V init
script][god-init], which is written at a much lower level of abstraction. There
isn't anything wrong with doing things that way, but Upstart
scripts are definitely more readable. 

By this point, we've already seen how Chef can be used to install packages,
manage configuration files, run arbitrary shell commands, 
and set up system services. That knowledge alone will take you far,
but let's look at one more recipe to discover a few more 
advanced features before we wrap things up.

## A recipe for setting up an Nginx web server 

The recipe we use for configuring Nginx is the most complicated one in
Practicing Ruby's cookbook, but it mostly just combines and expands upon the
concepts we've already discussed. Try to see what you can
understand of it before reading the explanations that follow, but don't
worry if every last detail isn't immediately clear to you:

```ruby
# Chef recipe: installs Nginx, generates a self-signed SSL certificate and
# enables the Practicing Ruby site configuration.

# Override a few attributes of the nginx cookbook before including it.
node.set["nginx"]["worker_processes"]     = 4
node.set["nginx"]["worker_connections"]   = 768
node.set["nginx"]["default_site_enabled"] = false

include_recipe "nginx::default"

# `::File` is Ruby's core File class; a bare `File` would resolve to
# Chef's own class of the same name inside a recipe.
ssl_dir = ::File.join(node["nginx"]["dir"], "ssl")
directory ssl_dir do
  owner "root"
  group "root"
  mode  "0600"
end

domain_name = node["practicingruby"]["rails"]["host"]
bash "generate-ssl-files" do
  cwd   ssl_dir
  flags "-e"
  code <<-EOS
    DOM=#{domain_name}
    openssl genrsa -out $DOM.key 4096
    openssl req -new -batch -subj "/CN=$DOM" -key $DOM.key -out $DOM.csr
    openssl x509 -req -days 365 -in $DOM.csr -signkey $DOM.key -out $DOM.crt
    rm $DOM.csr
  EOS
  notifies :reload, "service[nginx]"
  # Skip generation once the certificate exists. File.exist? is used because
  # the File.exists? alias is deprecated and was removed in Ruby 3.2.
  not_if   { ::File.exist?(::File.join(ssl_dir, domain_name + ".crt")) }
end

# Render the site configuration from an ERB template.
template "#{node["nginx"]["dir"]}/sites-available/practicingruby" do
  source "nginx_site.erb"
  owner  "root"
  group  "root"
  mode   "0644"
  variables(:domain_name => domain_name)
end

nginx_site "practicingruby" do
  enable true
end

When you put all the pieces together, this recipe is responsible for the
following tasks:

1. Overriding some default Nginx configuration values.
2. Installing Nginx and managing it as a service.
3. Generating a self-signed SSL certificate based on a configurable domain name.
4. Using a template to generate a site-specific configuration file.
5. Enabling Nginx to serve up our Rails application.

In this recipe even more than the others we've looked at, a lot of the details
are handled behind the scenes. Let's dig a bit deeper to see what's really
going on.

**Installing and configuring Nginx**

We rely on the nginx cookbook to do most of the hard work of 
setting up our web server for us. Apart
from overriding a few default attributes, we only need to include the
`nginx::default` recipe into our own code to install the relevant software 
packages, generate an `nginx.conf` file, and to provide all the necessary
init scripts to manage Nginx as a service. The following four lines
of code take care of all of that for us:

```ruby
node.set["nginx"]["worker_processes"]     = 4
node.set["nginx"]["worker_connections"]   = 768
node.set["nginx"]["default_site_enabled"] = false

include_recipe "nginx::default"
```

The interesting thing to notice here is that unlike the typical server
configuration file, only the things we explicitly changed are visible here.
All the rest of the defaults are set automatically for us, and we don't
need to be concerned with their values until the time comes when we decide we
need to change them. By hiding all the details that do not matter to us,
Chef recipes tend to be much more intention revealing than
the typical server configuration file.

**Generating SSL keys**

In a real production environment, we would probably copy SSL credentials
into place rather than generating them on the fly. However, since
this particular cookbook provides a blueprint for building an experimental testbed
rather than an exact clone of our live system, we handle this task internally to make the system a little bit more developer-friendly.

The basic idea behind the following code is that we want to generate an SSL
certificate and private key for whatever domain name you'd like, so that 
it is possible to serve up the application over SSL within a virtualized 
staging environment. But since that is somewhat of an obscure use case, you
can focus on what interesting Chef features are being used
in the following code rather than the particular shell code being executed:

```ruby
ssl_dir = ::File.join(node["nginx"]["dir"], "ssl")
directory ssl_dir do
  owner "root"
  group "root"
  mode  "0600"
end

domain_name = node["practicingruby"]["rails"]["host"]
bash "generate-ssl-files" do
  cwd   ssl_dir
  flags "-e"
  code <<-EOS
    DOM=#{domain_name}
    openssl genrsa -out $DOM.key 4096
    openssl req -new -batch -subj "/CN=$DOM" -key $DOM.key -out $DOM.csr
    openssl x509 -req -days 365 -in $DOM.csr -signkey $DOM.key -out $DOM.crt
    rm $DOM.csr
  EOS
  notifies :reload, "service[nginx]"
  # File.exist? replaces the deprecated File.exists? alias (removed in Ruby 3.2).
  not_if   { ::File.exist?(::File.join(ssl_dir, domain_name + ".crt")) }
end
```

As you read through this code, you may have noticed that `::File` is used
instead of `File`, which looks a bit awkward. The problem here is that
Chef defines its own `File` class that ends up having a naming collision with
Ruby's core class. So to safely make use of Ruby's `File` class, we need to
explicitly do our constant lookup from the top-level namespace. This is just a
small side effect of how Chef's recipe DSL is implemented, but it is
worth noting to clear up any confusion.

With that distraction out of the way, we can skip right over the `directory`
code which we've seen in earlier recipes, and turn our attention to the `bash`
command and its options. This example is far more interesting than the one we
used to update RubyGems earlier, because in addition to specifying a command to
execute and a `not_if` guard clause, it also does all of the following things:

* Switches the working directory to the SSL directory we created within our Nginx directory.
* Sets the `-e` flag, which will abort the script if any command fails to run successfully.
* Uses a service notification to tell Nginx to reload its configuration files.

From this we see that executing shell code via a Chef recipe isn't quite the
same thing as simply running some commands in a console. The entire surrounding
context is also specified and verified, making it a whole lot more likely
that things will work the way you expect them to. If these benefits were
harder to see in the Ruby installation recipe, they should be easier to
recognize now.

**Configuring Nginx to serve up Practicing Ruby**

Although the [nginx cookbook](https://github.com/opscode-cookbooks/nginx) takes care 
of setting up our `nginx.conf` file for us, it does not manage site 
configurations for us. We need to take care of that ourselves and
tweak some settings dynamically, so that means telling our
recipe to make use of a template:

```ruby
template "#{node["nginx"]["dir"]}/sites-available/practicingruby" do
  source "nginx_site.erb"
  owner  "root"
  group  "root"
  mode   "0644"
  variables(:domain_name => domain_name)
end
```

The [full template](https://github.com/elm-city-craftworks/practicing-ruby-cookbook/blob/master/templates/default/nginx_site.erb)
is a rather long file full of the typical Nginx boilerplate, but the small
excerpt below shows how it is customized using ERB to insert some dynamic
content:

```erb
server {
  listen 80;
  server_name <%= "#{@domain_name} www.#{@domain_name}" %>;
  rewrite ^ https://$server_name$request_uri? permanent;
}
```

Once the configuration file is generated and stored in the right place, we
enable it using the following command:

```ruby
nginx_site "practicingruby" do
  enable true
end
```

Under the hood, the [nxensite](https://github.com/Dreyer/nxensite) script is used 
to do the actual work of enabling the site, but that implementation detail is 
deliberately kept hidden from view.

At this point, we have studied enough features of Chef to establish a basic
literacy that will facilitate reading a wide range of recipes with only
a little bit of effort. At the very least, you now have enough
knowledge to make sense of every recipe in Practicing Ruby's cookbook.

## A cookbook for building a (mostly) complete Rails environment 

The goal of this article was to give you a sense of what kinds of building
blocks Chef recipes are made up of so that you could see various
infrastructure automation concepts in practice. If you feel like you've
made it that far, you may now be interested in looking at how a complete
automation project is sewn together.

The full [Practicing Ruby cookbook][pr-cookbook] contains a total of eight recipes,
three of which we've already covered in this article. The five recipes
we did not discuss are responsible for handling the
following chores:

* Creating and managing a deployment user account to be used by Capistrano.
* Installing PostgreSQL and configuring a database for use with our Rails app.
* Configuring Unicorn and managing it as an Upstart service.
* Setting up some folders and files needed to deploy our Rails app.
* Installing and managing MailCatcher as a service, to make email testing easier.

If you are curious about how these recipes work, go ahead and read them! Many
are thin wrappers around external cookbook dependencies, and none of them use
any Chef features that we haven't already discussed. Attempting to
make sense of how these recipes work would be a great way to test your 
understanding of what we covered in this article.

If you want to take things a step farther, you can actually try to provision a
production-like environment for Practicing Ruby on your own system. The
cookbook's [README file](https://github.com/elm-city-craftworks/practicing-ruby-cookbook#readme) is fairly detailed, and we have things set up to work within a
virtual machine that can run in isolation without having a negative impact
on your own development environment. We also simplify a few things to make
setup easier, such as swapping out GitHub authentication for OmniAuth developer
mode, making most service integrations optional, and other little tweaks that
make it possible to try things out without having to do a bunch of 
configuration work.

I absolutely recommend trying to run our cookbook on your own to learn a whole
lot more about Chef, but fair warning: to do so you will need to become familiar
with the complex network of underplumbing that we intentionally avoided
discussing in this article. It's not too hard to work your way through, but
expect some turbulence along the way.

## Epilogue: What are the costs of infrastructure automation?

The process of learning from Practicing Ruby's cookbook, and the act
of writing this article really convinced me that I had greatly underestimated
the potential benefits that infrastructure automation has to offer. However, it
is important to be very clear on one point: there's no such thing as a 
free lunch.

At my current stage of understanding, I feel the same about Chef as I do about
Rails: impressed by its vast capabilities, convinced of its utility, and shocked
by its complexity. There is a tremendous number of moving parts that you need
to understand before it becomes useful, and many layers of subsystems that need
to be wired up before you can actually get any of your recipes to run.

Another concern is that "infrastructure as code" comes with the drawbacks
associated with code and not just the benefits. Third-party cookbooks vary in
quality and sometimes need to be patched or hacked to get them to work the way
you want, and some abstractions are leaky and leave you doing some tedious work 
at a lower level than you'd want. Dependency management is also complicated: using external cookbooks means introducing at least one more fragile package 
installer into your life.

In the case of Chef in particular, it is also a bit strange that although its
interface is mostly ordinary Ruby code, it has developed in a somewhat parallel
universe where the user is assumed to know a lot about system administration,
and very little about Ruby. This leads to some design choices that aren't
necessarily bad, but are at least surprising to an experienced Ruby developer.

And as for infrastructure automation as a whole, well... it doesn't fully free
you from knowing quite a few details about the systems you are trying to manage.
It does allow you to express ideas at a higher level, but you still need to
be able to peel back the veneer and dive into some low level system
administration concepts whenever something doesn't work the way you expect it
to, or doesn't support the feature you want to use via its high level
interface. In that sense, an automated system will not necessarily reduce
learning costs, it just has you doing a different kind of learning.

Despite all these concerns, I have to say that this is one skillset that I wish
I had picked up years ago, and I fully intend to look for opportunities
to apply these ideas in my own projects. I hope after reading this article,
you will try to do the same, and then share your stories about your experiences.

## Recommendations for further reading

Despite having a very complex ecosystem, the infrastructure automation world
(and especially the Chef community) has a ton of useful documentation that is
freely available and easy to get started with. Here are a few resources to try
out if you want to continue exploring this topic on your own:

* [Opscode Chef documentation](http://docs.opscode.com): The official Chef documentation; comprehensive and really well organized. 

* [Opscode public cookbooks](https://github.com/opscode-cookbooks): You can learn a lot by reading some of the most widely-used cookbooks in the Chef community. For complex examples, definitely check out the [apache2](https://github.com/opscode-cookbooks/apache2) and [mysql](https://github.com/opscode-cookbooks/mysql) cookbooks.

* [#learnchef](https://learnchef.opscode.com/): A collection of tutorials and screencasts designed to help you learn Chef.

* [Common Idioms in Chef Recipes](http://www.opscode.com/blog/2013/09/04/demystifying-common-idioms-in-chef-recipes/): Explanation of (possibly surprising) idioms that sometimes appear in recipe code.

* [Learning Chef](http://mlafeldt.github.io/blog/2012/09/learning-chef): A friendly introduction to Chef written by Mathias.

If you've got some experience with infrastructure automation and have found
other tutorials or articles that you like which aren't listed here, please leave
a comment. Mathias will also be watching the comments for this article, so
don't be afraid to ask any general questions you have about infrastructure
automation or Chef, too.

Thanks for making it all the way to the end of this article, and happy automating!

[puppet]: http://projects.puppetlabs.com/projects/puppet
[chef]: http://www.opscode.com/chef/
[pr-cookbook]: https://github.com/elm-city-craftworks/practicing-ruby-cookbook/tree/1.0.8
[pr-cookbook-attributes]: https://github.com/elm-city-craftworks/practicing-ruby-cookbook/blob/1.0.8/attributes/default.rb
[pr-web]: https://github.com/elm-city-craftworks/practicing-ruby-web
[chef-attributes]: http://docs.opscode.com/essentials_cookbook_attribute_files.html
[God]: http://godrb.com/
[god-init]: https://raw.github.com/elm-city-craftworks/practicing-ruby-cookbook/37ca12dc6432dfee955a70b6f2cc288e40782733/files/default/god.sh
[pr-web-dj]: https://github.com/elm-city-craftworks/practicing-ruby-web/blob/master/config/delayed_job.god


================================================
FILE: articles/v7/007-wumpus.md
================================================
[Hunt the Wumpus][wikipedia] is a hide-and-seek game that takes place in an underground
cave network full of interconnected rooms. To win the game, the player
needs to locate the evil Wumpus and kill it while avoiding various different 
hazards that are hidden within the cave.

Originally written by Gregory Yob in the 1970s, this game is traditionally
played using a text-based interface, which leaves plenty up to the
player's imagination, and also makes programming easier for those who
want to build Wumpus-like games of their own.

Because of its simple but clever nature, Hunt the Wumpus has been ported 
to many different platforms and programming languages over the last several
decades. In this article, you will discover why this blast from the past 
serves as an excellent example of creative computing, and you'll also 
learn how to implement it from scratch in Ruby.

## Gameplay demonstration

There are only two actions available to the player throughout the game: to move
from room to room, or to shoot arrows into nearby rooms in an attempt to kill 
the Wumpus. Until the player knows for sure where the Wumpus is, most of their actions 
will be dedicated to moving around the cave to gain a sense of its layout:

    You are in room 1.
    Exits go to: 2, 8, 5
    -----------------------------------------
    What do you want to do? (m)ove or (s)hoot? m
    Where? 2
    -----------------------------------------
    You are in room 2.
    Exits go to: 1, 10, 3
    -----------------------------------------
    What do you want to do? (m)ove or (s)hoot? m
    Where? 10
    -----------------------------------------
    You are in room 10.
    Exits go to: 2, 11, 9

Even after only a couple actions, the player can start to piece together
a map of the cave's topography, which will help them avoid getting lost
as they continue their explorations:

![](//i.imgur.com/5gCTOAt.png)

Play continues in this fashion, with the player wandering around until 
a hazard is detected:

    What do you want to do? (m)ove or (s)hoot? m
    Where? 11
    -----------------------------------------
    You are in room 11.
    Exits go to: 10, 8, 20
    -----------------------------------------
    What do you want to do? (m)ove or (s)hoot? m
    Where? 20
    -----------------------------------------
    You are in room 20.
    You feel a cold wind blowing from a nearby cavern.
    Exits go to: 11, 19, 17

In this case, the player has managed to get close
to a bottomless pit, which is detected by the presence of
a cold wind emanating from an adjacent room.

Because hazards are sensed indirectly, the player needs to use a deduction
process to know for sure which hazards are in what rooms. With the knowledge of
the cave layout so far, the only thing that is for certain is that there is at least one
pit nearby, with both rooms 17 and 19 being possible candidates. One of them
might be safe, but there is also a chance that BOTH rooms contain pits.
In a literal sense, the player might have reached a dead end:

![](//i.imgur.com/D6aA2wl.png)

A risky player might chance it and try one of the two rooms, but
that isn't a smart way to play. The safe option is to 
backtrack in search of a different path through the cave:

    What do you want to do? (m)ove or (s)hoot? m
    Where? 11
    -----------------------------------------
    You are in room 11.
    Exits go to: 10, 8, 20
    -----------------------------------------
    What do you want to do? (m)ove or (s)hoot? m
    Where? 8
    -----------------------------------------
    You are in room 8.
    You smell something terrible nearby
    Exits go to: 11, 1, 7

Changing directions ends up paying off. Upon entering room 8,
the terrible smell that is sensed indicates that the Wumpus is nearby,
and because rooms 1 and 11 have already been visited, there
is only one place left for the Wumpus to be hiding:

    What do you want to do? (m)ove or (s)hoot? s
    Where? 7
    -----------------------------------------
    YOU KILLED THE WUMPUS! GOOD JOB, BUDDY!!!

At the end of the hunt, the player's map ended up looking like this:

![](//i.imgur.com/IZnqNNw.png)

In less fortunate circumstances, the player would need to do a lot more
exploration before they could be certain about where the Wumpus 
was hiding. Other hazards might also be encountered, including giant bats 
that are capable of moving the player to a random location in the cave.
Because all these factors are randomized in each new game, Hunt the Wumpus
can be played again and again without ever encountering an identical
cave layout.

We will discuss more about the game rules throughout the rest of this
article, but the few concepts illustrated in this demonstration are more 
than enough for us to start modeling some of the key game objects.
Let's get to work!

## Implementing "Hunt the Wumpus" from scratch

Like many programs from its era, Hunt the Wumpus was designed to 
be hackable. If you look at one of the [original publications][atari]
about the game, you can see that the author actively encourages
tweaking its rules, and even includes the full source code 
of the game.

Before you rush off to study the original implementation, remember that 
it was written four decades ago in BASIC. Unless you consider yourself
a technological archaeologist, it's probably not the best way to
learn about the game. With that in mind, I've put together a learning
exercise that will guide you through implementing some of the core 
game concepts of Hunt the Wumpus -- without getting bogged down in
specific game rules or having to write boring user interface code.

In particular, I want you to implement three classes that I have 
already written the tests for:

1. A `Wumpus::Room` class to manage hazards and connections between rooms
2. A `Wumpus::Cave` class to manage the overall topography of the cave
3. A `Wumpus::Player` class that handles sensing and encountering hazards

You can work through this exercise by [cloning its git repository][wumpus-diy], 
and following the instructions in the README. I have put the tests for each 
class on its own branch, so that you can merge them into your own code 
one at a time until you end up with a complete passing test suite.

Once these three classes are written, you'll be able to use my UI code 
and game logic to play a rousing round of Hunt the Wumpus. You'll
also be able to compare your own work to my [reference implementation][wumpus-ref]
of the game, and discuss any questions or thoughts with me about
the differences between our approaches.

Throughout the rest of this article, I will provide design and implementation
notes for each class, as well as a brief overview of how the game rules for
Hunt the Wumpus can be implemented using these objects. These notes
should help you interpret what the test suite is actually asking
you to build, and will also help you understand my reference
implementation.

> **NOTE:** If you're short on time or aren't in the mood for hacking
right now, you can still get a lot out of this exercise by simply 
thinking about how you'd write the code to pass the provided test 
suite, and then looking at my implementation. But it's definitely
better to at least *try* to write some code yourself, even
if you don't complete the full exercise.

## Modeling rooms 

Structurally speaking, rooms and their connections form a simple undirected graph:

![](//i.imgur.com/p81T0Gn.png)

Our `Room` class will manage these connections, and also make it easy 
to query and manipulate the hazards that can be found in a room --
including bats, pits, and the wumpus itself. In particular, we will
build an object with the following attributes and behaviors:

```ruby
describe "A room" do
  it "has a number"
  it "may contain hazards"

  describe "with neighbors" do
    it "has two-way connections to neighbors"
    it "knows the numbers of all neighboring rooms"
    it "can choose a neighbor randomly"
    it "is not safe if it has hazards" 
    it "is not safe if its neighbors have hazards"
    it "is safe when it and its neighbors have no hazards"
  end
end
```

Let's walk through each of these requirements individually and fill
in the necessary details.

1) Every room has an identifying number that helps the player keep 
track of where they are:

```ruby
describe "A room" do
  let(:room) { Wumpus::Room.new(12) }

  it "has a number" do
    room.number.must_equal(12)
  end

  # ...
end
```

2) Rooms may contain hazards, which can be added or removed as the 
game progresses:

```ruby
it "may contain hazards" do 
  # rooms start out empty
  assert room.empty?

  # hazards can be added
  room.add(:wumpus)
  room.add(:bats)

  # a room with hazards isn't empty
  refute room.empty?

  # hazards can be detected by name
  assert room.has?(:wumpus)
  assert room.has?(:bats)

  refute room.has?(:alf)

  # hazards can be removed
  room.remove(:bats)
  refute room.has?(:bats)
end
```

3) Each room can be connected to other rooms in the cave:

```ruby
describe "with neighbors" do
  let(:exit_numbers) { [11, 3, 7] }

  before do
    exit_numbers.each { |i| room.connect(Wumpus::Room.new(i)) }
  end

   # ...
end
```

4) One-way paths are not allowed, i.e. all connections between rooms are
bidirectional:

```ruby
it "has two-way connections to neighbors" do
  exit_numbers.each do |i| 
    # a neighbor can be looked up by room number
    room.neighbor(i).number.must_equal(i)

    # Room connections are bidirectional
    room.neighbor(i).neighbor(room.number).must_equal(room)
  end
end
```

5) Each room knows all of its exits, which consist of
all neighboring room numbers:

```ruby
it "knows the numbers of all neighboring rooms" do
  room.exits.must_equal(exit_numbers)
end
```

6) Neighboring rooms can be selected at random, which is
useful for certain game events:

```ruby
it "can choose a neighbor randomly" do
  exit_numbers.must_include(room.random_neighbor.number)
end
```

7) A room is considered safe only if there are no hazards within it
or any of its neighbors:

```ruby
it "is not safe if it has hazards" do
  room.add(:wumpus)

  refute room.safe?
end

it "is not safe if its neighbors have hazards" do
  room.random_neighbor.add(:wumpus)

  refute room.safe?
end

it "is safe when it and its neighbors have no hazards" do
  assert room.safe?
end
```

**Implementation notes**

Because this object only handles basic data transformations, it
shouldn't be hard to implement. But if you get stuck, you
can always look at [my version of the Wumpus::Room class][wumpus-room].

## Modeling the cave

Although a game of Hunt the Wumpus can be played with an arbitrary cave layout,
the traditional Wumpus cave is based on the [dodecahedron][]. To
model things this way, a room is placed at each vertex, and the edges form
the connections between rooms. If you squash the structure to fit in a
two-dimensional space, you end up with the following graph:

![](//i.imgur.com/Myxk4vS.png)

Even though it would be technically possible to construct this structure without
a collection object by connecting rooms together in an ad-hoc fashion,
traversing the structure and manipulating it would be cumbersome. For that
reason, we will build a `Wumpus::Cave` object with the following properties:

```ruby
describe "A cave" do
  it "has 20 rooms that each connect to exactly three other rooms" 
  it "can select rooms at random"
  it "can move hazards from one room to another"
  it "can add hazards at random to a specific number of rooms"
  it "can find a room with a particular hazard"
  it "can find a safe room to serve as an entrance"
end
```

Some of these features are a bit tricky to explain comprehensively through
tests, but the following examples should give you a basic idea of
how they're meant to work.

1) The cave has 20 rooms, and each room is connected to exactly 
three other rooms:

```ruby
describe "A cave" do
  let(:cave)  { Wumpus::Cave.dodecahedron }
  let(:rooms) { (1..20).map { |i| cave.room(i) } }

  it "has 20 rooms that each connect to exactly three other rooms" do
    rooms.each do |room|
      room.neighbors.count.must_equal(3)
      
      assert room.neighbors.all? { |e| e.neighbors.include?(room) }
    end
  end
end
```

The intent here is to loosely verify that the layout is dodecahedron-shaped,
shaped, but it is more of a sanity check than a strict validation.
A stronger check would require us to compute things like minimal
cycles for each point, which would make for a much more 
complicated test.

In my implementation I use a JSON file that hard-codes the 
connections between each room explicitly rather than trying to 
automatically generate the layout, so this test is mostly just to catch errors 
with that configuration file. If you reuse the [dodecahedron.json][json] 
file in your own code, it should make passing these tests easy.

2) Rooms in the cave can be selected randomly:

```ruby
it "can select rooms at random" do
  sampling = Set.new

  # see test/helper.rb for how this assertion works
  must_eventually("randomly select each room") do
    new_room = cave.random_room 
    sampling << new_room

    sampling == Set[*rooms] 
  end
end
```

This feature is important for implementing the behavior of giant bats, who move
the player to a random location in the cave. It is also useful for hazard
placement, as we'll see later. The way I test the behavior is a bit awkward,
but the basic idea is that if you keep selecting rooms at random, you'll
eventually hit every room in the cave.

3) Hazards can be moved from one room to another:

```ruby
it "can move hazards from one room to another" do
  room      = cave.random_room
  neighbor  = room.neighbors.first

  room.add(:bats)

  assert room.has?(:bats)
  refute neighbor.has?(:bats)

  cave.move(:bats, :from => room, :to => neighbor)

  refute room.has?(:bats)
  assert neighbor.has?(:bats)
end
```

This test shows bats being moved from a random room to
one of its neighbors, but `Cave#move` can be used to move any hazard
between any two rooms in the cave, even if they are not
adjacent to each other.

4) Hazards can be randomly distributed throughout the cave:

```ruby
it "can add hazards at random to a specific number of rooms" do
  cave.add_hazard(:bats, 3)

  rooms.select { |e| e.has?(:bats) }.count.must_equal(3)
end
```

For the most part, the work to be done here is just to pick
some rooms at random and add hazards
to them. However, because there is no sense in adding a single
type of hazard to a room more than once, `Cave#add_hazard`
should take care to select only rooms that do not already have
the specified hazard in them. This is hinted at by the specs,
but because the check is a loose one, just keep this detail
in mind while implementing this method.

5) Rooms can be looked up based on the hazards they contain:

```ruby
it "can find a room with a particular hazard" do
  cave.add_hazard(:wumpus, 1)

  assert cave.room_with(:wumpus).has?(:wumpus)
end
```

In my implementation, I just grab the first room that matches the 
criteria, but any matching room would be acceptable. It
would also make sense to have a `Cave#all_rooms_with` method, but it isn't needed for a basic implementation
of the game.

6) A safe entrance can be located:

```ruby
it "can find a safe room to serve as an entrance" do
  cave.add_hazard(:wumpus, 1)
  cave.add_hazard(:pit, 3)
  cave.add_hazard(:bats, 3)

  entrance = cave.entrance

  assert entrance.safe?
end
```

This is where the `Wumpus::Room#safe?` method comes in handy. Picking any room
that passes that condition is enough to get the job done here.

**Implementation notes**

The desired behavior of the `Wumpus::Cave` class is admittedly a bit
underspecified here, but in many cases minor variations won't affect
gameplay all that much. Some of these operations are also intentionally 
a bit more general than what is strictly needed for the game, to permit 
some experimentation with rule changes once you have a working implementation. 

This was a challenging object for me to design and test, because many 
of the features which are intuitively obvious are hard to specify 
formally. Do the best you can with building it, and refer
to [my implementation of the Wumpus::Cave class][wumpus-cave] whenever 
you hit any snags.

## Modeling the player

Despite the complexity of the cave layout, most game events in 
Hunt the Wumpus are triggered by local conditions based on the 
player's current room and its direct neighbors. For example, 
imagine that the player is positioned in Room #1 as shown in 
the following diagram:

![](//i.imgur.com/A0e5pMn.png)

With this setup, the player would sense the nearby hazards,
resulting in the following output:

    You are in room 1.
    You hear a rustling sound nearby
    You smell something terrible nearby
    Exits go to: 2, 3, 4

Ordinarily we'd need to do some investigation work to discover which hazards
were where, but because this is a contrived scenario, we don't 
need to guess. Knowing the layout of the neighborhood, we can enumerate the 
possible outcomes for any player action:

* The player will encounter the wumpus upon moving into room 2.
* The player will encounter bats upon moving into room 3.
* The player will not encounter any hazards in room 4.
* The player can shoot into room 2 to kill the wumpus.
* The player will miss the wumpus by shooting into room 3 or 4.

If you take this single example and generalize it, you'll find that every turn
of Hunt the Wumpus involves only three distinct kinds of events:

```ruby
describe "the player" do
  it "can sense hazards in neighboring rooms" 
  it "can encounter hazards when entering a room"
  it "can perform actions on neighboring rooms" 
end
```

With these requirements in mind, it is possible for us to model 
the `Wumpus::Player` class as an event-driven object that handles 
each event type listed above. The only state it needs to explicitly
maintain is a reference to the room currently being explored: everything 
else can be managed externally through callbacks. You'll see why this
is useful when we look at how the game rules are implemented later,
but for now just try to follow along as best as you can.

The test setup for the `Wumpus::Player` class is a bit complicated, mostly 
because we need to reconstruct something similar to the layout shown in the
previous diagram in order to meaningfully test its behavior:

```ruby
describe "the player" do
  let(:player) { Wumpus::Player.new }

  let(:empty_room) { Wumpus::Room.new(1) }

  let(:wumpus_room) do
    Wumpus::Room.new(2).tap { |e| e.add(:wumpus) }
  end

  let(:bat_room) do
    Wumpus::Room.new(3).tap { |e| e.add(:bats) }
  end

  # ...
end
```


In addition to wiring up some rooms, I also register all of the events we're 
interested in tracking during setup, using some dummy callbacks that are
meant to serve as stand-ins for real game logic. This is not an
elegant way of building a test harness, but it gets the job done:

```ruby
let(:sensed)      { Set.new }
let(:encountered) { Set.new }

before do
  empty_room.connect(bat_room)
  empty_room.connect(wumpus_room)

  player.sense(:bats) do
    sensed << "You hear a rustling"
  end

  player.sense(:wumpus) do
    sensed << "You smell something terrible"
  end

  player.encounter(:wumpus) do
    encountered << "The wumpus ate you up!"
  end

  player.encounter(:bats) do
    encountered << "The bats whisk you away!"
  end

  player.action(:move) do |destination|
    player.enter(destination)
  end
end
```

Once all of that is taken care of, the callbacks can be tested in isolated
scenarios:

```ruby
it "can sense hazards in neighboring rooms" do
  player.enter(empty_room)
  player.explore_room

  sensed.must_equal(Set["You hear a rustling", "You smell something terrible"])
  
  assert encountered.empty?
end

it "can encounter hazards when entering a room" do
  player.enter(bat_room)
  encountered.must_equal(Set["The bats whisk you away!"])
  
  assert sensed.empty? 
end

it "can perform actions on neighboring rooms" do
  player.act(:move, wumpus_room)
  player.room.must_equal(wumpus_room)

  encountered.must_equal(Set["The wumpus ate you up!"])
  assert sensed.empty?
end
```

These test cases verify that the right callbacks have been called
by manipulating simple sets of strings, but the real use case for 
the `Wumpus::Player` class is to trigger operations on 
game objects as well as the user interface. If you are having
trouble imagining what that would look like, it may help to 
read ahead a bit further before attempting to get these 
tests to pass.

**Implementation notes:**

Like the `Wumpus::Cave` class, this object is underspecified, but you probably
don't need to build something identical to [my implementation of Wumpus::Player][wumpus-player]
in order to get the game to run. However, you may want to make an effort
to ensure that callbacks are triggered in the order that they are registered,
otherwise you can run into some interesting edge cases when more than one
condition is satisfied at the same time.

## Defining the game rules

With a foundation in place, implementing the game logic for Hunt the
Wumpus is very easy. My version of the game simplifies the rules, but
hopefully still captures the spirit of the original.  

As you walk through the following code, you can treat the
`Wumpus::Narrator` object as a black box. This is a boring object that
only does some basic I/O under the hood, so your time
would be better spent focusing on the game logic.

With that caveat out of the way, let's take a look at how Hunt the Wumpus can be
implemented in terms of the three game objects we just built. To get started, we
need a cave!

```ruby
cave = Wumpus::Cave.dodecahedron
```

This cave will contain three pits, three giant bats, and the most evil and
stinky Wumpus you could ever imagine:

```ruby
cave.add_hazard(:wumpus, 1)
cave.add_hazard(:pit, 3)
cave.add_hazard(:bats, 3)
```

We also need a player to navigate the cave, and a narrator to regale us with
tales about the player's adventures:

```ruby
player    = Wumpus::Player.new
narrator  = Wumpus::Narrator.new
```

Whenever a player senses a hazard nearby, the narrator will give us a hint
of what kind of trouble lurks just around the bend:

```ruby
player.sense(:bats) do
  narrator.say("You hear a rustling sound nearby") 
end

player.sense(:wumpus) do
  narrator.say("You smell something terrible nearby")
end

player.sense(:pit) do
  narrator.say("You feel a cold wind blowing from a nearby cavern.")
end
```

If upon entering a room the player encounters the Wumpus, it
will become startled. We'll discuss the detailed consequences
of this later, but the basic idea is that it will cause the
Wumpus to either run away to an adjacent room, or to gobble
the player up:

```ruby
player.encounter(:wumpus) do
  player.act(:startle_wumpus, player.room)
end
```

When bats are encountered, the narrator will inform us of
the event, then a random room will be selected to drop
the player off in. If any hazards are encountered
in that room, the effects will be applied immediately,
possibly leading to the player's demise.

But assuming that the player managed to survive the flight, 
the bats will take up residence in the new location. This
can make navigation very complicated, because stumbling
back into that room will cause the player to be moved
to yet another random location:

```ruby
player.encounter(:bats) do
  narrator.say "Giant bats whisk you away to a new cavern!"

  old_room = player.room
  new_room = cave.random_room

  player.enter(new_room)

  cave.move(:bats, from: old_room, to: new_room)
end
```

If the player happens to come across a bottomless pit, the
story ends immediately, even though the player's journey
will probably go on forever:

```ruby
player.encounter(:pit) do
  narrator.finish_story("You fell into a bottomless pit. Enjoy the ride!")
end
```

The player's actions are what ultimately end up triggering game events. 
The movement action is straightforward: it simply updates the player's
current location and then fires callbacks for any hazards encountered:

```ruby
player.action(:move) do |destination|
  player.enter(destination)
end
```

Shooting is more complicated, although the way it is implemented here
is still a simplification of how the original game worked. In Gregory Yob's
version, you had only five arrows, but they could travel a distance of up to
five rooms, even shooting around corners if you knew the right path. In my
version, arrows are unlimited but can only fire into neighboring rooms.

If the player shoots into the room that the Wumpus is hiding in, the beast 
is slain and the story ends happily ever after. If instead the player shoots
into the wrong room, then no matter where the Wumpus is in the cave, it will 
be startled by the sound.

```ruby
player.action(:shoot) do |destination|
  if destination.has?(:wumpus)
    narrator.finish_story("YOU KILLED THE WUMPUS! GOOD JOB, BUDDY!!!") 
  else
    narrator.say("Your arrow missed!")

    player.act(:startle_wumpus, cave.room_with(:wumpus))
  end
end
```

When the Wumpus is startled, it will either stay where it is or move into
one of its neighboring rooms. The player will be able to hear the Wumpus
move anywhere in the cave, even if it is not in a nearby room.

If the Wumpus is in the same room as the player at the end of this process,
it will gobble the player up and the game will end in sadness and tears:

```ruby
player.action(:startle_wumpus) do |old_wumpus_room|
  if [:move, :stay].sample == :move
    new_wumpus_room = old_wumpus_room.random_neighbor
    cave.move(:wumpus, from: old_wumpus_room, to: new_wumpus_room)

    narrator.say("You heard a rumbling in a nearby cavern.")
  end

  if player.room.has?(:wumpus)
    narrator.finish_story("You woke up the wumpus and he ate you!")
  end
end
```

And that pretty much sums it up. I omitted a few lines of boilerplate
code that fire up the main event loop, but this pretty much covers
all of the code that implements the actual game rules. It is designed
to be very hackable, so please do experiment with it however you'd like.

If you want to review the full game executable without the intermingled
commentary, please see [the bin/wumpus script][wumpus-script].

## Additional Exercises

Hopefully by working through this article you've seen for yourself why Hunt the
Wumpus is both fun to play and fun to implement. If you are looking for more
things to try, I'd suggest the following activities:

* Limit the number of arrows that the player can shoot, and end the game when
the player runs out of arrows.

* Try implementing the "crooked arrow" behavior of the original Wumpus game. To
do this, allow the player to specify a path of up to five rooms. Whenever the
player guesses an incorrect path, have the arrow bounce into a random room.
If the arrow ends up hitting the player because of this, they lose!

* Make it harder to guess the connections between rooms by randomizing
the room numbers for each new game while keeping the overall shape the same.

* Try out one of the alternative cave layouts described in Gregory Yob's
followup publication about [Wumpus 2][atari-2].

* Add new hazards of your own, or other types of game objects that
are beneficial, or provide some more depth to the story.

* Implement a solver bot that plays the game automatically.

* Build a better user interface for the game, either improving the text-based
UI or attempting something using a GUI or web-based interface. You should
only need to edit the `Wumpus::Narrator` and `Wumpus::Console` objects
in order to replace the current interface.

* Keep the game behavior the same, but try out a different design than the one
I provided here and/or improve the test suite.

If you try out any of these extra credit exercises, please share your work. I'd
be very interested to see what you come up with. Until then, happy hacking!

[atari]: http://www.atariarchives.org/bcc1/showpage.php?page=247
[atari-2]: http://www.atariarchives.org/bcc2/showpage.php?page=244
[wumpus-ref]: https://github.com/elm-city-craftworks/wumpus/tree/reference_implementation
[wumpus-diy]: https://github.com/elm-city-craftworks/wumpus
[wumpus-room]: https://github.com/elm-city-craftworks/wumpus/blob/reference_implementation/lib/wumpus/room.rb
[wumpus-cave]: https://github.com/elm-city-craftworks/wumpus/blob/reference_implementation/lib/wumpus/cave.rb
[wumpus-player]: https://github.com/elm-city-craftworks/wumpus/blob/reference_implementation/lib/wumpus/player.rb
[wumpus-script]: https://github.com/elm-city-craftworks/wumpus/blob/reference_implementation/bin/wumpus
[wikipedia]: http://en.wikipedia.org/wiki/Hunt_the_Wumpus
[dodecahedron]: http://en.wikipedia.org/wiki/Dodecahedron
[json]: https://raw.github.com/elm-city-craftworks/wumpus/reference_implementation/data/dodecahedron.json


================================================
FILE: articles/v7/008-language-learning.md
================================================
[ ] Day 1 review
[ ] Day 2 review
[ ] Day 3 review
[ ] Day 4 review
[ ] Day 5 review
[ ] Explain four step system and projects
[ ] Add explanations of code
[ ] Connect together narrative


----------

http://alistapart.com/article/writing-is-thinking

First of all, why did this excerpt from your experience stand out to you, personally? 
Was this the moment something clicked for you regarding your work?

Secondly, why do you think things turned out the way they did? Were you surprised? 
Do you do things differently now as a result? When you spell this out, 
it’s the difference between journaling for yourself and writing for an audience.

Finally, is this something others in your line of work are prone to miss? 
Is it a rookie error, or something more like an industry-wide oversight? 
If you’ve tried to search online for similar opinions, do you get a lot 
of misinformation? Or is the good information simply not in a place
where others in your field are likely to see it?

----------

**Summarize my preliminaries and the four step structure, then make
case study presentation chronological but free-form narrative 
(i.e. don't put in clear headers for exercise, book, project, etc, make it 
more like a journal -- give the reader a 'riding shotgun' view of the
action, with a brief summary at the end of each day. Include evolution
in thought process and new realizations as I go. Wrap up with a where
to next section.**

**Look up dates for everything to linearize it, but don't be afraid to do mild
editing / resequencing for clarity**

## What is trivial code literacy?

**Starting with fizzbuzz may work, but consider condensing or replacing with a
shorter anecdote (the chinese one?)**

(rewrite to be a bit more positive, and to illustrate
Ruby vs. Erlang, and Reading vs. Writing)

Programmers often joke about how ridiculous it is to use the FizzBuzz problem as
a screening test, because it is so easy to solve. Derived from a word
game that's used to test the division skills of small children, FizzBuzz is
about as conceptually trivial as computing challenges get:

> Write a program that prints the numbers from 1 to 100. But for multiples 
of three print “Fizz” instead of the number and for the multiples of 
five print “Buzz”. For numbers which are multiples of both three and 
five print “FizzBuzz”.

Any working programmer or hobbyist that's built absolutely any software could
solve this problem in their sleep, as long as they were allowed to use a language 
they were already comfortable with. But suppose the interviewer asked a Ruby
programmer who had never worked in a functional programming language before to 
produce an Erlang solution instead. What would that programmer need to learn in
order to pass the test? Let's figure that out by working backwards from the 
following solution:

```erlang
%% FizzBuzz for the numbers 1 to 100, printed as a single Erlang list.
-module(fizzbuzz).
-export([run/0]).

%% Maps every integer in 1..100 through transform/1 and prints the
%% resulting list using the ~p format directive.
run() -> 
  io:format("~p~n", [lists:map(fun transform/1, lists:seq(1,100))]).

%% Clause order matters: the multiple-of-15 guard must be tried first,
%% otherwise the 3 or 5 clause would claim those numbers.
transform(X) when X rem 15 =:= 0 -> "FizzBuzz";
transform(X) when X rem 3  =:= 0 -> "Fizz";
transform(X) when X rem 5  =:= 0 -> "Buzz";
transform(X) -> X.
```

Let's assume that simply producing the source code shown above wouldn't be good
enough to pass the test; the programmer would actually need to execute it and
show that it produces the correct output, too. Right out of the gate, that means
understanding how to install Erlang, compile Erlang modules, and then call
functions on them. If that was all done successfully, the programmer might
produce something like the following output:

```erlang
$ erl
1> c(fizzbuzz).
{ok,fizzbuzz}
2> fizzbuzz:run().
[1,2,"Fizz",4,"Buzz","Fizz",7,8,"Fizz","Buzz",11,"Fizz",13,14,"FizzBuzz",16,
 17,"Fizz",19,"Buzz","Fizz",22,23,"Fizz","Buzz",26,"Fizz",28,29,"FizzBuzz",31,
 32,"Fizz",34,"Buzz","Fizz",37,38,"Fizz","Buzz",41,"Fizz",43,44,"FizzBuzz",46,
 47,"Fizz",49,"Buzz","Fizz",52,53,"Fizz","Buzz",56,"Fizz",58,59,"FizzBuzz",61,
 62,"Fizz",64,"Buzz","Fizz",67,68,"Fizz","Buzz",71,"Fizz",73,74,"FizzBuzz",76,
 77,"Fizz",79,"Buzz","Fizz",82,83,"Fizz","Buzz",86,"Fizz",88,89,"FizzBuzz",91,
 92,"Fizz",94,"Buzz","Fizz",97,98,"Fizz","Buzz"]
```

But imagine the interviewer was not convinced by this alone, and wanted the
programmer to walk through the code statement-by-statement and explain it.

```erlang
-module(fizzbuzz).
```

This code defines the module name, but it also implies what the filename should
be (`fizzbuzz.erl`). If the programmer didn't know the two must match,
autoloading would not work correctly when the Erlang `c()` shell command 
was used.

```erlang
-export([run/0]).
```

This code is necessary to make it possible to call the `fizzbuzz:run()` function
externally, because all Erlang functions are private by default. The programmer
would also need to explain that `run/0` means 
"the run function with zero arguments", demonstrating an understanding of the
concept of *function arity*.

```erlang
run() -> 
  io:format("~p~n", [lists:map(fun transform/1, lists:seq(1,100))]).
```

This line of code has a ton of features crammed into it, including calls to both
the `io` and `lists` standard libraries, along with the syntax for passing
an existing function as an argument to another function 
(e.g. `fun transform/1`).

```erlang
%% Clauses are tried top to bottom, so the multiple-of-15 guard comes first.
transform(X) when X rem 15 =:= 0 -> "FizzBuzz";
transform(X) when X rem 3  =:= 0 -> "Fizz";
transform(X) when X rem 5  =:= 0 -> "Buzz";
transform(X) -> X.
```

Finally, we see function overloading and guards, both concepts that don't exist
in Ruby, but are commonly used in Erlang.

When you add up all of these points, it takes a whole lot of knowledge for even
an experienced programmer to write such a trivial program in a language they're
unfamiliar with. When you throw in things like familiarizing yourself with new
syntax and grammar rules, it becomes easy to see that trivial code literacy
demands a whole lot more understanding of a language than it appears to
at a first glance.


```
Start every day coding, end every day thinking.

1. Warmup exercise (30 mins)

Make sure to have these ready the night before, pick stuff
that you can work on right away without having to study 
in advance. 

They can either be book exercises or stuff
from other sources, but they should be self-verifiable
for correctness. Goal is not to finish but just to 
learn as much as possible.

2. Book reading and exercises (90 mins)

Jumping around chapters is OK, but reading whole chapters
at a time is encouraged. Read what is most related to
the projects you're working on. 

Give reading a higher priority over exercises during this 
time, and do only exercises related to the current reading.

3. Work on projects (90 mins)

Start with bowling score calculator, then dining philosophers,
then IRC rover bot if time permits. Focus on getting working
code first, before worrying about correct code. But once you
have a working solution, figure out how to make it right,
and get help if necessary for style questions.

4. Review today's work and do next day's prep work (30 mins)

Prepare questions, TODO lists, and exercises for the next
day. Reflect on what was learned today, possibly looking
up tangential points that you didn't have time for in the
day, or seek solutions to exercises already published online,
or write notes asking for help. 

Try to do all four hours in a single day if possible,
otherwise try to do 1+2 and 3+4 in two sessions as close
together as possible.
```

Projects summary (what and why)
-----------------

Book learning
-------------

Typed in, ran, and tinkered with nearly every code snippet from 
CH 1 to CH 14. (First several chapters before the "practice week"
brought me up to fizz-buzz level knowledge, skipped a couple 
chapters, but wrote dozens of functions)

Went off on several tangents (find a couple examples)

Find some book learning highlights from each day, and note tie-ins with
exercises / projects.

(what's shown in this article is actually just a small fraction
of the code I typed, including all book snippets and most exercises
(get a count or rough estimate)

Maybe show the fizzbuzz example?

Daily Review
------------

Cover this by adding a wrap-up paragraph or two at the end of each day entry.
Summarize the most important lessons learned, the pitfalls and triumphs,
and what I had planned to do the next day.

Next actions
------------

What did I leave undone? What could I have done next?

* Process termination still not 100% clear to me
* I still suck at concurrency concepts

Wrapup
------

Re-state the four step system and its benefits in a couple paragraphs,
invite others to try it.

Learning is cyclical. Always go back and see how your new knowledge might have
been applied to old problems, particularly wherever you struggled before.


Raw journal notes + checklists
-------------------------------

Share, don't share, summarize?


Preliminaries: December 26 - Jan 5 (12 hrs)
-------------------------------------------

Summarize what was studied / learned during this time period.

Maybe create a bulleted list of "What I already know about erlang"
based on Ch 1-6 and my prior knowledge.


* structures: Atoms, Integers, Floats, Tuples, Lists, Records, Strings(`*`)
* constructs: Modules, annotations, functions.
* workflow: shell, compilation
* pattern matching, list comprehensions, guards, recursive coding, 
  single assignment
* Using io:format to print out output
* Basic error handling
* Standard library features like `erlang:*` and `lists:*` 

(Probably more)

Consider showing fizzbuzz example for this.

Day 1: January 6 (Monday)
-------------------------------------------

### Finding the smallest element of a list

Good refresher on pattern matching and recursive coding style (from Journal)

Original solution:

```erlang
-module(mylists).
-export([minimum/1]).

%% minimum(List) -> the smallest element of a non-empty list.
%% Seeds the accumulator with the head, then walks the tail.
minimum([H|T]) -> minimum(T, H).

%% minimum(Rest, Min): Min is the smallest element seen so far.
minimum([],    Min) -> Min;
minimum([H|T], Min) -> 
  case Min < H of
    true  -> minimum(T, Min);
    false -> minimum(T, H)
  end.
```

In retrospect:

```erlang
-module(mylists).
-export([minimum/1]).

%% Seed the accumulator with the head of the list.
minimum([Min|T]) -> minimum(T, Min).

%% Empty tail: the accumulator is the answer.
minimum([], Min) -> Min;

%% The accumulator is smaller than the head: keep it.
minimum([H|T], Min) when Min < H -> minimum(T, Min);

%% Otherwise the head becomes the new accumulator.
minimum([Min|T], _) -> minimum(T, Min).
```

## (Almost working ping-pong) 

I didn't have much to go on yet, but the file server example and hello world
example were useful in preparing this. It's always good to save all exercises
you work on / projects you work on because they become your library for
looking up features in context.

Almost works, but has a bug in it! Deal with that later.

```erlang
%% First attempt at the ping-pong exercise (known to be buggy).
-module(ping_pong).
-export([start/1, loop/1]).

%% Spawns a process that runs loop/1 with the given Message.
start(Message) -> spawn(ping_pong, loop, [Message]).

%% Waits for a {Client, N} tuple, prints Message, branches on N, and then
%% calls itself again.
loop(Message) ->
  receive
    {Client, N} ->
      io:format("~p Received: ~s~n", [self(), Message]),

      %% FIXME: Find out why this isn't working! %%
      %% NOTE(review): `true` below is a pattern matched against N, so it
      %% only matches the atom true; a catch-all clause would be `_`.
      case N of
        1    -> Client ! { self(), N -1 }, exit(self(), ok);
        0    -> exit(self(), ok);
        true -> Client ! { self(), N - 1 }
      end
  end,
  loop(Message).
```

## Reading notes

Already read Ch 1-6 (skimmed 5) during preliminaries doing most of their
exercises. Decided to skip Ch 7 on binary processing, since none of my projects
would need it.

Focus for the day is on Ch 8, a misc. grab bag of erlang features. 

> TODO: Try to find features introduced in this chapter and point them out
if they're shown in other code samples, particularly in this day's project
or the next day's warmup exercise.

Possible points of interest:

* Code loading (pp124-126)
* Macros / preprocessor
* Annotations
* Non-short circuiting logic
* List subtraction
* Process dictionary

## Bowling

Discuss the first data modeling challenges. Note that at this point, I'm not
even sure what the differences between lists and tuples are (see the journal).

Note how pattern matching makes non-uniformity less awkward than in Ruby
(e.g. `{10}` vs `{A, B}`)

Note how I got hung up on `true -> ...` in case for a while, but eventually
came to realize that catchall would be something like `_ -> ...`. Discovering
the same bug in this code helped me realize what I'd need to do to fix
the other ping-pong code, but only after sleeping on it (maybe split
this explanation up into two parts, one in today's review, the other in the
next day). 

Note how it was fun to write simple unit tests this way, if brittle.

```erlang
%% Bowling score calculator. A game is a list of frames: a one-element
%% tuple such as {10} is treated as a strike, and {A, B} holds the two
%% balls of any other frame.
-module(bowling).
-export([score/1, test/0]).

%% score(Frames) -> total score, adding bonus balls taken from the
%% following frames to strikes and spares.
score([]) -> 0;
score([H|T]) -> 
  case H of
    %% Strike: add the next two balls.
    {A} ->

      [H1|T1] = T,

      case H1 of
        { B, C } -> A + B + C + score(T);
        %% Next frame is also a strike: the second bonus ball comes from
        %% the frame after it.
        { B } -> 
          case T1 of
            %% Nothing after the next strike: scoring stops here with 0.
            []    -> 0;
            _ ->
              [H2|_] = T1,
              case H2 of
                { C, _ } -> A + B + C + score(T);
                { C }    -> A + B + C + score(T)
              end
          end
      end;

    %% Spare: add the first ball of the next frame.
    {A, B} when 10 =:= A + B ->
      [H1|_] = T,

      case H1 of
        {C, _} -> A + B + C + score(T);
        {C}    -> A + B + C + score(T)
      end;


    %% Open frame. The leading `H,` expression has no effect on the result.
    {A, B} -> H, A + B + score(T)
  end.

%% Self-check: each match fails with badmatch if a score is wrong.
%% The comment above each game lists the expected score of every frame.
test() -> 
  9  = score([{7, 2}]), 

  %            4     6     8     9     5     0     3     1     0     4
  40 = score([{1,3},{2,4},{3,5},{5,4},{2,3},{0,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8     S:12  5     0     3     1     0     4
  43 = score([{1,3},{2,4},{3,5},{5,5},{2,3},{0,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8     S:15 5     0     3     1     0     4
  46 = score([{1,3},{2,4},{3,5},{10},{2,3},{0,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8     S:14  S:11  1     3     1     0     4
  52 = score([{1,3},{2,4},{3,5},{5,5},{4,6},{1,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8    S:21 S:13    3   3     1     0     4
  63 = score([{1,3},{2,4},{3,5},{10},{10},{1,2},{1,2},{1,0},{0,0},{1,3}]),

  300 = score([{10},{10},{10},{10},{10},{10},{10},{10},{10},{10},{10},{10}]),

  %             30   30   30   30   29   20   20    30   30   30   
  279 = score([{10},{10},{10},{10},{10},{10},{9,1},{10},{10},{10},{10},{10}]),

  ok.
```

## Day wrapup notes

>  The code I wrote for my bowling score calculator has no error handling, and
is a terrible mess of case statements, but it seems to be working! 

Day 2: January 7 (Tuesday)
-------------------------------------------

Originally planned to spend a whole extra
day on sequential erlang, but then realized
that concurrency is more interesting to me. 
(unsure to put this point at the head, the summary, or omit it)

## Working ping pong + Refactored ping-pong

Realized `true -> ...` case wasn't working but didn't connect it
to yesterday's notes yet, switch to `if`.

(working example)

```erlang
%% Ping-pong: two processes bounce a countdown back and forth.
-module(ping_pong).
-export([start/1, loop/1]).

%% Spawns a `ping` and a `pong` process, then sends the first message to
%% Ping with Pong named as the client to reply to. Returns `done`.
start(N) -> 
  Ping = spawn(ping_pong, loop, [ping]),
  Pong = spawn(ping_pong, loop, [pong]),

  Ping ! { Pong, N },
  done.

%% Receives {Client, N}, prints it, and decides whether to keep playing.
loop(Message) ->
  receive
    {Client, N} ->
      io:format("[~p] ~p Received: ~s ~n", [N, self(), Message]),

      %% N > 2: forward N - 1 and wait for another message.
      %% N =:= 2: forward N - 1 and return.
      %% Anything else: return without replying.
      if 
        N > 2   -> Client ! { self(), N - 1 }, loop(Message);
        N =:= 2 -> Client ! { self(), N - 1 };
        true -> void
      end
  end.
```

Then realize that I literally used the same pattern in my bowling example, but
used `case`, and switched back to it:

```erlang
%% Ping-pong: two processes bounce a countdown back and forth.
-module(ping_pong).
-export([start/1, loop/1]).

%% Spawns a `ping` and a `pong` process, then sends the first message to
%% Ping with Pong named as the client to reply to. Returns `done`.
start(N) -> 
  Ping = spawn(ping_pong, loop, [ping]),
  Pong = spawn(ping_pong, loop, [pong]),

  Ping ! { Pong, N },
  done.

%% Receives {Client, N}, prints it, and decides whether to keep playing.
loop(Message) ->
  receive
    {Client, N} ->
      io:format("[~p] ~p Received: ~s ~n", [N, self(), Message]),

      %% 1: return without replying.
      %% 2: forward N - 1 and return.
      %% `_` is the catch-all: forward N - 1 and wait for another message.
      case N of
        1 -> done;
        2 -> Client ! { self(), N - 1 }, done;
        _ -> Client ! { self(), N - 1 }, loop(Message)
      end
  end.
```

Here is where I struggled with process termination. Do we explicitly
call exit() when the process is done? Simply let the loop terminate?
Leave the loop running in a refreshed state? Is it a concern to have
processes accumulating in a zombie state of some sort? The exercise indicates
that we should make sure to terminate the processes gracefully, but I'm
unsure what that means in Erlang.

ED NOTE: Did not look this up at the time, but there is some relevant 
discussion here: http://stackoverflow.com/questions/14515480/processes-exiting-normally

Seems that when the function a process was spawned with simply returns, the
process terminates normally in Erlang, so no explicit `exit()` call is needed.

### Reading notes

Decided to skip Ch 9 on type system and Ch 10 on compiler tools, because neither
were essential for my projects and I was concerned about the limited time I'd
have in the practice week.

Possible points of interest:

* Tuple modules
* Message processing semantics (esp unmatched messages) -- 194
  (contrast this to our actor article, at least my understanding of it
  being roughly equivalent to a work queue. Possible look at Celluloid)
* More confusion around code loading
* Request/response pattern  + RPC style
* Receive timeouts
* Register
* TCO caveat
* Processes are cheap (seems like a broken record on this)
* Boilerplate ??? (maybe not)

Find a way to discuss the above without drawing too much from the book,
maybe pick a few topics and show original examples.

## Refactored bowling

Note the piecewise problem decomposition, and my concerns about being too
golf-ish, brittle. But also note my feeling of how it helps clarity / 
elegance / simplicity, and linearizes the code to eliminate nested conditionals.

```erlang
%% Refactored bowling score calculator. Frames are tuples: {10} is a
%% strike and {A,B} is any other frame.
-module(bowling).
-export([score/1, test/0]).

%% score(Frames) -> total score. Strike and spare frames are scored by
%% strike/1 and spare/1, which look ahead into the remaining frames.
score([])                          -> 0;
score([{10}|T])                    -> strike(T) + score(T);
score([{A,B}|T]) when 10 =:= A + B -> spare(T) + score(T);
score([{A,B}|T])                   -> A + B + score(T).

%% strike(Rest) -> score of a strike frame (10 plus the next two balls),
%% given the frames that follow it. A strike followed by nothing, or by
%% only one more strike, scores 0.
strike([])                   -> 0;
strike([{10}])               -> 0;
strike([{10}, {10}|_])       -> 30;
strike([{10}, {Ball2, _}|_]) -> 20 + Ball2;
strike([{Ball1, Ball2}|_])   -> 10 + Ball1 + Ball2.

%% spare(Rest) -> score of a spare frame (10 plus the next ball), or 0
%% when no frame follows.
spare([])                -> 0;
spare([{10}|_])          -> 20;
spare([{NextBall, _}|_]) -> 10 + NextBall.

%% Self-check: each match fails with badmatch if a score is wrong.
%% The comment above each game lists the expected score of every frame.
test() -> 
  9  = score([{7, 2}]), 

  %            4     6     8     9     5     0     3     1     0     4
  40 = score([{1,3},{2,4},{3,5},{5,4},{2,3},{0,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8     S:12  5     0     3     1     0     4
  43 = score([{1,3},{2,4},{3,5},{5,5},{2,3},{0,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8     S:15 5     0     3     1     0     4
  46 = score([{1,3},{2,4},{3,5},{10},{2,3},{0,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8     S:14  S:11  1     3     1     0     4
  52 = score([{1,3},{2,4},{3,5},{5,5},{4,6},{1,0},{1,2},{1,0},{0,0},{1,3}]),

  %            4     6     8    S:21 S:13    3   3     1     0     4
  63 = score([{1,3},{2,4},{3,5},{10},{10},{1,2},{1,2},{1,0},{0,0},{1,3}]),

  300 = score([{10},{10},{10},{10},{10},{10},{10},{10},{10},{10},{10},{10}]),

  %             30   30   30   30   29   20   20    30   30   30   
  279 = score([{10},{10},{10},{10},{10},{10},{9,1},{10},{10},{10},{10},{10}]),

  %             30   30   30   30   29   20   20    29   20   20   
  258 = score([{10},{10},{10},{10},{10},{10},{9,1},{10},{10},{9,1},{10}]),

  %             30   30   30   30   29   20   20    29   20   19   
  257 = score([{10},{10},{10},{10},{10},{10},{9,1},{10},{10},{9,1},{9,1}]),

  ok.
```

(refactoring also applies to list:min, though I didn't know
it at the time)

## Initial very broken dining philosophers

https://github.com/sandal/erlang-practice/commit/df17dddec3588bff7b73417d0290c48beb2cf6bf

(But maybe Chopstick is almost right? **CHECK THIS**)

## Day wrapup notes

Used most of my wrapup time on reading, because I had exercises queued
up for the next day (register race, ring).

Day 3: January 8 (Wednesday) -- cut short
-------------------------------------------

(May want to streamline the discussion for this day because of its lack of consistency with the
other days, but see how it plays out.)

## Register race condition

Note my struggle and lessons learned from this, even though I couldn't solve 
it myself. Note epiphany about Erlang not being totally immune to race
conditions, and my understanding of the problem. Also note request/response
pattern seen again here for sync (point out synthesis used to get from
here to dining philosophers).

```erlang
-module(concurrency).
-export([start/2]).

% Solution is from http://forums.pragprog.com/forums/27/topics/124

% start(Atom, Fun) spawns a process that registers itself as Atom and then
% runs Fun. The caller blocks until the spawned process reports whether
% registration worked: returns true on success, raises badarg otherwise.
start(Atom, Fun) ->
    % Remember the caller's pid so the spawned process can report back.
    Registrant = self(),
    spawn(
        fun() ->
            % register/2 is called from inside the new process, so the
            % name is claimed by the process that will run Fun.
            try register(Atom, self()) of
                true ->
                    Registrant ! true,
                    Fun()
            catch
                error:badarg ->
                    Registrant ! false
            end
        end),
    % Wait for the verdict before returning, so failure is reported by
    % start/2 itself rather than after it has returned.
    receive
        true  -> true;
        false -> erlang:error(badarg)
    end.
```

( consider cleaning up code )


My understanding after investigation (see journal for more details):

> It's not possible to use `whereis` to verify that a process has been
registered or not, because both processes could pass that test BEFORE their call
to `register` is processed. If one process completes the register process, the
other will fail, but only after `start()` returns, which is not what the
exercise calls for.

> To verify success or failure BEFORE `start()` returns, the spawned process
communicates back to the process that spawned it about its status. The
parent process uses `receive` to wait for a response, ensuring that failure is
communicated at the time the method returns, not after.

This problem is more general than the race condition, it also hints at how to
write functions that fail synchronously. Unsure whether synchronous failure
is an edge case in Erlang or not (where it would be the default in
single-threaded Ruby)

### Reading notes  (discuss Philo research?)

Possible points of interest:

* Self-calling `Fun`. Why did I want this? For quick spawn examples???
* "Let some other process fix the error" and "Let it crash"
* Corrective vs. defensive programming
* Some extra research into Dining Philosophers: Chandy/Misra and a waiter.

## Day wrapup notes

Didn't go as planned, mostly just did some work on exercise and a whole lot of
searching around for answers. Also didn't get that far on Dining Philosophers,
except to research strategy. But rather than attempt to make up for lost time,
I just let it go.

Day 4: January 9 (Thursday)
-------------------------------------------

## Process Ring

note dining philosophers synthesis.
(or maybe not, because we didn't go Chandy/Misra)

note mixed feelings about academic nature of exercise
note usefulness of drawing a picture

```erlang
%% Process ring: N processes connected in a circle pass one message
%% around until it has been forwarded N*M times.
-module(ring).
-export([send/2, loop/1]).

%% send(N, M) builds a ring of N processes and hands the first one a
%% message with a hop counter of N*M. Returns the message that was sent.
send(N, M) ->
  Head = build(N),
  Head ! { deliver, howdy, N*M }.

%% Spawns one ring member that has no observer yet.
start() ->
  spawn(ring, loop, [void]).

%% Tells From to forward its messages to To.
connect(From, To) ->
  From ! {observe, To}.

%% build(N) creates N connected processes, closes the circle by pointing
%% the last one back at the first, and returns the first.
build(N) ->
  First = start(),
  Last  = build(N-1, First),

  connect(Last, First),
  First.

%% build(N, Current) appends N more processes after Current and returns
%% the last one created.
build(0, Current) -> Current;

build(N, Current) ->
  Next = start(),
  connect(Current, Next),
  build(N-1, Next).

%% Ring member. Observer is the next process in the ring, or `void`
%% until an {observe, Pid} message has been received.
loop(Observer) ->
  receive
    {observe, NewObserver} -> 
      io:format("~p is now observing ~p~n", [self(), NewObserver]),
      loop(NewObserver);
    {deliver, _, 0} ->
      io:format("Done sending messages!~n"),
      loop(Observer);
    %% The guard leaves deliveries unmatched while there is no observer.
    {deliver, Message, Count} when Observer =/= void ->
      io:format("[~p], ~p is sending message ~p to ~p~n", 
        [Count, self(), Message, Observer]),
      Observer ! {deliver, Message, Count - 1},
      loop(Observer)
  end.
```

## Reading notes

Points of interest:

* Process linking and monitoring
* Various ways of error signalling
* Linking + monitoring patterns / firewalls

(most time spent on book code samples)


## Dining philosophers


```erlang
%% Dining philosophers: five philosopher processes share five chopstick
%% processes (see the chopstick module) and alternate thinking and eating.
-module(philosophers).
-export([dine/0, loop/3]).

%% Starts chopsticks 1..5 and seats the philosophers.
dine() ->
  [C1, C2, C3, C4, C5] = [chopstick:start(X) || X <- [1,2,3,4,5]],
  dine(C1, C2, C3, C4, C5).


%% Spawns one process per philosopher, each with the two chopsticks it
%% will request (in that order), then tells all of them to start thinking.
dine(C1, C2, C3, C4, C5) -> 
  Aristotle    = spawn(philosophers, loop, ["Aristotle",    C1, C2]),
  Popper       = spawn(philosophers, loop, ["Popper",       C2, C3]),
  Epictetus    = spawn(philosophers, loop, ["Epictetus",    C3, C4]),
  Heraclitus   = spawn(philosophers, loop, ["Heraclitus",   C4, C5]),
  Schopenhauer = spawn(philosophers, loop, ["Schopenhauer", C1, C5]),

  %% `!` is right-associative and evaluates to the message sent, so this
  %% delivers `think` to all five processes.
  Aristotle ! Popper ! Epictetus ! Heraclitus ! Schopenhauer ! think.

%% Philosopher process: handles one `think` or `eat` message, queues the
%% opposite message to itself, and repeats.
loop(Philosopher, LeftChopstick, RightChopstick) ->
  receive
    think -> 
      io:format("~p is thinking.~n", [Philosopher]),
      timer:sleep(1000),

      self() ! eat;
    eat   -> 
      %% Request the first chopstick and block until it is granted.
      LeftChopstick  ! {take, self()},

      receive
        {cs, FirstChopstick} ->
          io:format("~p picked up chopstick ~p~n", [Philosopher, FirstChopstick]), 

          %% Only then request the second one and block again.
          RightChopstick ! {take, self()},
          receive
            {cs, SecondChopstick} ->
              io:format("~p picked up chopstick ~p~n", [Philosopher, SecondChopstick]),
              io:format("~p is eating.~n", [Philosopher]),
              timer:sleep(1000)
          end
      end,

      LeftChopstick  ! {drop, self()},
      RightChopstick ! {drop, self()},

      %% FirstChopstick and SecondChopstick were bound inside the
      %% receive expressions above and are still in scope here.
      io:format("~p is done eating, releases chopsticks ~p and ~p~n",
        [Philosopher, FirstChopstick, SecondChopstick]),

      self() ! think
  end,

  loop(Philosopher, LeftChopstick, RightChopstick).
```

```erlang
%% A chopstick is a process that tracks which pid currently holds it.
-module(chopstick).
-export([start/1, loop/2]).

%% Spawns chopstick Number with no owner (`nobody`).
start(Number) ->
  spawn(chopstick, loop, [Number, nobody]).

%% Owner is already bound in every pattern below, so {take, Owner} and
%% {drop, Owner} only match messages that carry the current owner.
loop(Number, Owner) ->
  receive
    %% The current owner asks again: nothing changes and no reply is sent.
    {take, Owner} -> loop(Number, Owner);
    %% Free chopstick: grant it by replying {cs, Number} to the requester.
    %% A take request from anyone else while the chopstick is held matches
    %% no clause here and stays in the mailbox.
    {take, NewOwner} when Owner =:= nobody -> 
      NewOwner ! {cs, Number},
      loop(Number, NewOwner);
    %% The current owner releases the chopstick.
    {drop, Owner} ->
      loop(Number, nobody)
  end.
```

(note request/response synchronization and similarity to
N-ring problem, and also why I used the naive solution)

Note what I learned about Erlang from this example, even when using the naive
solution.

Never really thought through the many ways of solving this problem.

## wrapup

* Note about syntax errors (journal)
* Thoughts about asynchronous message delivery


Day 5: January 10 (Friday)
-------------------------------------------

### Start with monitor

Note points of interest here, mainly error handling.

```erlang
-module(errors).
-export([my_spawn/3]).

%% my_spawn(Mod, Func, Args) behaves like spawn/3 and returns the new pid,
%% but also starts a watcher process that reports why the spawned process
%% went down and how long it was alive.
my_spawn(Mod, Func, Args) ->
  Pid = spawn(Mod, Func, Args),
  %% First element of statistics(wall_clock) is used as the start time;
  %% the /1000 below treats it as milliseconds.
  {T1, _} = statistics(wall_clock),

  %% The watcher runs in its own process so my_spawn/3 can return at once.
  spawn(fun() ->
    Ref = monitor(process, Pid),
    receive
      %% Ref and Pid are already bound, so only the DOWN message for this
      %% particular monitor matches.
      { 'DOWN', Ref, process, Pid, Why } ->
        io:format("~p went down with reason: ~p~n", [Pid, Why]),

        {T2, _} = statistics(wall_clock),

        io:format("~p was alive for ~p seconds~n", [Pid, (T2-T1)/1000])
    end
  end),

  Pid.
```

## Reading notes summarized here in transition
  * Die together workers and parent monitor
  * Keepalive
  * Distributed erlang, session names + cookie based auth
  * KVM uses process dictionary, what about tuple modules? (i guess it'd run
    into register problems and remote RPC issues)
  * Didn't quite get `nl()` to work
  * Recurring problems with stuck processes + testing callbacks
    (move to review instead?)

## Trivial process

(explain tangent)

```erlang
-module(trivial_process).
-export([start/0, loop/1]).

% Spawns the ticking loop in a new process with the counter starting at 1.
start() -> spawn(?MODULE, loop, [1]).

% Prints "Tick N." and then waits up to one second for a message. Any message
% at all makes the process fail with error("Boom!"); if none arrives, the
% loop continues with N+1.
loop(N) ->
  io:format("Tick ~p.~n", [N]),

  receive
    _ -> error("Boom!")
  after 1000 ->
    loop(N+1)
  end.
```


## Rover


```erlang
-module(world).
-export([start/1, loop/3]).

% Reads the map from Filename and spawns the world loop with a hard-coded
% starting position of row 11, column 13.
start(Filename) ->
  spawn(world, loop, [read(Filename), 11, 13]).

% Server loop for the world process. MapData is the parsed map and Row/Col
% is the current position.
%
% Every request is a {Caller, MsgID, Command} tuple, and every reply is
% {self(), MsgID, Result} sent back to Caller:
%   snapshot              -> {snapshot, Text} for the area around the position
%   move_north/south/...  -> {Command, NewRow, NewCol}; the loop continues
%                            from the new position
% Messages that match neither clause are left in the mailbox.
loop(MapData, Row, Col) ->
  receive 
    {Caller, MsgID, snapshot} ->
      View = snapshot(MapData, Row, Col),
      Caller ! {self(), MsgID, {snapshot, View}},
      loop(MapData, Row, Col);
    {Caller, MsgID, Move} when Move =:= move_north; Move =:= move_south;
                               Move =:= move_east;  Move =:= move_west ->
      {NextRow, NextCol} = step(Move, Row, Col),
      Caller ! {self(), MsgID, {Move, NextRow, NextCol}},
      loop(MapData, NextRow, NextCol)
  end.

% Position reached by taking one step in the given direction.
step(move_north, Row, Col) -> {Row - 1, Col};
step(move_south, Row, Col) -> {Row + 1, Col};
step(move_east,  Row, Col) -> {Row, Col + 1};
step(move_west,  Row, Col) -> {Row, Col - 1}.


% Parses the map file into a tuple of rows, where each row is itself a tuple
% of the space-separated tokens found on that line. Tuples are used so that
% rows and cells can be fetched by position with element/2.
% Fails with a badmatch if the file cannot be read.
read(Filename) ->
  { ok, MapBinary } = file:read_file(Filename),
  MapText = binary_to_list(MapBinary),
  list_to_tuple(
    [ list_to_tuple(string:tokens(X, " ")) || 
      X <- string:tokens(MapText, "\n")]).

% Renders the 5x5 window of the map centred on (Row, Col) as text: cells are
% joined with a single space, rows with "\n", and a trailing "\n" is added.
% NOTE(review): nothing here clamps the window, so a position within two
% cells of the map edge asks element/2 for an index outside the tuple.
% TODO: add @ sign to indicate current location.
snapshot(Map, Row, Col) ->
  % Rows Row-2 .. Row+2 of the map.
  RowWindow = [ element(RowD, Map) || 
                RowD <- lists:seq(Row - 2, Row + 2) ],

  string:join(
    lists:map(fun(RowData) ->
      % Columns Col-2 .. Col+2 of this row.
      string:join(
        [element(ColD, RowData) || ColD <- lists:seq(Col - 2, Col + 2)], 
        " ")
      end, 
      RowWindow), 
     "\n") ++ "\n".
```

```erl
-module(radio).
-export([start/1, loop/1]).
-define(TRANSMISSION_DELAY, 5000).

% Spawns a radio process that forwards to the given Controller.
start(Controller) -> spawn(radio, loop, [Controller]).

% Relay loop that delivers every message after ?TRANSMISSION_DELAY.
%   {transmit, Pid, Message} - Message is wrapped as {RadioPid, Ref, Message},
%                              with a fresh reference, and sent to Pid.
%   anything else            - forwarded unchanged to Controller.
loop(Controller) ->
  {Recipient, Payload} =
    receive
      {transmit, Pid, Message} ->
        {Pid, {self(), erlang:make_ref(), Message}};
      Message ->
        {Controller, Message}
    end,

  erlang:send_after(?TRANSMISSION_DELAY, Recipient, Payload),

  loop(Controller).
```


```erl
-module(controller).
-export([start/0, loop/0]).

% Spawns the controller's printing loop in a new process.
start() -> spawn(controller, loop, []).

% Prints every incoming message and then waits for the next one. Snapshot
% replies are printed as text (~s) followed by their message id; any other
% message is printed as a raw term.
loop() ->
  receive
    {_, MsgId, {snapshot, MapData} } ->
      io:format("~s~n~n(msg id: ~p)~n", [MapData, MsgId]);
    Any -> io:format("Received message: ~p~n", [Any])
  end,
  loop().
```

Rover (note ugly map parsing code, consider attempting a refactor,
note ease of concurrency stuff)

## Daily wrapup

This project has given me a very visceral lesson in the differences between:

* Reading code
* Running example code
* Trying out coding exercises
* Working on toy projects
* Working on real projects

Learning a language is an N-dimensional activity; it's surprising that we
tend to have a much more simplistic view of what is involved (or at least I do).
We show so much willingness to theorize about and discuss things that we have
very little practical experience with.


================================================
FILE: articles/v7/008-oss-quality.md
================================================
> This article was written in collaboration with Eric Hodel
> ([@drbrain](http://twitter.com/drbrain)), a developer from Seattle. 
> Eric is a Ruby core team member, and he also maintains RubyGems 
> and RDoc. 

A big challenge in managing open source projects is that their codebases tend
to decay as they grow. This isn't due to a lack of technically skilled
contributors, but instead is a result of the gradual loss of understandability 
that comes along with any long-term and open-ended project 
that has a distributed team of volunteers supporting it.

Once a project becomes more useful, it naturally attracts a more
diverse group of developers who are interested in adapting the codebase to
meet their own needs. Patches are submitted by contributors who do not fully 
understand a project's implementation, and maintainers merge these patches 
without fully understanding the needs of their contributors. Maintainers 
may also struggle to remember the reasoning behind any of their own code that they 
haven't touched in a while, but they still need to be able to work with it.

As a result of both of these influencing factors, mistaken assumptions tend to 
proliferate as a project grows, and with them come bugs and undefined behaviors. 
When direct knowledge of the codebase becomes limited and unreliable, it's easy to 
let code quality standards slip without fully realizing the potential
for future problems. 

If bad code continues to accumulate in this fashion, improving one part of
a project usually means breaking something else in the 
process. Once a maintainer starts spending most of their time fixing bugs, 
it gets hard to move their project forward in meaningful 
ways. This is where open source development stops being fun, and starts feeling 
like a painful chore.

Not all projects need to end up this way, though. As long as project maintainers 
make sure to keep the quality arrow pointing upwards over the long haul, 
any bad code that temporarily accumulates in a project can always be replaced with 
better code whenever things start getting painful. The real challenge is to 
establish healthy maintenance practices that address quality issues 
in a consistent and sustainable way. 

### Developing a process-oriented approach towards quality

In this article, we'll discuss three specific tactics we've used in 
our own projects that can be applied at any stage in the software 
development lifecycle. These are not quick fixes; they are helpful 
habits that drive up understandability and code quality more and more
as you continue to practice them. The good news is that even though 
it might be challenging to keep up with these efforts on a daily basis, 
the recommendations themselves are very simple:

1. Let external changes drive incremental quality improvements
2. Treat all code without adequate testing as legacy code
3. Expand functionality via well-defined extension points 

We'll now take a look at each of these guidelines individually and walk 
you through some examples of how we've put them into practice in RDoc, 
RubyGems, and Prawn -- three projects that have had their own share of 
quality issues over the years, but continue to serve very diverse 
communities of users and contributors.

### 1) Let external changes drive incremental quality improvements

Although there is often an endless amount of cleanup work that can
be done in mature software projects, there is rarely enough
available development time to invest in these efforts. For programmers 
working on open source in their spare time, it is hard enough
to keep up with new incoming requests, so most preventative maintenance 
work ends up being deferred indefinitely. When cleanup efforts do happen,
they tend to be done in concentrated bursts and then things go back
to business-as-usual from there.

A better approach is to pay down technical debts little by little, not as a
distinct activity but as part of responding to ordinary change requests. There
are only two rules to remember when applying this technique in your daily work:

* Try to avoid making the codebase worse with each new change, or at least
minimize new maintenance costs as much as possible.
* If there is an easy way to improve the code while doing everyday work, 
go ahead and invest a little bit of effort now to make future changes easier. 

The amount of energy spent on meeting these two guidelines should be proportional
to the perceived risks and rewards of the change request itself, but typically
it doesn't take a lot of extra effort. It may mean spending an extra 10 minutes on a
patch that would take an hour to develop, or an extra hour on a patch that would
take a day to prepare. In any case, it should feel like an obviously good
investment that is well worth the cost you are paying for it.

There is a great example in Prawn that illustrates this technique being used,
and if you want to see it in its raw form, you can check out [this pull
request](https://github.com/prawnpdf/prawn/pull/587) from Matt Patterson.

Matt's request was to change the way that Prawn's image loading
feature detected whether it was working with an I/O object or a path to
a file on disk. Initially Prawn assumed that any object responding to `read` 
would be treated as an I/O object, but this was too loose of a test and
caused some subtle failures when working with `Pathname` objects.

The technical details of the change are not important here, so don't worry if
you don't understand them. Instead, just look at the method that would need to
be altered to fix this problem, and ask yourself whether you would feel
comfortable making a change to it:

```ruby
def build_image_object(file)
  file.rewind  if file.respond_to?(:rewind)
  file.binmode if file.respond_to?(:binmode)

  if file.respond_to?(:read)
    image_content = file.read
  else
    raise ArgumentError, "#{file} not found" unless File.file?(file)  
    image_content = File.binread(file)
  end
  
  image_sha1 = Digest::SHA1.hexdigest(image_content)

  if image_registry[image_sha1]
    info = image_registry[image_sha1][:info]
    image_obj = image_registry[image_sha1][:obj]
  else
    info = Prawn.image_handler.find(image_content).new(image_content)

    min_version(info.min_pdf_version) if info.respond_to?(:min_pdf_version)

    image_obj = info.build_pdf_object(self)
    image_registry[image_sha1] = {:obj => image_obj, :info => info}
  end

  [image_obj, info]
end
```

Although this probably isn't the absolute worst code you have ever seen, 
it isn't very easy to read. Because it takes on many responsibilities,
it's hard to even summarize what it is supposed to do! Fortunately for Matt,
the part that he would need to change was only the first few lines of the 
method, which are reasonably easy to group together:

```ruby
def build_image_object(file)
  file.rewind  if file.respond_to?(:rewind)
  file.binmode if file.respond_to?(:binmode)

  if file.respond_to?(:read)
    image_content = file.read
  else
    raise ArgumentError, "#{file} not found" unless File.file?(file)  
    image_content = File.binread(file)
  end

   # ... everything else 
end
```

The quick fix would have been to edit these lines directly, but Matt recognized
the opportunity to isolate a bit of related functionality and make the code a
little bit better in the process of doing so. Pushing these lines of code down
into a helper method and tweaking them slightly resulted in the following
cleanup to the `build_image_object` method:

```ruby
def build_image_object(file)
  io = verify_and_open_image(file)
  image_content = io.read

  # ... everything else
end
```

In the newly created helper method, Matt introduced his desired change, 
which is much easier to understand in isolation than it would have been in the 
original `build_image_object` method definition. In particular, he changed
the duck typing test to look for `rewind` rather than `read`, in the hopes
that it would be a more reliable way to detect I/O-like objects. Everything 
else would be wrapped in a `Pathname` instance:

```ruby
# Returns an IO to read the image data from.
#
# Anything that responds to +rewind+ is treated as an IO-like object: it is
# rewound, switched to binary mode when it supports +binmode+, and returned
# as-is. Anything else is wrapped in a Pathname and opened in 'rb' mode.
#
# Raises ArgumentError when the path is not an existing file.
def verify_and_open_image(io_or_path)
  # Duck-type check for IO-like objects.
  if io_or_path.respond_to?(:rewind)
    io = io_or_path

    io.rewind

    io.binmode if io.respond_to?(:binmode)
    return io
  end

  # Not IO-like, so treat the argument as a filesystem path.
  io_or_path = Pathname.new(io_or_path)
  raise ArgumentError, "#{io_or_path} not found" unless io_or_path.file?
  
  io_or_path.open('rb')
end
```

At this point, he could have submitted a pull request, because the tests were
still green and the new behavior was working as expected. However, the issue
he had set out to fix in the first place wasn't causing Prawn's tests to fail,
and that was a sign that there was some undefined behavior at the root of
this problem. Although Prawn had some tests for reading images referenced by
`Pathname` objects, it only had done its checks at a high level, and did not
verify that the PDF output was being rendered correctly.

A test would be needed at the lower level to verify that the output was no
longer corrupted, but this kind of testing is slightly tedious to do in Prawn.
Noticing this rough spot, Matt created an RSpec matcher to make this kind of
testing easier to do in the future:

```ruby
# Custom matcher: passes when the rendered output of +actual+ can be analyzed
# by PDF::Inspector::XObject without raising an error.
RSpec::Matchers.define :have_parseable_xobjects do
  match do |actual|
    # The nested expectation fails if analysis raises; otherwise the block
    # falls through to the explicit +true+ so the matcher reports success.
    expect { PDF::Inspector::XObject.analyze(actual.render) }.not_to raise_error
    true
  end
  failure_message_for_should do |actual|
    "expected that #{actual}'s XObjects could be successfully parsed"
  end
end
```

Finally, he provided a few test cases to demonstrate that his patch
fixed the problem he was interested in, and also covered some other 
common use cases as well:

```ruby
# Each example embeds the image at @filename from a different kind of source
# (a Pathname, a binary-mode File, a text-mode File) and then checks that the
# document's XObjects can still be parsed. @pdf and @filename are set up
# outside this excerpt.
context "setting the length of the bytestream" do
  it "should correctly work with images from Pathname objects" do
    info = @pdf.image(Pathname.new(@filename))
    expect(@pdf).to have_parseable_xobjects
  end

  it "should correctly work with images from IO objects" do
    info = @pdf.image(File.open(@filename, 'rb'))
    expect(@pdf).to have_parseable_xobjects
  end

  it "should correctly work with images from IO objects not set to mode rb" do
    info = @pdf.image(File.open(@filename, 'r'))
    expect(@pdf).to have_parseable_xobjects
  end
end
```

When you put all of these changes together, the total value of this patch
is much greater than the somewhat obscure bug it fixed. By addressing
some minor pain points as he worked, Matt also improved Prawn in the
following ways:

* The `build_image_object` method is now more understandable because one 
of its responsibilities has been broken out into its own method.

* The `verify_and_open_image` method allows us to group together all the
basic guard clauses for determining how to read the image data, 
making it easier to see exactly what those rules are.

* The added tests clarify the intended behavior of Prawn's image loading
mechanism.

* The newly added RSpec matcher will help us to do more
PDF-level checks in future tests.

None of these changes required a specific and focused effort of refactoring or redesign;
they just involved a bit of attention to detail and a willingness to make minor
improvements that would pay off for someone else in the future.

As a project maintainer, you cannot expect contributors to put this level of
effort into their patches -- Matt really went above and beyond here. However, 
you can definitely look for these kinds of opportunities yourself during review 
time, and either ask the contributor to make some revisions, or make them yourself 
before you merge in new changes. No matter who ends up doing the work, little by
little these kinds of incremental cleanup efforts can turn a rough codebase into
something pleasant to work with.

### 2) Treat all code without adequate testing as legacy code

Historically, we've defined legacy code as code that was written long before our
time, without any consideration for our current needs. However, any untested
code can also be considered legacy code[^1], because it often
has many of the same characteristics that make outdated systems difficult to
work with. Open source projects evolve quickly, and even very clean code
can cause a lot of headaches if its intended behavior is left undefined.

To guard against the negative impacts of legacy code, it helps 
to continuously update your project's test suite so 
that it constantly reflects your current understanding of the problem domain
you are working in. A good starting point is to make sure that your project
has good code coverage and that you keep your builds green in CI.
Once you've done that, the next step is to go beyond the idea of just having
lots of tests and start focusing on making your test suite more capable 
of catching problems before they leak out into released code.

Here are some things to keep in mind when considering the potential 
impact that new changes will have on your project's stability:

* Any behavior change introduced without test
coverage has a good chance of causing a defect or 
accidentally breaking backwards-compatibility in a future release.

* A passing test suite is not proof that a change is well-defined
and defect-free.

* The only reliable way to verify that existing features have 
well-defined behavior and good test coverage is to
review their code manually.

* Contributors often don't understand your project's problem domain 
or its codebase well enough to know how to write good tests for
their changes without some guidance.

These points are not meant to imply that each and every pull request
ought to be gone over with a fine-tooth comb -- they're only meant to 
serve as a reminder that maintaining a high quality test suite is
a harder problem than we often make it out to be. The same ideas
of favoring incremental improvements over heroic efforts that
we discussed earlier also apply here. There is no need to
rush towards a perfect test suite all at once, as long as it improves
on average over time.

We'll now look at a [pull request](https://github.com/rubygems/rubygems/pull/781/files) 
that Brian Fletcher submitted to
RubyGems for a good example of how these ideas can be applied 
in practice.

Brian's request was to add support for URL-encoded (percent-escaped) usernames and
passwords in gem request URLs. Because RubyGems already supported
the use of HTTP Basic Auth with unencoded usernames and passwords in
URLs, this was an easy change to make. The desired URL decoding functionality
was already implemented by `Gem::UriFormatter`, so the
initial commit for this pull request involved changing just a single line 
of code:

```diff
     request = @request_class.new @uri.request_uri
 
     unless @uri.nil? || @uri.user.nil? || @uri.user.empty? then
-      request.basic_auth @uri.user, @uri.password
+      request.basic_auth Gem::UriFormatter.new(@uri.user).unescape,
+                         Gem::UriFormatter.new(@uri.password).unescape
     end
 
     request.add_field 'User-Agent', @user_agent
```

On the surface, this looks like a fairly safe change to make. Because it only
adds support for a new edge case, it should preserve the original behavior
for URLs that did not need to be unescaped. No new test failures were introduced
by this patch, and a quick look at the test suite shows that `Gem::UriFormatter`
has some tests covering its behavior.

As far as changes go, this one is definitely low risk. But if you dig in a
little bit deeper, you can find a few things to worry about: 

* Even though only a single line of code was changed, that line of code
was at the beginning of a method that is almost 90 lines long. This isn't
necessarily a problem, but it should at least be a warning sign to slow
down and take a closer look at things.

* A quick look at the test suite reveals that although there were tests
for the `unescape` method provided by `Gem::UriFormatter`, there were no tests 
for the use of Basic Auth in gem request URLs, which means the behavior
this patch was modifying was not formally defined. Because of this, we can't
be sure that a subtle incompatibility wasn't introduced by this patch, 
and we wouldn't know if one was introduced later due to a change to
`Gem::UriFormatter`, either.

* The new behavior introduced by this patch also wasn't verified, which
means that it could have possibly been accidentally removed in a future 
refactoring or feature patch. Another contributor could easily assume 
that URL decoding was incidental rather than intentional without
tests that indicated otherwise.

These are the kinds of problems that a detailed review can discover,
but that are often invisible at the surface level. However, a much more
efficient maintenance policy is to simply assume one or more of the 
above problems exist whenever a change is introduced without tests, 
and then either add tests yourself or ask contributors to add them
before merging. 

In this case, Eric asked Brian to add a test after giving him some guidance 
on how to go about implementing it. For reference, this was his exact request:

> Can you add a test for this to test/rubygems/test_gem_request.rb?
>
> You should be able to examine the request object through the block #fetch yields to.

In response, Brian dug in and noticed that the base case of
using HTTP Basic Auth wasn't covered by the tests. So rather than simply 
adding a test for the new behavior he added, he went ahead and wrote tests 
for both cases:

```ruby
class TestGemRequest < Gem::TestCase
  # Baseline case: plain credentials embedded in the URL must be sent as an
  # HTTP Basic Authorization header.
  def test_fetch_basic_auth
    uri = URI.parse "https://user:pass@example.rubygems/specs." +
                    Gem.marshal_version
    @request = Gem::Request.new(uri, Net::HTTP::Get, nil, nil)
    conn = util_stub_connection_for :body => :junk, :code => 200

    response = @request.fetch

    # The header is read back from the stubbed connection.
    auth_header = conn.payload['Authorization']

    assert_equal "Basic #{Base64.encode64('user:pass')}".strip, auth_header
  end

  # New behavior: percent-escaped characters in the password (%7B and %7D are
  # "{" and "}") must be unescaped before the header is built.
  def test_fetch_basic_auth_encoded
    uri = URI.parse "https://user:%7BDEScede%7Dpass@example.rubygems/specs." +
                    Gem.marshal_version
    @request = Gem::Request.new(uri, Net::HTTP::Get, nil, nil)
    conn = util_stub_connection_for :body => :junk, :code => 200

    response = @request.fetch

    auth_header = conn.payload['Authorization']

    assert_equal "Basic #{Base64.encode64('user:{DEScede}pass')}".strip, 
                 auth_header
  end
end
```

It is hard to overstate the difference between a patch with these tests
added to it and one without tests. The original commit introduced a new
dependency and more complex logic into a feature that lacked formal definition
of its behavior. But as soon as these tests are added to the change request,
RubyGems gains support for a new special condition in gem request URLs while 
tightening up the definition of the original behavior. The tests
also serve to protect both conditions from breaking without being noticed 
in the future.

Taken individually, the risks of accepting untested patches are
small enough that they don't seem important enough to worry about when you are pressed
for time. But in the aggregate, the effects of untested code will pile up until your
codebase really does become unworkable legacy code. For that reason, establishing
good habits about reviewing and shoring up tests on each new change can make a
huge difference in long-term maintainability.

### 3) Expand functionality via well-defined extension points 

Most open source projects can benefit from having two clearly defined interfaces:
one for end-users, and one for developers who want to extend its functionality.
This point may seem tangentially related to code quality and maintainability,
but a well-defined extension API can greatly increase a project's stability.

When it's possible to add new functionality to a project without patching its
codebase directly, it becomes easier to separate essential features that most
people will need from features that are only relevant in certain rare 
contexts. The ability to support external add-ons in a transparent way also
makes it possible to try experiments outside of your main codebase and then
only merge in features that prove to be both stable and widely used.

Even within the scope of a single codebase, explicitly defining a layer one
level beneath the surface forces you to think about what the common points
of interaction are between your project's features. It also makes testing
easier, because feature implementations tend to get slimmed down as the
extension API becomes more capable. Each part can then be tested in 
isolation without having to think about large amorphous blobs
of internal dependencies.

It may be hard to figure out how to create an extension API when you first start
working on a project, because at that time you probably don't know much
about the ways that people will need to extend its core behavior, and you may
not even have a good sense of what its core feature set should be! This is
completely acceptable, and it makes sense to focus exclusively on your high-level 
interface at first. But as your project matures, you can use the following guidelines to
incrementally bring a suitable extension API into existence:

* With each new feature request, ask yourself whether it could be implemented
as an external add-on without patching your project's codebase. If not, figure 
out what extension points would make it possible to do so.

* For any of your features that have become difficult to work with or overly
complex, think about what extension points would need to be added in
order to extract those features into external add-ons.

* For any essential features that have clearly related functionality, 
figure out what it would take to re-implement them on top of well defined 
extension points rather than relying on lots of private internal code.

At first, you may start by carrying out these design considerations as simple
thought experiments that will indirectly influence the way you implement 
things. Later, you can take them more seriously and seek to support
new functionality via external add-ons rather than merging new features 
unless there is a very good reason to do otherwise. Every project needs to
discover the right balance for itself, but the basic idea is that the value of a
clear extension API increases the longer a project is in active use.

Because RDoc has been around for a very long time and has a fairly decent extension 
API, it is a good library to look at for examples of what this technique has
to offer. Without asking Eric for help, I looked into what it would take to autolink 
GitHub issues, commits, and version tags in RDoc output. This isn't something I had 
a practical use for, but I figured it would be a decent way to test how easily I 
could extend the RDoc parser.

I started with the following text as my input data: 

```
Please see #125, #127, and #159

Also see @bed324 and v0.14.0
```

My goal was to produce the following HTML output after telling RDoc which repository
these issues, commits, and tags referred to:

```
<p>Please see <a href="https://github.com/prawnpdf/prawn/issues/125">#125</a>,
<a href="https://github.com/prawnpdf/prawn/issues/127">#127</a>, and 
<a href="https://github.com/prawnpdf/prawn/issues/159">#159</a></p>

<p>Also see <a
href="https://github.com/prawnpdf/prawn/commit/bed324">@bed324</a> and 
<a href="https://github.com/prawnpdf/prawn/tree/0.14.0">v0.14.0</a></p>
```

Rendered, the resulting HTML would look like this:

> Please see <a href="https://github.com/prawnpdf/prawn/issues/125">#125</a>,
> <a href="https://github.com/prawnpdf/prawn/issues/127">#127</a>, and 
> <a href="https://github.com/prawnpdf/prawn/issues/159">#159</a>
>
> Also see <a href="https://github.com/prawnpdf/prawn/commit/bed324">@bed324</a> and 
> <a href="https://github.com/prawnpdf/prawn/tree/0.14.0">v0.14.0</a>

I wasn't concerned about styling or how to fit this new functionality into a
full-scale RDoc run. I just wanted to see if I could take my little
snippet of sample text and replace the GitHub references with their 
relevant links. My experiment was focused solely on finding an
suitable entry point into the system that supported these 
kinds of extensions.

After about 20 minutes of research and tinkering, I was able 
to produce the following example:

```ruby
require 'rdoc'

# Base URL of the GitHub repository that every generated link points into.
REPO_URL = "https://github.com/prawnpdf/prawn"

# HTML formatter that turns the ISSUE, COMMIT, and VERSION specials into
# links to the matching pages of the repository at REPO_URL.
class GithubLinkedHtml < RDoc::Markup::ToHtml
  # Links an issue reference to its page under /issues.
  def handle_special_ISSUE(special)
    github_link("issues", special.text)
  end

  # Links a commit reference to its page under /commit.
  def handle_special_COMMIT(special)
    github_link("commit", special.text)
  end

  # Links a version reference to the tree tagged with that version.
  def handle_special_VERSION(special)
    github_link("tree", special.text)
  end

  private

  # Builds the anchor tag shared by all three handlers. The first character
  # of the label is dropped to form the URL segment, while the link text
  # keeps the label exactly as it was matched.
  def github_link(section, label)
    target = label.slice(1..-1)

    %{<a href="#{REPO_URL}/#{section}/#{target}">#{label}</a>}
  end
end

markup = RDoc::Markup.new

# Register one special per kind of GitHub reference. Each name pairs with
# the handle_special_<NAME> method defined on GithubLinkedHtml.
{ :ISSUE   => /\s*(\#\d+)/,
  :COMMIT  => /\s*(@\h+)/,
  :VERSION => /\s*(v\d+\.\d+\.\d+)/ }.each do |name, pattern|
  markup.add_special(pattern, name)
end

formatter = GithubLinkedHtml.new(RDoc::Options.new, markup)

# Input comes from the files named on the command line, or from STDIN.
source_text = ARGF.read

puts "<body>#{formatter.convert source_text}</body>"
```

Once I figured out the right APIs to use, this became an easy problem to solve.
It was clear from the way things were laid out that this sort of use case had 
already been considered, and a source dive revealed that RDoc also uses these
extension points internally to support its own behavior. The only challenge I ran
into was that these extension points were not especially well documented, which
is unfortunately a more common problem than it ought to be with open 
source projects. 

It is often the case that extension points are built initially to support 
internal needs rather than external use cases, and so they often lag behind 
surface-level features in learnability and third-party usability. This is
certainly a solvable problem, and is worth considering when working on
your own projects. But even without documentation, explicit and stable extension
points can be a hugely powerful tool for making a project more maintainable.

### Reflections

As you've seen from these examples, establishing high quality standards for open 
source projects is a matter of practicality, not pride. Projects that are made up 
of code that is easy to understand, easy to test, easy to change, and easy to 
maintain are far more likely to be sustainable over the long haul than projects 
that are allowed to decay internally as they grow.

The techniques we've discussed in this article are ones that will
pay off even if you just apply them some of the time, but the more you use them,
the more you'll get in return. The nice thing about these practices is that they
are quite robust -- they can be applied in early stage experimental software as
well as in projects that have been used in production for years.

The hard part of applying these ideas is not in remembering them when things are
painful, but instead in keeping up with them when things are going well with
your project. The more contributions you receive, the more important these
strategies will become, but it is also hard to keep up with them because they do
slow down the maintenance process a little bit. Whenever you feel that pressure,
remember that you are looking out for the future of your project by focusing on
quality, and then do what you can to educate others so that they understand why
these issues matter.

Every project is different, and you may find that there are other ways to keep a
high quality standard without following the guidelines we've discussed in this
article. If you have some ideas to share, please let us know!

[^1]: The definition of legacy code as code without tests was popularized in 2004 by Michael Feathers, author of the extremely useful [Working Effectively with Legacy Code](http://www.amazon.com/Working-Effectively-Legacy-Michael-Feathers/dp/0131177052) book.


================================================
FILE: articles/v7/009-course1.md
================================================
This self-guided course will help you learn how to work with the low level 
tools that you'd usually rely on libraries and frameworks to provide. 
Each of its four parts will give you a feel for a different kind 
of I/O programming and text processing work:

* Standard I/O streams and the filesystem
* Encoding and decoding binary files
* Parsing text-based file formats
* Socket programming and network I/O

In each part of the course, you'll start by carefully reading a Practicing Ruby 
article that explores one of the topics listed above. You will then work through 
a set of review questions to test your grasp of the material. Finally you'll 
apply the concepts to realistic problems by working on a set of project-based
exercises.

Once you've completed the entire study guide, you'll know how to do all of the
following things:

* Build command line applications in Ruby that follow the Unix philosophy and
work similarly to other console-based applications you use day to day.

* Encode and decode binary files at the level of bits and bytes, and understand
how primitive data structures are represented in low-level storage formats.

* Work with streams in a memory efficient way, whether they're coming from files, 
standard I/O, or the internet.

* Work with the same parser and compiler technology that is used by serious
text processing libraries and programming languages.

* Understand the basics behind TCP-level socket programming, and how to build
simple client and server software.

We often take these concepts for granted because our libraries and frameworks take
care of them for us. But this self-guided tour of Ruby's basement will help you
appreciate the many low-level tools and techniques we have 
available for solving these problems.

To begin your journey, fork the [course git repository](https://github.com/elm-city-craftworks/course-001) 
and then follow the instructions in its [README](https://github.com/elm-city-craftworks/course-001/blob/master/README.md). 
Work at your own pace, and don't hesitate to ask for help when you need it. You
can submit issues in our tracker for general questions, and pull requests when
you'd like a review of your work. Good luck, and happy hacking!


================================================
FILE: articles/v7/010-information.md
================================================
Suppose that you want to catch up with your friends 
Alice, Bob, and Carol. To do this, you might log into your favorite
chat service, join a group chat room, and then type in some
sort of friendly greeting and hit the enter key. Moments later, your friends would
see your message appear on their screens, and soon after that
they would probably send you some sort of response. As long as
their reply was somewhat intelligible, you could be reasonably 
certain that your message was successfully communicated, without
giving much thought to the underlying delivery mechanism.

Just beneath the surface of this everyday activity, we find a world 
of precise rules and constraints governing our 
communications. In the world of chat clients and servers, the 
meaning of your message does not matter, but its structure is 
of critical importance. Protocols define the format for messages 
to be encoded in, and even small variations will result 
in delivery failures. 

Even though much of this internal complexity is hidden by user interfaces,
message format requirements are not a purely technical
concern -- they can also directly affect human behavior. On Twitter, 
a message needs to be expressed in 140 characters or less, and on 
IRC the limit is only a few hundred characters. This single
constraint makes Twitter and IRC fundamentally different from
email and web forums, so it's hard to overstate the impact
that constraints can have on a communications medium.

In addition to intentional restrictions on message structure,
there are always going to be incidental technical limitations
that need to be dealt with -- the kinds of quirks that arise
from having too much or too little expressiveness[^1] in a given
message format. These unexpected obstacles are among the most 
interesting problems in information exchange, because they are 
not an essential part of the job to be done but rather an
emergent property of the way we've decided to do the job. 

As programmers, we're constantly working to bridge the gap
between people and the machines that serve them. This article
explores the boundary lines between those two disjoint worlds, 
and the complicated decisions that need to be made 
in order to cross the invisible chasm that lies between
computational structures and human meaning.

## The medium is the message

To see the impact a communication medium can have on its messages,
let's work through a practical example. The line of text below is
representative of what an IRC-based chat message looks like when it 
gets sent over a TCP socket:

```
PRIVMSG #practicing-ruby-testing :Seasons greetings to you all!\r\n
```

Even if you've never used IRC before or looked into its implementation
details, you can extract a great deal of meaning from this single line 
of text. The structure is very simple, so it's fairly obvious that
`PRIVMSG` represents a command, `#practicing-ruby-testing` represents
a channel, and that the message to be delivered is 
`"Seasons greetings to you all!"`. If I asked you to parse this
text to produce the following array, you probably would have
no trouble doing so without any further instruction:

```ruby
["PRIVMSG", "#practicing-ruby-testing", "Seasons greetings to you all!"]
```

But if this were a real project and not just a thought experiment,
you might start to wonder more about the nuances of the protocol. Here
are a few questions that might come up after a few minutes of
careful thought:

* What is the significance of the `:` character? Does it always signify 
the start of the message body, or does it mean something else?

* Why does the message end in `\r\n`? Can the message body contain newlines,
and if so, should they be represented as `\n` or `\r\n`, or something
else entirely?

* Will messages always take the form `"PRIVMSG #channelname :Message Body\r\n"`, 
or are there cases where additional parameters will be used?

* Can channel names include spaces? How about `:` characters?

Try as we might, no amount of analyzing this single example will answer 
these questions for us. That leads us to a very important point: 
Understanding the *meaning* of a message doesn't necessarily mean that 
we know how to process the information contained within it.

## The meaning of a message depends on its level of abstraction

At first glance, the text-based IRC protocol made it 
easy for us to identify the structure and meaning of the various
parts of a chat message. But when we thought a little more about what 
it would take to actually implement the protocol, we quickly ran 
into several questions about how to construct well-formed messages.

A lot of the questions we came up with had to do with basic syntax
rules, which is only natural when exploring an unfamiliar information
format. For example, we can guess that the `:` symbol is a special character 
in the following text, but we can't reliably guess its meaning without 
reading the formal specification for the IRC protocol:

```
PRIVMSG #practicing-ruby-testing :Seasons greetings to you all!\r\n
```

To see the effect of syntax on our interpretation of information
formats, consider what happens when we shift the representation 
of a chat message into a generic structure that
we are already familiar with, such as a Ruby array: 

```ruby
["PRIVMSG", "#practicing-ruby-testing", "Seasons greetings to you all!"]
```

Looking at this information, we still have no idea whether it 
constitutes a well-formed message to be processed by 
our hypothetical IRC-like chat system. But because we know Ruby's 
syntax, we understand what is being communicated here at
a primitive level.

Earlier, when we looked at the `PRIVMSG` command expressed in
the format specified by the IRC protocol, we weren't able to
reliably determine the rules for breaking the message up
into its parts by looking at a single example. Because
we didn't already have its syntax memorized, we wouldn't even
be able to reliably parse IRC commands, let alone process them.
But as Ruby programmers, we know what array and string literals
look like, and so we know how to map their syntax to the concepts
behind them.

The mundane observation to be made here is that it's easier 
to understand a format you're familiar with than it is to 
interpret one you've never seen before. A far more interesting
point to discover is that these two examples have fundamental
differences in meaning, even if they can be interpreted in
a way that makes them equivalent to one another.

Despite their superficial similarities, the two examples
we've looked at operate at completely different
levels of abstraction. The IRC-based example directly 
encodes the concept of a *chat message*, whereas 
our Ruby example encodes the concept of an *array of strings*. 
In that sense, the former is a direct representation of a 
domain-specific concept, and the latter is an indirect 
representation built up from general-purpose data structures.
Both can express the concept of a chat message, but they're not
cut from the same cloth.

Let's investigate why this difference
in structure matters. Consider what might happen if we attempted
to allow whitespace in chat channel names, i.e. 
`#practicing ruby testing` instead of `#practicing-ruby-testing`.
By directly substituting this new channel name into our `PRIVMSG`
command example, we get the text shown below:

```
PRIVMSG #practicing ruby testing :Seasons greetings to you all!\r\n
```

Here we run into a syntactic hiccup: If we allow for channel
names to include whitespace, we need to come up with more complex
rules for splitting up the message into its different parts. But
if we decide this is an ill-formed string, then we need to come
up with a constraint that says that the channel parameter
cannot include spaces in it. Either way, we need to come up
with a formal rule that will be applied at parse time,
before processing even begins.

Now consider what happens when we use Ruby syntax instead:

```ruby
["PRIVMSG", "#practicing ruby testing", "Seasons greetings to you all!"]
```

This is without question a well-formed Ruby array, and it will
be successfully parsed and turned into an internal data structure.
By definition, Ruby string literals allow whitespace in them, 
and there's no getting around that without writing our own 
custom parser. So while the IRC example *must* consider the meaning
of whitespace in channel names during the parsing phase, our
Ruby example *cannot*. Any additional constraints placed on the 
format of channel names would need to be done via logical 
validations rather than syntactic rules.

The key insight here is that the concepts we're expressing
when we encode something in one syntax or another have meaning
beyond their raw data contents. In the IRC protocol
a channel is a defined concept at the symbolic level, with a 
specific meaning to it. When we encode a channel name 
as a Ruby string, we can only approximate the concept by starting with
a more general structure and then applying logical rules to
it to make it a more faithful representation of a concept
it cannot directly express. This is not unlike translating
a word from one spoken language to another which cannot
express the same exact concept using a single word.

## Every expressive syntax has at least a few corner cases

Consider once more our fascinating Ruby array:

```ruby
["PRIVMSG", "#practicing-ruby-testing", "Seasons greetings to you all!"]
```

We've seen that because its structure is highly generic, its
encoding rules are very permissive. Nearly any sequence of
printable characters can be expressed within a Ruby string literal,
and so there isn't much ambiguity in expression of ordinary strings.

Despite its general-purpose nature, there are edge cases in Ruby's
string literal syntax that could lead to ambiguous or incomprehensible messages. 
For example, consider strings which have `"` characters within them:

```
"My name is: "Gregory"\n"
```

The above will generate a syntax error in Ruby, because it ends up
getting parsed as the string `"My name is: "`, followed immediately
by the constant `Gregory`, followed by the string `"\n"`. Ruby
understandably has no way of interpreting that nonsense, so
the parser will fail.

If we were only concerned with parsing string literals, we could 
find a way to resolve these ambiguities by adding some special 
parsing rules, but Ruby has a much more complex grammar across
its entire featureset. For that reason, it expects you to be
a bit more explicit when dealing with edge cases like this one.
To get our string to parse, we'd need to do something like this:

```
"My name is: \"Gregory\"\n"
```

By writing `\"` instead of `"`, we tell the parser
to treat the quote character as just another character in the string
rather than a symbolic *end-of-string* marker. The `\` acts
as an escape character, which is useful for resolving these sorts
of ambiguities. The cost of course is that `\` itself
becomes a potential source of ambiguity, so you end up having to write
`\\` instead of `\` to express backslashes in Ruby
string literals.

Edge cases of this sort arise in any expressive text-based format.
They are often easy to resolve by adding a few more rules, but in many
cases the addition of new processing rules adds an even more subtle layer
of corner cases to consider (as we've seen with the `\` character).
Resolving minor ambiguities comes naturally to humans because we can
guess at the meaning of a message, but cold-hearted computers
can only follow the explicit rules we've given them.

## Can we free ourselves from the limitations of syntax?

One solution to the syntactic ambiguity problem is to represent information in
a way that is convenient for computers, rather than optimizing for
human readability. For example, here's the same array of strings
represented as a raw sequence of bytes in [MessagePack format]:

```
93 a7 50 52 49 56 4d 53 47 b8 23 70 72 61 63 74 69 63 69 6e 67 2d 72 75 62 
79 2d 74 65 73 74 69 6e 67 bd 53 65 61 73 6f 6e 73 20 67 72 65 65 74 69 6e 
67 73 20 74 6f 20 79 6f 75 20 61 6c 6c 21
```

At first, this looks like a huge step backwards, because it smashes our
ability to intuitively extract meaning from the message by simply
reading its contents. But when we discover that the vast majority of
these bytes are just encoded character data, things get a little
more comprehensible:

```ruby
"\x93\xA7PRIVMSG\xB8#practicing-ruby-testing\xBDSeasons greetings to you all!"
```

Knowing that most of the message is the same text we've seen in the other
examples, we only need to figure out what the few extra bytes of information
represent:

![](http://i.imgur.com/YAh5olr.png)

Like all binary formats, MessagePack is optimized for ease of processing
rather than human readability. Instead of using text-based symbols to describe 
the structure of data, MessagePack uses an entirely numeric encoding format.

By switching away from brackets, commas, and quotation marks to arbitrary
values like `93`, `A7`, `B8`, and `BD`, we immediately lose the ability to
visually distinguish between the different structural elements of the 
message. This makes it harder to simply look at a message and know whether
or not it is well-formed, and also makes it harder to notice the connections
between the symbols and their meaning while reading an encoded message.

If you squint really hard at the yellow boxes in the above diagram, you might
guess that `93` describes the entire array, and that `A7`, `B8`, and `BD`
all describe the strings that follow them. But `A7`, `B8`, and `BD` need to
be expressing more than just the concept of a *string*, otherwise there
would be no need to use three different values. You might be able to
discover the underlying rule by studying the example for a while, but
it doesn't just jump out at you the way a pair of opening and closing
brackets might.

To avoid leaving you in suspense, here's the key concept: MessagePack
attempts to represent serialized data structures using as few bytes 
as possible, while making processing as fast as possible. To do this,
MessagePack uses type headers that tell you exactly what type of
data is encoded, and exactly how much space it takes up in 
the message. For small chunks of data, it conveys both of these
pieces of information using a single byte!

Take for example the first byte in the message, which has the
hexadecimal value of `93`. MessagePack maps the values `90-9F`
to the concept of *arrays with up to 15 elements*. This
means that an array with zero elements would have the type code 
of `90` and an array with 15 elements would have the type code
of `9F`. Following the same logic, we can see that `93` represents 
an array with 3 elements.

For small strings, a similar encoding process is used. Values in 
the range of `A0-BF` correspond to *strings with up to 31 bytes of data*.
All three of our strings are in this range, so to compute
their size, we just need to subtract the bottom of the range
from each of them:

```ruby
# note that results are in decimal, not hexadecimal
# String sizes are also computed explicitly for comparison

>> 0xA7-0xA0
=> 7
>> "PRIVMSG".size
=> 7

>> 0xB8-0xA0
=> 24
>> "#practicing-ruby-testing".size
=> 24

>> 0xBD-0xA0
=> 29
>> "Seasons greetings to you all!".size
=> 29
```

Piecing this all together, we can now see the orderly structure
that was previously obfuscated by the compact nature of the
MessagePack format:

![](http://i.imgur.com/H9lOSex.png)

Although this appears to be superficially similar to the structure
of our Ruby array example, there are significant differences that
become apparent when attempting to process the MessagePack data:

* In a text-based format you need to look ahead to find closing
brackets to match opening brackets, to organize quotation marks
into pairs, etc. In MessagePack format, explicit sizes for each
object are given so you know exactly where its data is stored
in the bytestream.

* Because we don't need to analyze the contents of the message
to determine how to break it up into chunks, we don't need
to worry about ambiguous interpretation of symbols in the data.
This avoids the need for introducing escape sequences for the
sole purpose of making parsing easier.

* The explicit separation of metadata from the contents of the
message makes it possible to read part of the message without
analyzing the entire bytestream. We just need to extract all
the relevant type and size information, and then from there
it is easy to compute offsets and read just the data we need.

The underlying theme here is that by compressing all of the
structural meaning of the message into simple numerical values,
we convert the whole problem of extracting the message into
a series of trivial computations: read a few bytes to determine
the type information and size of the encoded data, then
read some content and decode it based on the specified type,
then rinse and repeat.

## Separating structure from meaning via abstract types

Even though representing our message in a binary format allowed
us to make information extraction more precise, 
the data type we used still corresponds to concepts that don't exactly
fit the intended meaning of our message.

One possible way to solve this conceptual mapping problem is to completely 
decouple structure from meaning in our message format. To do that,
we could utilize MessagePack's application-specific type mechanism,
resulting in a message similar to what you see below:

![](http://i.imgur.com/s3Rjgzz.png)

The `C7` type code indicates an abstract type, and is followed
by two additional bytes: the first provides an arbitrary type
id (between 0-127), and the second specifies how many bytes
of data to read in that format. After applying these rules,
we end up with the following structure:

![](http://i.imgur.com/AubaxCk.png)

The contents of each object in the array are the same as they always
have been, but now the types have changed. Instead of an
array composed of three strings, we now have an array that
consists of elements that each have their own type.

Although I've illustrated the contents of each object as text-based
strings for the sake of readability,
the MessagePack format does not assume that the data associated
with abstract types will be text-based. The decision of
how to process this data is left up to the decoder.

Without getting into too many details, let's consider how abstract
data types might be handled in a real Ruby program[^3] that processed
MessagePack-based messages. You'd need to make an explicit mapping
between type identifiers and the handlers for each type, perhaps
using an API similar to what you see below:

```ruby
# Maps each application-specific type id to the handler for that type.
data_types = { 1 => CommandName, 2 => Parameter, 3 => MessageBody }

# Decoding with this mapping yields one handler object per array element.
# The comment below sketches the resulting structure:
command = MessagePackDecoder.unpack(raw_bytes, data_types)
#  [ CommandName <"PRIVMSG">, 
#    Parameter   <"#practicing-ruby-testing">, 
#    MessageBody <"Seasons greetings to you all!"> ]
```

Each handler would be responsible for transforming raw byte arrays
into meaningful data objects. For example, the following class might
be used to convert message parameters (e.g. the channel name) into
a text-based representation:

```ruby
# Handler for message parameters (e.g. the channel name). It converts the raw
# bytes into text and rejects any parameter that contains a space character.
class Parameter
  attr_reader :text

  # byte_array - an array of integer byte values.
  #
  # Raises ArgumentError when the decoded text contains a space.
  def initialize(byte_array)
    @text = byte_array.pack("C*")

    raise ArgumentError unless @text.index(" ").nil?
  end
end
```

The key thing to note about the above code sample is that
the `Parameter` handler does not simply convert the raw binary into
a string, it also applies a validation to ensure that the
string contains no space characters. This is a bit of a
contrived example, but it's meant to illustrate the ability
of custom type handlers to apply their own data integrity
constraints.

Earlier we had drawn a line in the sand between the 
array-of-strings representation and the IRC message format
because the former was forced to allow spaces in strings
until after the parsing phase, and the latter was forced
to make a decision about whether to allow them or not
before parsing could be completed at all. The use
of abstract types removes this limitation, allowing us to choose when and where to
apply our validations, if we apply them at all.

Another dividing wall that abstract types seem to blur for
us is the question of what the raw contents of our message
actually represent. Using our own application-specific type
definitions makes it so that we never need to consider the
contents of our messages to be strings, except as an
internal implementation detail. However, we rely
absolutely on our decoder to convert data that has been
tagged with these arbitrary type identifiers
into something that matches the underlying meaning of 
the message. In introducing abstract types, we have 
somehow managed to make our information format more precise 
and more opaque at the same time.

## Combining human intuition with computational rigor 

As we explored the MessagePack format, we saw that by coming up with very
precise rules for processing an input stream, we can interpret messages by
running a series of simple and unambiguous computations. But in the
process of making things easier for the computer, we complicated
things for humans. Try as we might, we aren't very good at
rapidly extracting meaning from numeric sequences like
`93`, `C7 01 07`, `C7 02 18`, and `C7 03 1D`.

So now we've come full circle in our explorations, realizing that we really do
want to express ourselves using something like the text-based IRC message 
format. Let's look at it one last time to reflect on its strengths
and weaknesses:

```
PRIVMSG #practicing-ruby-testing :Seasons greetings to you all!\r\n
```

The main feature of representing our message this way is that because we're
familiar with the concept of *commands* as programmers, it is easy to see
the structure of the message without even worrying about its exact syntax 
rules: we know intuitively that `PRIVMSG` is the command being sent,
and that `#practicing-ruby-testing` and `Seasons greetings to you all!`
are its parameters. From here, it's easy to extract the underlying
meaning of the message, which is: "Send the message 'Seasons greetings to you
all!' to the #practicing-ruby-testing channel".

The drawback is that we're hazy on the details: we can't simply guess the rules
about whitespace in parameters, and we don't know exactly how to interpret 
the `:` character or the `\r\n` at the end of the message. Because a correct 
implementation of the IRC protocol will need to consider
various edge cases, attempting to precisely describe the message format
verbally is challenging. That said, we could certainly give
it a try, and see what happens...

* Messages consist of a valid IRC command and its parameters
(if any), followed by `\r\n`.

* Commands are either made up solely of letters, or are
represented as a three digit number.

* All parameters are separated by a single space character.

* Parameters may not contain `\r\n` or the null character (`\0`).

* All parameters except for the last parameter must not contain
spaces and must not start with a `:` character.

* If the last parameter contains spaces or starts with a `:`
character, it must be separated from the rest of the
parameters by a `:` character, unless there are exactly
15 parameters in the message. 

* When all 15 parameters are present, then the separating `:` 
character can be omitted, even if the final parameter
includes spaces.

This ruleset isn't even a complete specification of the message format, 
but it should be enough to show you how specifications written in
prose can quickly devolve into the kind of writing you might expect 
from a tax attorney. Because spoken language is inherently fuzzy and 
subjective in nature, it makes it hard to be both precise and 
understandable at the same time.

To get around these communication barriers, computer scientists
have come up with *metalanguages* to describe the syntactic rules
of protocols and formats. By using precise notation with well-defined 
rules, it is possible to describe a grammar in a way that is both
human readable and computationally unambiguous.

When we look at the real specification for the IRC message format,
we see one of these metalanguages in use. Below
you'll see a nearly complete specification[^2] for the general form
of IRC messages expressed in [Augmented Backus–Naur Form][ABNF]:

```
message    =  command [ params ] crlf
command    =  1*letter / 3digit
params     =  *14( SPACE middle ) [ SPACE ":" trailing ]
           =/ 14( SPACE middle ) [ SPACE [ ":" ] trailing ]

nospcrlfcl =  %x01-09 / %x0B-0C / %x0E-1F / %x21-39 / %x3B-FF
                ; any octet except NUL, CR, LF, " " and ":"

middle     =  nospcrlfcl *( ":" / nospcrlfcl )
trailing   =  *( ":" / " " / nospcrlfcl )

SPACE      =  %x20        ; space character
crlf       =  %x0D %x0A   ; "carriage return" "linefeed"
letter     =  %x41-5A / %x61-7A       ; A-Z / a-z
digit      =  %x30-39                 ; 0-9
```

If you aren't used to reading formal grammar notations, this example may appear
to be a bit opaque at first glance. But if you go back and look at the
rules we listed out in prose above, you'll find that all of them are expressed
here in a way that leaves far less to the imagination. Each rule tells us
exactly what should be read from the input stream, and in what order.

Representing syntactic rules this way allows us to clearly understand
their intended meaning, but that's not the only reason for the formality. 
BNF-based grammar notations express syntactic rules so precisely that we can 
use them not just as a specification for how to build a parser
by hand, but as input data for a code generator that can build
a highly optimized parser for us. This not only saves development effort,
it also reduces the likelihood that some obscure edge case will be
lost in translation when converting grammar rules into raw
processing code.

To demonstrate this technique in use, I converted the
ABNF representation of the IRC message format into a grammar that is 
readable by the [Citrus parser generator][]. Apart from a few lines of 
embedded Ruby code used to transform the input data, the following code looks 
conceptually similar to what you saw above:

```
grammar IRC
  # ABNF: message = command [ params ] crlf
  # Evaluates to a hash holding the values of the :command and :params captures.
  rule message
    (command params? endline) {
      { :command => capture(:command).value,
        :params  => capture(:params).value }
    }
  end

  # ABNF: command = 1*letter / 3digit
  rule command
    letters | three_digit_code 
  end

  # ABNF: params =  *14( SPACE middle ) [ SPACE ":" trailing ]
  #              =/ 14( SPACE middle ) [ SPACE [ ":" ] trailing ]
  # The exactly-14 form (where the ":" before trailing is optional) is tried
  # first, then the up-to-14 form (where the ":" is required).
  # Evaluates to the :middle captures followed by the :trailing captures,
  # using an empty list for whichever of the two is absent.
  rule params
    ( ((space middle)14*14 (space ":"? trailing)?) |
      ((space middle)*14 (space ":" trailing)?) ) {
      captures.fetch(:middle, []) + captures.fetch(:trailing, [])
    }
  end

  # ABNF: middle = nospcrlfcl *( ":" / nospcrlfcl )
  rule middle
    non_special (non_special | ":")*
  end

  # ABNF: trailing = *( ":" / " " / nospcrlfcl )
  # NOTE(review): the ABNF permits an empty trailing (`*`), while this rule
  # requires at least one character (`+`) -- confirm this is intentional.
  rule trailing
    (non_special | space | ":")+
  end

  # ABNF: 1*letter (A-Z / a-z)
  rule letters
    [a-zA-Z]+
  end

  # ABNF: 3digit -- the matched text is converted to an integer.
  rule three_digit_code
    /\d{3}/ { to_str.to_i }
  end

  # ABNF: nospcrlfcl -- any character except NUL, ":", CR, LF and " "
  rule non_special
    [^\0:\r\n ]
  end

  # ABNF: SPACE = %x20
  rule space
    " "
  end

  # ABNF: crlf = %x0D %x0A
  rule endline
    "\r\n"
  end
end
```

Loading this grammar into Citrus, we end up with a parser that can correctly
extract the commands and parameters from our original `PRIVMSG` example:

```ruby
require 'citrus'
# Load the IRC grammar so that the IRC parser constant becomes available
Citrus.load('irc')

raw_message = "PRIVMSG #practicing-ruby-testing :Seasons greetings to you all!\r\n"

# The value of a parsed message is a hash with :command and :params keys
parsed = IRC.parse(raw_message).value

p parsed[:command] 
#=> "PRIVMSG"

p parsed[:params]
#=> ["#practicing-ruby-testing", "Seasons greetings to you all!"]
```

In taking this approach, we're forced to accept certain constraints
(like a set of complicated rules about where a `:` character can appear), but
we avoid turning our entire message format into meaningless streams of numbers
like `93` and `C7 01 08`. Even if there is a bit more magic going on in the
conversion of a Citrus grammar into a functioning parser, we can still see
the telltale signs of a deterministic process lurking just beneath the surface.

The decision to express a message in a text-based format or a binary format
is one rife with tradeoffs, as we've already seen from this single example.
Now that you've seen both approaches, consider how you might implement
a few different types of message formats. Would an audio file be better
represented as a binary file format, or a text-based format? How about
a web page? Before you read this article you probably already knew the 
answers to those questions, but now hopefully you have a better sense of 
the tradeoffs involved in how we choose to represent information in
software systems.

## The philosophical conundrum of information exchange

Computers are mindless automatons, and humans are bad at numbers. This
friction between people and their machines runs so deep that
it's remarkable that any software gets built
at all. But because there is gold to be found at the
other side of the computational tarpit, we muddle through our differences 
and somehow manage to make it all work.

To work together, computers and humans need a bridge between their mutually
exclusive ways of looking at the world. And this is what coding is all about!
We *encode* information into data and source code for computers to process,
and then after the work is done, we *decode* the results of a computation back
into a human-friendly message format. 

Once everything is wired up, human users of software can think mostly 
in terms of meaningful information exchange, and software systems only need to 
worry about moving numbers around and doing basic arithmetic operations. 
Although it isn't especially romantic, this is how programmers trick computers 
and humans into cooperating with each other. When done well, people barely
notice the presence of the software system at all, and focus entirely on
their job to be done. This suits the computer just fine, as it does not
care at all what puny humans think of it.

As programmers, we must concern ourselves with the needs of both people 
and machines. We are responsible for connecting two seemingly incompatible worlds,
each with their own set of rules and expectations. This is what makes 
our job hard, but is also what makes it rewarding and almost magical 
at times. We've just explored some examples of the sorts of challenges that
can arise along the boundary line between people and machines, 
but I'm sure you can think of many more that are present in your own work. 

The next time you come across a tension point in your software design
process, take a moment to  reflect on these ideas, and see what kind of 
insights arise. Is the decision you're about to make meant to
benefit the people who use your software, or the machines that run your code?
Consider the tradeoffs carefully, but when in doubt, always choose to 
satisfy the humans. :grin:

> **NOTE:** While writing this article, I was also reading "Gödel, Escher, Bach"
in my spare time. Though I don't directly use any of its concepts here, Douglas
Hofstadter deserves credit (and/or blame) for getting me to think deeply
on *the meaning of meaning* and how it relates to software development.

[^1]: Having too much or too little expressiveness in a format is pretty much a guarantee, because even as we get closer to the *Goldilocks Zone*, increasingly subtle edge cases tend to proliferate. Since we can't expect perfection, we need to settle for expressiveness that's "good enough" and the tradeoffs that come along with it.

[^2]: For the sake of simplicity, I omitted the optional prefix in IRC messages which contains information about the sender of a message, because it involves somewhat complicated URI parsing. See [page 7 of the IRC specification](http://tools.ietf.org/html/rfc2812#page-7) for details.

[^3]: The abstract types API shown in this article is only a theoretical example, because the [official MessagePack library](https://github.com/msgpack/msgpack-ruby) for Ruby does not support application-specific types as of September 2014, even though they're documented in the specification. It may be a fun patch to write if you want to explore these topics more, though!

[MessagePack format]: https://github.com/msgpack/msgpack/blob/master/spec.md
[Citrus parser generator]: https://github.com/mjackson/citrus
[ABNF]: http://en.wikipedia.org/wiki/Augmented_Backus%E2%80%93Naur_Form


================================================
FILE: articles/v7/README.md
================================================
The articles in this folder are from Practicing Ruby's 7th volume.
You can also read them for free online at practicingruby.com.


================================================
FILE: articles/v8/001-problem-discovery.md
================================================
Imagine you're a programmer for a dental clinic, and they need your help to build a vacation scheduling system for their staff. Among other things, this system will display a calendar to staff members that summarizes all of the currently approved and pending vacation requests for the clinic, grouped by role.

The basic idea here is simple: If a dental assistant wants to take a week off some time in July, they'd be more likely to get that time off approved for a week when only one other assistant was out of the office than for a week when five others were on vacation. Rather than waiting for a manager to review their request (which might take a while), this information can be supplied up front to make planning easier for everyone.

Your vacation request system was already implemented weeks ago, so you can easily get all the data you need on who is requesting what time off, and who has already had their time off approved. Armed with this information, building the request summary calendar should be easy, right? Just take all the requests and then group them by employee roles, and then spit them out in chronological order. You'll be able to roll out this new feature into production by lunch time!

You grab your morning coffee, and sit down to work. Before you can even open your text editor, an uncomfortable realization weighs heavily upon you: Roles are actually a property of shifts, not employees. Your clinic is understaffed, and so some employees are cross-trained and need to wear multiple hats. To put it bluntly, there's at least one employee that's not precisely a receptionist, and would be more adequately described as "receptionish". She helps out in the billing office at times, and whenever she's working there, the clinic is down a receptionist.

You do have access to some data about individual shifts, so maybe that could be used to determine roles. By the time a shift is approved, the role is set, and you know for sure what that employee is doing for that day. 

You think for a little while. You uncover a few annoying problems that will need to be solved if you decide to go this route.

The shift data is coming from a third party shift planning system, and the import window is set out only ten weeks into the future.  In practice, shifts aren't really firmly committed to until four weeks out, so that makes the practical window even smaller. 

The idea that a given employee's shift in July would be set in stone by March is a fantasy, and so even if you could get at that data, it wouldn't be perfectly accurate. There's also no guarantee that attempting to import five times more data than what you're currently working with won't cause problems… the whole synchronization system was built in a bit of a hurry, and could be fragile in places.

Feeling the anxiety start to set in, you go for a quick walk around the block, and come to the realization that you've gone into problem solving mode already, when you really should be more in the problem discovery phase of things. You haven't even answered the question of how many employees work in multiple different roles, and you're already assuming that's a problem that needs a clear solution.

An idea pops into your head. You rush to your desk, and pop open a Rails console in production. You write a crude query and then massage the data with an ugly chain of Enumerable methods, and end up with a report that looks like this:

```ruby
 ["Nikko Bergnaum", [["Hygienists", 5]]],
 ["Anderson Miller", [["Billing", 50]]],
 ["Bell Effertz", [["Hygienists", 14]]],
 ["Vicky Okuneva", [["Receptionists", 30]]],
 ["Lavern Von", [["Assistants", 37]]],
 ["Crawford Purdy", [["Receptionists", 40]]],
 ["Valentin Daugherty", [["Hygienists", 61]]],
 ["Eudora Bauch", [["Receptionists", 40]]],
 ["Jaeden Bashirian", [["Assistants", 28]]],
 ["Roel Hammes", [["Dentists", 36]]],
 ["King Schowalter", [["Hygienists", 20]]],
 ["Liam Kovacek", [["Receptionists", 55]]],
 ["Elaina Von", [["Hygienists", 25]]],
 ["Susie Watsica", [["Hygienists", 31]]],
 ["Oswaldo Boyer", [["Hygienists", 20]]],
 ["Gardner Fay", [["Hygienists", 10]]],
 ["Joanny Beatty", [["Assistants", 52]]],
 ["Beth Yost", [["Hygienists", 34]]],
 ["Gerry Torphy", [["Hygienists", 10]]],
 ["Maureen Terry", [["Hygienists", 9]]],
 ["Maritza Kemmer", [["Billing", 25]]],
 ["Morton Hudson", [["Dentists", 61]]],
 ["Santino Parker", [["Hygienists", 49]]],
 ["Jesse Friesen", [["Hygienists", 31]]],
 ["Dillan Krajcik", [["Hygienists", 44]]],
 ["Travon Koch", [["Hygienists", 16]]],
 ["Audreanne Hand", [["Billing", 47]]],
 ["Coralie Predovic", [["Receptionists", 45]]],
 ["Jovani Schulist", [["Management", 50]]],
 ["Tanner D'Amore", [["Dentists", 41]]],
 ["Jace Nitzsche", [["Dentists", 21]]],
 ["Carolina Waters", [["Receptionists", 40]]],
 ["Terence Howell", [["Dentists", 39]]],
 ["Leann Pacocha", [["Assistants", 2]]],
 ["Alvah Rippin", [["Dentists", 50]]],
 ["Lorenzo West", [["Hygienists", 27]]],
 ["Gideon McKenzie", [["Dentists", 41]]],
 ["Katrine O'Reilly", [["Dentists", 51]]],
 ["Briana Ziemann", [["Dentists", 40]]],
 ["Jerome Harris", [["Dentists", 10]]],
 ["Misael Pagac", [["Assistants", 51]]],
 ["Krista Predovic", [["Assistants", 32]]],
 ["Carole O'Hara", [["Assistants", 42]]],
 ["Adalberto Doyle", [["Management", 49], ["Receptionists", 2]]],
 ["Noel Ortiz", [["Management", 28], ["Receptionists", 1]]],
 ["Monique McLaughlin", [["Receptionists", 43], ["Assistants", 1]]],
 ["Jaleel Graham", [["Billing", 50], ["Receptionists", 18]]],
 ["Ned Reilly", [["Receptionists", 50], ["Assistants", 1]]],
 ["Enrico Schowalter", [["Receptionists", 1], ["Assistants", 55]]],
 ["Caesar Goldner", [["Management", 30], ["Receptionists", 16]]],
 ["Kirstin Weissnat", [["Receptionists", 26], ["Assistants", 28]]],
 ["Guillermo Klein",
  [["Assistants", 41], ["Hygienists", 2], ["Receptionists", 3]]]]
```

This listing shows all the shifts planned for the next ten weeks, with counts for each employee by role. You copy and paste it into a text editor, and delete any of the lines for employees that have a single role. Here's what you end up with:

```ruby
 ["Adalberto Doyle", [["Management", 49], ["Receptionists", 2]]],
 ["Noel Ortiz", [["Management", 28], ["Receptionists", 1]]],
 ["Monique McLaughlin", [["Receptionists", 43], ["Assistants", 1]]],
 ["Jaleel Graham", [["Billing", 50], ["Receptionists", 18]]],
 ["Ned Reilly", [["Receptionists", 50], ["Assistants", 1]]],
 ["Enrico Schowalter", [["Receptionists", 1], ["Assistants", 55]]],
 ["Caesar Goldner", [["Management", 30], ["Receptionists", 16]]],
 ["Kirstin Weissnat", [["Receptionists", 26], ["Assistants", 28]]],
 ["Guillermo Klein",
  [["Assistants", 41], ["Hygienists", 2], ["Receptionists", 3]]]]
```

Now you've whittled the list down to only 9 people. In a business that employs over 50 people, this is nearly 20% of the workforce, which isn't a tiny number. But taking a closer look at the data, you realize something else: even on this list of cross-trained employees, most staff members work in a single role the majority of the time, and very rarely fill in for another role.

Filtering the list again, you remove anyone who works in a single role at least 90% of the time. After this step, only three employees remain on your list:

```ruby
 ["Jaleel Graham", [["Billing", 50], ["Receptionists", 18]]],
 ["Caesar Goldner", [["Management", 30], ["Receptionists", 16]]],
 ["Kirstin Weissnat", [["Receptionists", 26], ["Assistants", 28]]]]
```

Because these employees represent only about 5% of the total staff, you've revealed this problem as an edge case. For the other six employees that substitute for a different role once in a blue moon, you'd have at least 90% accuracy by always labeling them by their primary role. It'd be confusing to refer to them as anything else, at least for the purposes of vacation planning.

In the process of this ad-hoc exploration, you've discovered a reasonably accurate method of predicting employee roles far out into the future: if at least 90% of the shifts they're assigned are for a particular role, assume that is their primary role. Otherwise, label them as cross-trained, listing out all the roles they commonly fill in for. For example, 
Jaleel could be listed as "X-Trained (Billing, Receptionist)",
Kirstin as "X-Trained (Receptionist, Assistant)", and Caesar as "X-Trained (Receptionist, Management)".

Taking this approach will be at least as reliable as using the raw shift data, and requires no major technical changes to the system's under-plumbing. It's also dynamic, in the sense that the system will adaptively relabel employees as cross-trained when they're doing more than one role on a regular basis.

Happy with this re-evaluation of the problem, you start working, and you manage to get the feature into production before lunch after all. In the worst case scenario, you can always come back to this and do more careful analysis, peering into the technological and philosophical rabbit hole that made you nervous in the first place. But there's a very good chance that this solution will work just fine, and so it's worth trying it out before venturing out into the darkness.

From this small exercise, you come to a powerful realization:

> Software isn't mathematically perfect reality, it's a useful fiction meant to capture some aspect of reality that is interesting or important to humans. Although our technical biases may aim for logical purity in the code we write, the humans that use our work mainly care about the story we're trying to tell them. We should seek the most simple solutions that allow us to tell that story, even if those solutions lack technical elegance.

In other words, feel free to ignore the man behind the curtain.


================================================
FILE: articles/v8/002-formula-processing.md
================================================
> This article was written in collaboration with Solomon White ([@rubysolo](http://twitter.com/rubysolo)). Solomon is a software developer from Denver, where he builds web applications with Ruby and ENV.JAVASCRIPT_FRAMEWORK.  He likes code, caffeine, and capsaicin.

Imagine that you're a programmer for a company that sells miniature zen gardens, and you've been asked to create a small calculator program that will help determine the material costs of the various different garden designs in the company's product line.

The tool itself is simple: The dimensions of the garden to be built will be entered via a web form, and then the calculator will output the quantity and weight of all the materials that are needed to construct the garden. 

In practice, the problem is a little more complicated, because the company offers many different kinds of gardens. Even though only a handful of basic materials are used throughout the entire product line, the gardens themselves can consist of anything from a plain rectangular design to very intricate and complicated layouts. For this reason, figuring out how much material is needed for each garden type requires the use of custom formulas.

> MATH WARNING: You don't need to think through the geometric computations being done throughout this article, unless you enjoy that sort of thing; just notice how all the formulas are ordinary arithmetic expressions that operate on a handful of variables.

The following diagram shows the formulas used for determining the material quantities for two popular products. *Calm* is a minimal rectangular garden, while *Yinyang* is a more complex shape that requires working with circles and semicircles: 

![](//i.imgur.com/JlKz2kC.png)

In the past, material quantities and weights for new product designs were computed using Excel spreadsheets, which worked fine when the company only had a few different garden layouts. But to keep up with the incredibly high demand for bespoke desktop Zen Gardens, the business managers have insisted that their workflow become more Agile by moving all product design activities to a web application in THE CLOUD™.

The major design challenge for building this calculator is that it would not be practical to have a programmer update the codebase whenever a new product idea was dreamt up by the product design team. Some days, the designers have been known to attempt at least 32 different variants on a "snowman with top-hat" zen garden, and in the end only seven or so make it to the marketplace. Dealing with these rapidly changing requirements would drive any reasonable programmer insane.

After reviewing the project requirements, you decide to build a program that will allow the product design team to specify project requirements in a simple, Excel-like format and then safely execute the formulas they define within the context of a Ruby-based web application.

Fortunately, the [Dentaku](https://github.com/rubysolo/dentaku) formula parsing and evaluation library was built with this exact use case in mind. Just like you, Solomon White also really hates figuring out snowman geometry, and would prefer to leave that as an exercise for the user.

## First steps with the Dentaku formula evaluator

The purpose of Dentaku is to provide a safe way to execute user-defined mathematical formulas within a Ruby application.  For example, consider the following code:

```ruby
require "dentaku"

# Values for the variables referenced by the formula
dimensions = { :length => 10, :width => 5, :height => 3 }

calculator = Dentaku::Calculator.new
volume     = calculator.evaluate("length * width * height", dimensions)

p volume #=> 150
```

Not much is going on here -- we have some named variables, some numerical values, and a simple formula: `length * width * height`.  Nothing in this example appears to be sensitive data, so on the surface it may not be clear why safety is a key concern here. 

To understand the risks, you consider an alternative implementation that allows mathematical formulas to be evaluated directly as plain Ruby code. You implement the equivalent formula evaluator without the use of an external library, just to see what it would look like:

```ruby
# Evaluates +expression+ as plain Ruby code, after defining each entry of
# +variables+ as a local variable visible to that code.
#
# expression - String of Ruby source to evaluate
# variables  - Hash mapping variable names to values; every value is
#              interpolated into Ruby source, so it is evaluated as code too
#
# Returns whatever the evaluated expression returns.
#
# WARNING: this runs arbitrary Ruby via eval and must never be handed
# untrusted input.
def evaluate_formula(expression, variables)
  sandbox = Object.new

  # Defined with `def` so the binding sees only the sandbox object,
  # not the local variables of evaluate_formula itself.
  def sandbox.context
    binding
  end

  scope = sandbox.context

  # Assignments evaluated in the same binding stay visible to later evals
  variables.each_pair do |name, value|
    scope.eval("#{name} = #{value}")
  end

  scope.eval(expression)
end

dimensions = { :length => 10, :width => 5, :height => 3 }
volume     = evaluate_formula("length * width * height", dimensions)

p volume #=> 150
```

Although conceptually similar, it turns out these two code samples are worlds apart when you consider the implementation details:

* When using Dentaku, you're working with a very basic external domain specific language, which only knows how to represent simple numbers, variables, mathematical operations, etc. No direct access to the running Ruby process or its data is provided, and so formulas can only operate on what is explicitly provided to them whenever a `Calculator` object is instantiated.

* When using `eval` to run formulas as Ruby code, by default any valid Ruby code will be executed. Every instantiated object in the process can be accessed, system commands can be run, etc. This isn't much different than giving users access to the running application via an `irb` console.

This isn't to say that building a safe way to execute user-defined Ruby scripts isn't possible (it can even be practical in certain circumstances), but if you go that route, safe execution is something you need to specifically design for. By contrast, Dentaku is safe to use with minimally trusted users, because you have very fine-grained control over the data and actions those users will be able to work with.

You sit quietly for a moment and ponder the implications of all of this. After exactly four minutes of very serious soul searching, you decide that for the existing and foreseeable future needs of our overworked but relentlessly optimistic Zen garden designers... Dentaku should work just fine. To be sure that you're on the right path, you begin working on a functional prototype to share with the product team.

## Building the web interface

You spend a little bit of time building out the web interface for the calculator, using Sinatra and Bootstrap. It consists of only two screens, both of which are shown below:

![](//i.imgur.com/h0ftlcF.png)

People who mostly work with Excel spreadsheets all day murmur that you must be some sort of wizard, and compliment you on your beautiful design. You pay no attention to this, because your mind has already started to focus on the more interesting parts of the problem.

> **SOURCE FILES:** [app.rb](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/app.rb) // [app.erb](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/views/app.erb) // [index.erb](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/views/index.erb) // [materials.erb](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/views/materials.erb)

## Defining garden layouts as simple data tables

With a basic idea in mind for how you'll implement the calculator, your next task is to figure out how to define the various garden layouts as a series of data tables.

You start with the weight calculations table, because it involves the most basic computations. The formulas all boil down to variants on the `mass = volume * density` equation:

![](//i.imgur.com/1VIrDO1.png)

This material weight lookup table is suitable for use in all of the product definitions, but the `quantity` value will vary based both on the dimensions of the garden to be built and the physical layout of the garden.

With that in mind, you turn your attention to the tables that determine how much material is needed for each project, starting with the Calm rectangular garden as an example.

Going back to the diagram from earlier, you can see that the quantity of materials needed by the Calm project can be completely determined by the length, width, height, and desired fill level for the sandbox:

![](//i.imgur.com/BfHgoPB.png)

You could directly use these formulas in project specifications, but it would feel a little too low-level. Project designers will need to work with various box-like shapes often, and so it would feel more natural to describe the problem with terms like perimeter, area, volume, etc. Knowing that the Dentaku formula processing engine provides support for creating helper functions, you come up with the following definitions for the materials used in the Calm project:

![](//i.imgur.com/xyYtuAM.png)

With this work done, you turn your attention to the Yinyang circular garden project. Even though it is much more complex than the basic rectangular design, you notice that it too is defined entirely in terms of a handful of simple variables -- diameter, height, and fill level:

![](//i.imgur.com/1G0vaNx.png)

As was the case before, it would be better from a product design perspective to describe things in terms of circular area, cylindrical volume, and circumference rather than the primary dimensional variables, so you design the project definition with that in mind:

![](//i.imgur.com/d71MgSp.png)

To make the system easily customizable by the product designers, the common formulas used in the various garden layouts will also be stored in a data table rather than hard-coding them in the web application. The following table lists the names and definitions for all the formulas used in the *Calm* and *Yinyang* projects:

![](//i.imgur.com/ovOhwEX.png)

Now that you have a rough sense of what the data model will look like, you're ready to start working on implementing the calculator program. You may need to change the domain model at some point in the future to support more complex use cases, but many different garden layouts can already be represented in this format.

> **SOURCE FILES:** [calm.csv](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/db/projects/calm.csv) // [yinyang.csv](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/db/projects/yinyang.csv) // [materials.csv](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/db/materials.csv) // [common_formulas.csv](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/db/common_formulas.csv)

## Implementing the formula processor

You start off by building a utility class for reading all the relevant bits of project data that will be needed by the calculator. For the most part, this is another boring chore -- it involves nothing more than loading CSV and JSON data into some arrays and hashes.

After a bit of experimentation, you end up implementing the following interface:

```ruby
p Project.available_projects 
#=> ["calm", "yinyang"]

p Project.variables("calm") 
#=> ["length", "width", "height"]

p Project.weight_formulas["black sand"] 
#=> "quantity * 2.000"

p Project.quantity_formulas("yinyang")
          .select { |e| e["name"] == "black sand" } #=>
# [{"name" => "black sand",
#    "formula" => "cylinder_volume * 0.5 * fill",
#    "unit" => "cu cm"}]

p Project.common_formulas["cylinder_volume"]
#=> "circular_area * height"

Down the line, the `Project` class will probably read from a database rather than text files, but this is largely an implementation detail. Rather than getting bogged down in ruminations about the future, you shift your attention to the heart of the problem -- the Dentaku-powered `Calculator` class.

This class will be instantiated with the name of a particular garden layout and a set of dimensional parameters that will be used to determine how much of each material is needed, and how much the entire garden kit will weigh. Sketching this concept out in code, you decide that the `Calculator` class should work as shown below:

```ruby
calculator = Calculator.new("yinyang", "diameter" => "20", "height" => "5")

# Summarize each material as [name, quantity rounded up, unit]
summary = calculator.materials.map do |material|
  [material['name'], material['quantity'].ceil, material['unit']]
end

p summary #=>
# [["1cm thick flexible strip", 472, "sq cm"],
#  ["granite slab", 315, "sq cm"],
#  ["white sand", 550, "cu cm"],
#  ["black sand", 550, "cu cm"]]

p calculator.shipping_weight #=> 4006
```

With that goal in mind, the constructor for the `Calculator` class needs to do two chores:

1. Convert the string-based dimension parameters provided via the web form into numeric values that Dentaku understands. An easy way to do this is to treat the strings as Dentaku expressions and evaluate them, so that a string like `"3.1416"` ends up getting converted to a `BigDecimal` object under the hood.

2. Load any relevant formulas needed to compute the material quantities and weights -- relying on the `Project` class to figure out how to extract these values from the various user-provided CSV files.

The resulting code ends up looking like this:

```ruby
# Works out material quantities and shipping weight for one garden layout.
class Calculator
  # project_name - name of the layout whose quantity formulas get loaded
  # params       - dimension values keyed by name; every value is passed
  #                through Dentaku() before being stored
  def initialize(project_name, params={})
    @params = params.each_with_object({}) do |(name, expression), evaluated|  #1
      evaluated[name] = Dentaku(expression)
    end

    @quantity_formulas = Project.quantity_formulas(project_name)  #2
    @common_formulas   = Project.common_formulas
    @weight_formulas   = Project.weight_formulas
  end

  # ...
end
```

Because a decent amount of work has already been done to massage all the relevant bits of data into exactly the right format, the actual work of computing required material quantities is surprisingly simple:

1. Instantiate a `Dentaku::Calculator` object
2. Load all the necessary common formulas into that object (e.g. `circular_area`, `cylinder_volume`, etc.)
3. Walk over the various material quantity formulas and evaluate them (e.g. `"black sand" => "cylinder_volume * 0.5 * fill"`)
4. Build up new records that map the names of materials in a project to their quantities.

A few lines of code later, and you have a freshly minted `Calculator#materials` method:

```ruby
# class Calculator

  # Returns one record per material in the project: the original formula
  # record plus a 'quantity' key holding the evaluated amount.
  def materials
    engine = Dentaku::Calculator.new #1

    # Make the shared formulas available to the quantity formulas below.
    @common_formulas.each do |name, formula| #2
      engine.store_formula(name, formula)
    end

    @quantity_formulas.map do |record|
      quantity = engine.evaluate(record['formula'], @params) #3

      record.merge('quantity' => quantity) #4
    end
  end
```

And for your last trick, you implement the `Calculator#shipping_weight` method. 

Because currently all shipping weight computations are simple arithmetic operations on a `quantity` for each material, you don't need to load up the various common formulas used in the geometry equations. You just need to look up the relevant weight formulas by name, then evaluate them for each material in the list to get a weight value for that material. Sum up those values for the entire materials list, and you're done!

```ruby
# class Calculator

  # Returns the total shipping weight of the project, rounded up with #ceil.
  # Each material's weight formula is looked up by the material's name and
  # evaluated with that material's record as the variable bindings.
  def shipping_weight
    engine = Dentaku::Calculator.new

    total = materials.inject(0.0) do |running, material|
      formula = @weight_formulas[material['name']]

      running + engine.evaluate(formula, material)
    end

    total.ceil
  end
```

Wiring the `Calculator` class up to your Sinatra application, you end up with a fully functional program, which looks just the same as it did when you mocked up the UI, but actually knows how to crunch numbers now.

As a sanity check, you enter the same values that you have been using to test the `Calculator` object on the command line into the Web UI, and observe the results:

![](//i.imgur.com/26sV6wr.png)

They look correct. Mission accomplished!!!

> **SOURCE FILES:** [project.rb](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/project.rb) // [calculator.rb](https://github.com/PracticingDeveloper/dentaku-zen-garden/blob/32e518f80b5499990a4f92af6a261594baaba88a/calculator.rb)

## Considering the tradeoffs involved in using Dentaku

It was easy to decide on using Dentaku in this particular project, for several reasons:

* The formulas used in the project consist entirely of simple arithmetic operations.

* The tool itself is an internal application with no major performance requirements.

* The people who will be writing the formulas already understand basic computing concepts.

* A programmer will be available to customize the workflow and assist with problems as needed.

If even a couple of these conditions were not met, the potential caveats of using Dentaku (or any similar formula processing tool) would require more careful consideration.

**Maintainability concerns:**

Even though Dentaku's domain specific language is a very simple one, formulas are still a form of code. Like all code, any formulas that run through Dentaku need to be tested in some way -- and when things go wrong, they need to be debugged.

If your use of Dentaku is limited to the sort of thing someone might type into a cell of an Excel spreadsheet, there isn't much of a problem to worry about. You can fairly easily build some sane error handling, and can provide features within your application to allow the user to test formulas before they go live in production.

The more that user-defined computations start looking like "real programs", the more you will miss the various niceties of a real programming environment. We take for granted things like smart code editors that understand the languages we're working in, revision control systems, elaborate testing tools, debuggers, package managers, etc. 

The simple nature of Dentaku's DSL should prevent you from ever getting into enough complexity to require the benefits of a proper development environment. That said, if the use cases for your project require you to run complex user-defined code that looks more like a program than a simple formula, Dentaku would definitely be the wrong tool for the job.

**Performance concerns:** 

The default evaluation behavior of Dentaku is completely unoptimized: simply adding two numbers together is a couple orders of magnitude slower than it would be in pure Ruby. It is possible to precompile expressions by enabling `AST` caching, and this reduces evaluation overhead significantly. Doing so may introduce memory management issues at scale though, and even with this optimization the evaluator runs several times slower than native Ruby. 

None of these performance issues matter when you're solving a single system of equations per request, but if you need to run Dentaku expressions in a tight loop over a large dataset, this is a problem to be aware of.

**Usability concerns:**

In this particular project, the people who will be using Dentaku are already familiar with writing Excel-based formulas, and they are also comfortable with technology in general. This means that with a bit of documentation and training, they will be likely to comfortably use a code-based computational tool, as long as the workflow is kept relatively simple.

In cases where the target audience is not assumed to be comfortable writing code-based mathematical expressions and working with raw data formats, a lot more in-application support would be required. For example, one could imagine building a drag-and-drop interface for designing a garden layout, which would in turn generate the relevant Dentaku expressions under the hood.

The challenge is that once you get to the point where you need to put a layer of abstraction between the user and Dentaku's DSL, you should carefully consider whether you actually need a formula processing engine at all. It's certainly better to go without the extra complexity when it's possible to do so, but this will depend heavily on the context of your particular application.

**Extensibility concerns:** 

Setting up non-programmers with a means of doing their own computations can help cut down on a lot of tedious maintenance programming work, but the core domain model and data access rules are still defined by the application's source code.

As requirements change in a business, new data sources may need to be wired up, and new pieces of support code may need to be written from time to time. This can be challenging, because tweaks to the domain model might require corresponding changes to the user-defined formulas.

In practice, this means that an embedded formula processing system works best when either the data sources and core domain model are somewhat stable, or there is a programmer actively maintaining the system who can help guide users through any necessary changes that come up.

With code stored either as user-provided data files or even in the application's database, there is also a potential for messy and complicated migrations to happen whenever a big change does need to happen. This may be especially challenging to navigate for non-programmers, who are used to writing something once and having it work forever.

*NOTE: Yes, this was a long list of caveats. Keep in mind that most of them only apply when you go beyond the "let's take this set of Excel sheets and turn it into a nicely managed program" use case and venture into the "I want to embed an adhoc SDK into my application" territory. The concerns listed above are meant to help you sort out what category your project falls into, so that you can choose a modeling technique wisely.*

## Reflections and further explorations

By now you've seen that a formula parser/evaluator can be a great way to take a messy ad-hoc spreadsheet workflow and turn it into a slightly less messy ad-hoc web application workflow. This technique provides a way to balance the central management and depth of functionality that custom software development can offer with the flexibility and empowerment of putting computational modeling directly into the hands of non-programmers.

Although this is not an approach that should be used in every application, it's a very useful modeling strategy to know about, as long as you keep a close eye on the tradeoffs involved.

If you'd like to continue studying this topic, here are a few things to try out:

* Grab the [source code for the calculator application](https://github.com/PracticingDeveloper/dentaku-zen-garden), and run it on your own machine.

* Create a new garden layout, with some new material types and shapes. For example,
you could try to create a group of concentric circles, or a checkerboard style design.

* [Explore how to extend Dentaku's DSL](https://github.com/rubysolo/dentaku#external-functions) with your own Ruby functions.

* Watch [Spreadsheets for developers](https://www.youtube.com/watch?v=0CKru5d4GPk), a talk by Felienne Hermans on the power and usefulness of basic spreadsheet software for rapid prototyping and ad-hoc explorations.

Good luck with your future number crunching, and thanks for reading!


================================================
FILE: articles/v8/README.md
================================================
The articles in this folder are from Practicing Ruby's 8th volume.
You can also read them for free online at practicingruby.com.


================================================
FILE: templates/default.html.erb
================================================
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"/>
  <title>Practicing Ruby</title>
  <style>
    /* Page frame: a centered, bordered column at most 900px wide. */
    body {
      font-family: "Helvetica Neue", arial, sans-serif;
      font-size: 14px;
      line-height: 1.6;
      background-color: white;
      /* Single source of truth for padding. The former padding-top and
         padding-bottom (10px) were overridden by this shorthand and never
         took effect, so they have been dropped. */
      padding: 30px;
      color: #333;
      border: 1px solid #aaa;
      max-width: 900px;
      margin: 20px auto;
    }

    body > *:first-child {
      margin-top: 0 !important;
    }

    body > *:last-child {
      margin-bottom: 0 !important;
    }

    a {
      color: #4183C4;
      text-decoration: none;
    }

    a.absent {
      color: #cc0000;
    }

    a.anchor {
      display: block;
      padding-left: 30px;
      margin-left: -30px;
      cursor: pointer;
      position: absolute;
      top: 0;
      left: 0;
      bottom: 0;
    }

    h1, h2, h3, h4, h5, h6 {
      margin: 20px 0 10px;
      padding: 0;
      font-weight: bold;
      -webkit-font-smoothing: antialiased;
      cursor: text;
      position: relative;
    }

    h2:first-child, h1:first-child, h1:first-child + h2, h3:first-child, h4:first-child, h5:first-child, h6:first-child {
      margin-top: 0;
      padding-top: 0;
    }

    h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor {
      text-decoration: none;
    }

    /* Inline code inside any heading takes the heading's font size. */
    h1 tt, h1 code,
    h2 tt, h2 code,
    h3 tt, h3 code,
    h4 tt, h4 code,
    h5 tt, h5 code,
    h6 tt, h6 code {
      font-size: inherit;
    }

    h1 {
      font-size: 28px;
      color: black;
    }

    h2 {
      font-size: 24px;
      border-bottom: 1px solid #cccccc;
      color: black;
    }

    h3 {
      font-size: 18px;
    }

    h4 {
      font-size: 16px;
    }

    h5 {
      font-size: 14px;
    }

    h6 {
      color: #777777;
      font-size: 14px;
    }

    p, blockquote, ul, ol, dl, li, table, pre {
      margin: 3px 0;
    }

    /* Horizontal rule drawn as a 4px-high, horizontally repeated background image. */
    hr {
      /* NOTE(review): the image is fetched over plain http through a URL
         shortener; the redirect target is not visible here. Confirm it still
         resolves and consider bundling the asset with the template. */
      background: transparent url("http://tinyurl.com/bq5kskr") repeat-x 0 0;
      border: 0 none;
      color: #cccccc;
      height: 4px;
      padding: 0;
    }

    /* A heading that opens the page (or an h2 directly after an opening h1)
       gets no extra space above it. */
    body > h1:first-child,
    body > h1:first-child + h2,
    body > h2:first-child,
    body > h3:first-child,
    body > h4:first-child,
    body > h5:first-child,
    body > h6:first-child {
      margin-top: 0;
      padding-top: 0;
    }

    a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 {
      margin-top: 0;
      padding-top: 0;
    }

    h1 p, h2 p, h3 p, h4 p, h5 p, h6 p {
      margin-top: 0;
    }

    li p.first {
      display: inline-block;
    }

    ul, ol {
      padding-left: 30px;
    }

    ul :first-child, ol :first-child {
      margin-top: 0;
    }

    ul :last-child, ol :last-child {
      margin-bottom: 0;
    }

    dl {
      padding: 0;
    }

    dl dt {
      font-size: 14px;
      font-weight: bold;
      font-style: italic;
      padding: 0;
      margin: 15px 0 5px;
    }

    dl dt:first-child {
      padding: 0;
    }

    dl dt > :first-child {
      margin-top: 0;
    }

    dl dt > :last-child {
      margin-bottom: 0;
    }

    dl dd {
      margin: 0 0 15px;
      padding: 0 15px;
    }

    dl dd > :first-child {
      margin-top: 0;
    }

    dl dd > :last-child {
      margin-bottom: 0;
    }

    blockquote {
      border-left: 4px solid #dddddd;
      padding: 0 15px;
      color: #777777;
    }

    blockquote > :first-child {
      margin-top: 0;
    }

    blockquote > :last-child {
      margin-bottom: 0;
    }

    table {
      padding: 0;
    }
    table tr {
      border-top: 1px solid #cccccc;
      background-color: white;
      margin: 0;
      padding: 0;
    }

    table tr:nth-child(2n) {
      background-color: #f8f8f8;
    }

    table tr th {
      font-weight: bold;
      border: 1px solid #cccccc;
      text-align: left;
      margin: 0;
      padding: 6px 13px;
    }

    table tr td {
      border: 1px solid #cccccc;
      text-align: left;
      margin: 0;
      padding: 6px 13px;
    }

    table tr th :first-child, table tr td :first-child {
      margin-top: 0;
    }

    table tr th :last-child, table tr td :last-child {
      margin-bottom: 0;
    }

    img {
      max-width: 100%;
    }

    span.frame {
      display: block;
      overflow: hidden;
    }

    span.frame > span {
      border: 1px solid #dddddd;
      display: block;
      float: left;
      overflow: hidden;
      margin: 13px 0 0;
      padding: 7px;
      width: auto;
    }

    span.frame span img {
      display: block;
      float: left;
    }

    span.frame span span {
      clear: both;
      color: #333333;
      display: block;
      padding: 5px 0 0;
    }

    span.align-center {
      display: block;
      overflow: hidden;
      clear: both;
    }

    span.align-center > span {
      display: block;
      overflow: hidden;
      margin: 13px auto 0;
      text-align: center;
    }

    span.align-center span img {
      margin: 0 auto;
      text-align: center;
    }

    span.align-right {
      display: block;
      overflow: hidden;
      clear: both;
    }

    span.align-right > span {
      display: block;
      overflow: hidden;
      margin: 13px 0 0;
      text-align: right;
    }

    span.align-right span img {
      margin: 0;
      text-align: right;
    }

    span.float-left {
      display: block;
      margin-right: 13px;
      overflow: hidden;
      float: left;
    }

    span.float-left span {
      margin: 13px 0 0;
    }

    span.float-right {
      display: block;
      margin-left: 13px;
      overflow: hidden;
      float: right;
    }

    span.float-right > span {
      display: block;
      overflow: hidden;
      margin: 13px auto 0;
      text-align: right;
    }

    code, tt {
      font-family: "Lucida Console", "Courier New", courier;
      font-size: 12px;
      margin: 0 2px;
      padding: 3px 5px;
      white-space: nowrap;
      border: 1px solid #eaeaea;
      background-color: #f8f8f8;
      border-radius: 3px;
    }

    pre code {
      margin: 0;
      padding: 0;
      white-space: pre;
      border: none;
      background: transparent;
    }

    /* Code listings, highlighted or plain, share one boxed appearance. */
    .highlight pre,
    pre {
      background-color: #f8f8f8;
      border: 1px solid #cccccc;
      font-size: 13px;
      line-height: 19px;
      overflow: auto;
      padding: 6px 10px;
      border-radius: 3px;
    }

    pre code, pre tt {
      background-color: transparent;
      border: none;
    }

    span.line-numbers {
      margin-right: 10px;
    }

    img.logo {
      display: block;
      margin: 10px auto;
    }

    h1.logo {
      text-align: center;
      font-size: 40px;
    }
  </style>
</head>
<body>
  <%# Page shell: title, header image, then the rendered article in @body. %>
  <%# NOTE(review): the id is spelled "containter"; kept as-is in case anything outside this template targets it. %>
  <div id="containter">
    <h1 class="logo">Practicing Ruby</h1>
    <%# alt is mandatory on img; left empty because the h1 above already carries the title text. %>
    <img class="logo" src="../../assets/images/header.png" alt="" />
    <%= @body %>
  </div>
</body>
</html>