Repository: pkardas/notes Branch: master Commit: 7b0d56be00b4 Files: 180 Total size: 904.1 KB Directory structure: gitextract_z_5iswqa/ ├── .gitignore ├── README.md ├── books/ │ ├── architecture-hard-parts.md │ ├── build.md │ ├── clean-agile.md │ ├── clean-code.md │ ├── coaching-agile-teams.md │ ├── code-complete.md │ ├── comic-agile.md │ ├── cracking-coding-interview/ │ │ ├── Dockerfile │ │ ├── docker-compose.yml │ │ ├── notes.md │ │ ├── requirements.txt │ │ └── src/ │ │ ├── ch01_arrays_and_strings/ │ │ │ ├── check_permutation.py │ │ │ ├── is_unique.py │ │ │ ├── one_away.py │ │ │ ├── palindrome_permutation.py │ │ │ ├── rotate_matrix.py │ │ │ ├── string_compression.py │ │ │ ├── string_rotation.py │ │ │ ├── urlify.py │ │ │ └── zero_matrix.py │ │ └── ch02_linked_lists/ │ │ ├── delete_middle_node.py │ │ ├── intersection.py │ │ ├── linked_list.py │ │ ├── loop_detection.py │ │ ├── palindrome.py │ │ ├── partition.py │ │ ├── remove_dups.py │ │ ├── return_kth_to_last.py │ │ └── sum_lists.py │ ├── ddd.md │ ├── ddia.md │ ├── docker-deep-dive.md │ ├── elixir.md │ ├── fundamentals-of-architecture.md │ ├── go/ │ │ ├── ch01/ │ │ │ ├── Makefile │ │ │ └── hello.go │ │ ├── ch02/ │ │ │ ├── const.go │ │ │ └── unicode.go │ │ ├── ch03/ │ │ │ └── types.go │ │ ├── ch04/ │ │ │ ├── case.go │ │ │ ├── for.go │ │ │ └── if.go │ │ ├── ch05/ │ │ │ ├── anonymous.go │ │ │ ├── deferExample.go │ │ │ ├── functionAsParam.go │ │ │ ├── functions.go │ │ │ ├── functionsAreValues.go │ │ │ └── returnFunction.go │ │ ├── ch06/ │ │ │ └── pointers.go │ │ ├── ch07/ │ │ │ ├── counter.go │ │ │ ├── dependencyInjection.go │ │ │ ├── embedding.go │ │ │ ├── intTree.go │ │ │ ├── interfaces.go │ │ │ ├── iota.go │ │ │ └── types.go │ │ ├── ch08/ │ │ │ ├── customErrors.go │ │ │ ├── errors.go │ │ │ ├── panic.go │ │ │ ├── recover.go │ │ │ ├── sentinel.go │ │ │ └── wrappingErrors.go │ │ ├── ch09/ │ │ │ ├── formatter/ │ │ │ │ └── formatter.go │ │ │ ├── main.go │ │ │ └── math/ │ │ │ └── math.go │ │ ├── ch10/ │ │ │ ├── deadlock.go │ │ │ ├── deadlockSolution.go │ │ │ └── goroutinesExample.go │ │ └── notes.md │ ├── hands-on-ml.md │ ├── head-first-design-patterns/ │ │ ├── ch_01_strategy.py │ │ ├── ch_02_observer.py │ │ ├── ch_03_decorator.py │ │ ├── ch_04_factory.py │ │ ├── ch_05_singleton.py │ │ ├── ch_06_command.py │ │ ├── ch_07_adapter.py │ │ ├── ch_07_facade.py │ │ ├── ch_08_template_method.py │ │ ├── ch_09_composite.py │ │ ├── ch_09_iterator.py │ │ ├── ch_10_state.py │ │ ├── ch_11_virtual_proxy.py │ │ └── notes.md │ ├── kubernetes-book.md │ ├── kubernetes-in-action.md │ ├── nlp-book.md │ ├── peopleware.md │ ├── pragmatic-programmer.md │ ├── pytest/ │ │ ├── .coveragerc │ │ ├── Dockerfile │ │ ├── docker-compose.yml │ │ ├── notes.md │ │ ├── requirements.txt │ │ ├── setup.cfg │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── cli.py │ │ │ └── db.py │ │ └── tests/ │ │ ├── ch_02/ │ │ │ ├── test_card.py │ │ │ ├── test_classes.py │ │ │ ├── test_exceptions.py │ │ │ └── test_helper.py │ │ ├── ch_03/ │ │ │ ├── conftest.py │ │ │ ├── test_autouse.py │ │ │ ├── test_count.py │ │ │ ├── test_count_initial.py │ │ │ ├── test_fixtures.py │ │ │ ├── test_rename_fixture.py │ │ │ └── test_some.py │ │ ├── ch_04/ │ │ │ ├── conftest.py │ │ │ ├── test_config.py │ │ │ ├── test_tmp.py │ │ │ └── test_version.py │ │ ├── ch_05/ │ │ │ └── test_parametrize.py │ │ ├── ch_06/ │ │ │ ├── pytest.ini │ │ │ ├── test_builtin.py │ │ │ ├── test_custom.py │ │ │ └── text_combination.py │ │ ├── ch_12/ │ │ │ ├── hello.py │ │ │ └── test_hello.py │ │ └── ch_15/ │ │ ├── conftest.py │ │ ├── 
pytest.ini │ │ └── test_slow.py │ ├── python-architecture-patterns/ │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── docker-compose.yml │ │ ├── notes.md │ │ ├── requirements.txt │ │ ├── setup.cfg │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── adapters/ │ │ │ │ ├── __init__.py │ │ │ │ ├── notifications.py │ │ │ │ ├── orm.py │ │ │ │ ├── redis_publisher.py │ │ │ │ └── repository.py │ │ │ ├── app.py │ │ │ ├── bootstrap.py │ │ │ ├── config.py │ │ │ ├── domain/ │ │ │ │ ├── __init__.py │ │ │ │ ├── commands.py │ │ │ │ ├── events.py │ │ │ │ └── model.py │ │ │ ├── redis_consumer.py │ │ │ ├── service_layer/ │ │ │ │ ├── __init__.py │ │ │ │ ├── handlers.py │ │ │ │ ├── message_bus.py │ │ │ │ └── unit_of_work.py │ │ │ └── views.py │ │ └── tests/ │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── e2e/ │ │ │ ├── __init__.py │ │ │ ├── api_client.py │ │ │ ├── redis_client.py │ │ │ ├── test_app.py │ │ │ └── test_external_events.py │ │ ├── integration/ │ │ │ ├── __init__.py │ │ │ ├── test_uow.py │ │ │ └── test_views.py │ │ └── unit/ │ │ ├── __init__.py │ │ ├── test_batches.py │ │ ├── test_handlers.py │ │ └── test_product.py │ ├── refactoring.md │ ├── release-it.md │ ├── system-design-interview.md │ ├── tidy-first.md │ └── understanding-distributed-systems.md ├── case-studies/ │ └── reddit.md ├── conferences/ │ ├── aws-innovate-ai-ml-21.md │ ├── brown-bags.md │ └── pycon-2022.md ├── courses/ │ └── fast-ai.md ├── patterns/ │ ├── abbreviations.md │ └── architecture.md └── teaching/ ├── python-intermediate/ │ └── README.md └── python-intro/ ├── README.md └── notebook.ipynb ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .DS_Store .AppleDouble .LSOverride .idea .ipynb_checkpoints */.pytest_cache/ git-user.sh /excluded_resources/* ================================================ FILE: README.md ================================================ # 👉👉👉 Visit [musicat.fm](https://musicat.fm) 😻 You can connect Spotify and Apple Music to it to discover many cool statistics about your taste! (I'm the author 🤩) --- ## Notes ### Books 👀 In progress: - [System design interview](books/system-design-interview.md) #### ✅ Finished: - Code: - [Clean Code: A Handbook of Agile Software Craftsmanship](books/clean-code.md) - [Learning Go: An Idiomatic Approach to Real-World Go Programming](books/go/notes.md) - [Python Testing with Pytest](books/pytest/notes.md) - [Refactoring: Improving the Design of Existing Code](books/refactoring.md) - [Tidy first?](books/tidy-first.md) - Architecture: - [Architecture Patterns with Python](books/python-architecture-patterns/notes.md) - [Designing Data-Intensive Applications: The Big Ideas Behind Reliable, Scalable, and Maintainable Systems](books/ddia.md) - [Head First Design Patterns: Building Extensible and Maintainable Object-Oriented Software](books/head-first-design-patterns/notes.md) - [Release It! 
Design and Deploy Production-Ready Software](books/release-it.md) - [Fundamentals of Software Architecture](books/fundamentals-of-architecture.md) - Process: - [Clean Agile: Back to Basics](books/clean-agile.md) - [Domain-Driven Design: Tackling Complexity in the Heart of Software](books/ddd.md) - [Peopleware: Productive Projects and Teams](books/peopleware.md) - [The Pragmatic Programmer](books/pragmatic-programmer.md) - [Comic Agilé](books/comic-agile.md) - DevOps: - [The Kubernetes Book](books/kubernetes-book.md) - Product: - :eyes: - ML: - [Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition](books/nlp-book.md) #### ☑️ Finished partially: - [Code Complete: A Practical Handbook of Software Construction](books/code-complete.md) - [Cracking the Coding Interview](books/cracking-coding-interview/notes.md) - [Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems](books/hands-on-ml.md) - [Build](books/build.md) - [Coaching Agile Teams](books/coaching-agile-teams.md) #### ⏳ Queue: - [Docker Deep Dive](books/docker-deep-dive.md) - [Software Architecture: The Hard Parts](books/architecture-hard-parts.md) - [Understanding Distributed Systems](books/understanding-distributed-systems.md) - [Kubernetes in Action](books/kubernetes-in-action.md) - [Elixir in Action](books/elixir.md) ### Case Studies - [Reddit](case-studies/reddit.md) ### Conferences - [PyCon 2022](conferences/pycon-2022.md) - [AWS Innovate: AI/ML Edition 2021](conferences/aws-innovate-ai-ml-21.md) - [Brown Bags](conferences/brown-bags.md) ### Patterns - [Abbreviations](patterns/abbreviations.md) - [Architecture](patterns/architecture.md) ### Teaching - [Introduction to Programming: Python for beginners](teaching/python-intro) - [Python Intermediate](teaching/python-intermediate) ### Courses - [Course @ FastAI](courses/fast-ai.md) ================================================ FILE: books/architecture-hard-parts.md ================================================ [go back](https://github.com/pkardas/learning) # Software Architecture: The Hard Parts: Modern Tradeoff Analysis for Distributed Architectures Book by Pramod Sadalage, Neal Ford, Mark Richards, Zhamak Dehghani ================================================ FILE: books/build.md ================================================ [go back](https://github.com/pkardas/learning) # Build Book by Tony Fadell - [1.1 Adulthood](#11-adulthood) - [1.2 Get a job](#12-get-a-job) - [1.3 Heroes](#13-heroes) - [1.4 Don't (only) look down](#14-dont-only-look-down) - [2.1 Just managing](#21-just-managing) - [2.2 Data versus opinion](#22-data-versus-opinion) - [2.3 Assholes](#23-assholes) - [2.4 I quit](#24-i-quit) - [3.1 Make the intangible tangible](#31-make-the-intangible-tangible) - [3.2 Why storytelling](#32-why-storytelling) - [3.3 Evolution versus disruption versus execution](#33-evolution-versus-disruption-versus-execution) - [3.4 Your first adventure - and your second](#34-your-first-adventure---and-your-second) ## 1.1 Adulthood When you are looking at the array of potential careers before you, the correct place to start is "What do I want to learn?" - NOT: How much money do I want to make? - NOT: What title do I want to have? - NOT: What company has enough name recognition? Early adulthood is about watching your dreams go up in flames and learning as much as you can from the ashes. 
Go where you can grow - the people, the mission, and the opportunity are all that matter.

> The only failure in your twenties is inaction. The rest is trial and error.

Humans learn through productive struggle, by trying it themselves and screwing up and doing it differently next time. You have to push yourself up the mountain, even if it means you might fall off a cliff.

## 1.2 Get a job

If you are going to throw your time, energy, and youth at a company, try to join one that's not just making a better mousetrap. Find a business that's starting a revolution:

- it's creating a product that's wholly new or combines existing technology in a novel way that the competition can't make or even understand
- this product solves a problem - a real pain point - that a lot of customers experience daily
- the novel technology can deliver on the company vision
- leadership is not dogmatic about what the solution looks like and is willing to adapt to their customers' needs
- it's thinking about a problem or a customer need in a way you've never heard before, but makes perfect sense once you hear it

Cool technology isn't enough, a great team isn't enough, plenty of funding isn't enough. You have to time your product right. The world has to be ready to want it. If you're not solving a real problem, you can't start a revolution.

Seemingly impossible problems that a decade ago would have cost billions to solve, requiring massive investments from giant firms, can now be figured out with a smartphone app, a small sensor, and the internet.

If you are passionate about something - something that could be solving a huge problem one day - then stick with it. Because one day, if you are truly solving a real issue, when the world is ready to want it, you will already be there.

You don't have to be an executive right away, you don't have to get a job at the most amazing, world-changing company out of college, but you should have a goal.

## 1.3 Heroes

The only thing that can make a job truly amazing or a complete waste of time is the people.

You always have something to offer if you are curious and engaged. You can always trade and barter good ideas; you can always be kind and find a way to help.

Try to get into a small company, the sweet spot is a business of 30-100 people building something worth building. You could go to Google, Apple, Facebook, or some other giant company, but it will be hard to maneuver yourself to work closely with the rock stars. Smaller companies still have specialization, but usually without silos. And they have a different energy. The whole company will be focused on working together to make one precious idea become reality. Anything unnecessary is shunned - red tape and politics are typically nonexistent. Being in that lifeboat with people you deeply respect is a joy. It is the best time you can have at work. It might be the best time you can have.

## 1.4 Don't (only) look down

IC - individual contributor - a person who doesn't manage others. As an IC, you need to occasionally do 2 things:

- look up - look beyond the next deadline or project, be sure the mission still makes sense to you and that the path to reach it seems achievable
- look around - get out of your comfort zone and away from the immediate team you are on, talk to the other functions in your company to understand their perspectives, needs, and concerns

Don't think doing the work just means locking yourself in a room - a huge part of it is walking with your team. The work is reaching your destination together.
Or finding a new destination and bringing your team with you.

## 2.1 Just managing

6 things you should know before becoming a manager:

- You don't have to be a manager to be successful - many people wrongly assume that the only path to more money and stature is managing a team. There are alternatives that will enable you to get a similar paycheck.
- Remember that once you become a manager, you will stop doing the thing that made you successful in the first place - your job will be communication, communication, communication, recruiting, hiring, firing, setting budgets, reviews, one-to-one meetings, setting goals, keeping people on track, resolving conflicts, mentoring, ...
- Becoming a manager is a discipline - management is a learned skill, not a talent.
- Being exacting and expecting great work is not micromanagement - your job is to make sure the team produces high-quality work, it only turns into micromanagement when you dictate the step-by-step process.
- Honesty is more important than style - you can be successful with any style as long as you never shy away from respectfully telling people the uncomfortable, hard truth that needs to be said.
- Don't worry that your team will outshine you - in fact, it's your goal. You should always be training someone on your team to do your job; the better they are, the easier it is for you to move up and even start managing managers.

When you are a manager, you are no longer just responsible for the work. You are responsible for human beings.

A star individual contributor is incredibly valuable. Valuable enough that many companies will pay them just as much as they'd pay a manager. A truly great IC will be a leader in their chosen function and also become an informal cultural leader, someone who people across the company will seek out for advice and mentorship.

Examining the product in detail and caring deeply about the quality of what your team is producing is not micromanagement. That's exactly what you should be doing. Steve Jobs would bring out a jeweler's loupe and look at individual pixels on a screen to make sure the user interface graphics were properly drawn. As a manager, you should be focused on making sure the team is producing the best possible product.

It is very easy to turn 1:1s into friendly chats that go nowhere, so a clear meeting agenda can be beneficial.

If you are a manager - congrats, you're now a parent. Not because you should treat your employees like children, but because it's now your responsibility to help them work through failure and find success.

## 2.2 Data versus opinion

Data-driven decisions - you can acquire, study, and debate facts - relatively easy to make. Opinion-driven - follow your gut and your vision - always hard and always questioned.

Make decisions, not everyone has to agree - it happens when one person has to make the final call. This isn't a democracy, nor a dictatorship - you can't give orders without explaining yourself.

Storytelling is how you get people to take a leap of faith to do something new. Creating a believable narrative that everyone can latch on to is critical to moving forward and making hard choices. It's all that marketing comes down to. You are selling - your vision, your gut, your opinion.

> It's not data or intuition, it's data and intuition.

## 2.3 Assholes

Up to 12 percent of corporate senior leadership exhibit psychopathic traits. There are different assholes:

- Political assholes - people who master the art of corporate politics, but then do nothing but take credit for everyone else's work.
These assholes usually build a coalition of budding assholes around them.
- Controlling assholes - micromanagers who systematically strangle the creativity and joy out of their team. They never give people credit for their work, never praise it, and often steal it.
- Asshole assholes - they suck at work and everything else; mean, jealous, insecure jerks. They cannot deliver, are deeply unproductive, so they do everything possible to deflect attention away from themselves. They are generally out the door pretty quickly.
- Mission-driven "assholes" - crazy passionate - they are neither easygoing nor easy to work with. Unlike true assholes, they care.

Pushing for greatness doesn't make you an asshole. Not tolerating mediocrity doesn't make you an asshole.

You need to understand their motivations. Controlling assholes won't listen. They will never admit they screwed up. Things you can do when faced with a controlling asshole:

- kill 'em with kindness
- ignore them
- try to get around them
- quit

Most people aren't assholes. And even if they are, they are also human. So don't walk into a job trying to get anyone fired. Start with kindness. Try to make peace. Assume the best.

## 2.4 I quit

Sometimes you need to quit. Here is how you know:

- You are no longer passionate about the mission - every hour at your desk feels like an eternity
- You have tried everything - the company is letting you down

Once you do decide to quit, make sure you leave in the right way - try to finish as much as possible, find a natural breakpoint in your project.

Hating your job is never worth whatever raise, title, or perks they throw at you to stay. The threat of leaving may be enough to push your company to get serious and make whatever change you are asking for. But it might not. Quitting should not be a negotiating tactic - it should be the very last card you play.

Good things take time, big things take longer. If you flit from project to project, company to company, you will never have the vital experience of starting and finishing something meaningful.

## 3.1 Make the intangible tangible

Don't just make a prototype of your product and think you're done. Prototype as much of the full customer experience as possible. Your product isn't only your product. It's the whole user experience.

The customer journey and touchpoints:

- awareness (PR, search, social media, ads)
- education (website, email, blog, trial/demo)
- acquisition (partners, payment model)
- product (design, UX, performance)
- onboarding (quick guide, account creation, tips, how-to videos)
- usage (reliability, usability, updates, lifespan)
- support (troubleshooting, knowledge base, call center, community)
- loyalty (new product, newsletter, promotions, ratings/reviews)

## 3.2 Why storytelling

Every product should have a story, a narrative that explains why it needs to exist and how it will solve your customer's problems. A good product story:

- it appeals to people's rational and emotional sides
- it takes complicated concepts and makes them simple
- it reminds people of the problem that's being solved
- it focuses on the why

The story of your product, your company, and your vision should drive everything you do.

Virus of doubt: "it is a way to get into people's heads, remind them about a daily frustration, get them annoyed about it all over again. You get them angry about how it works now so they can get excited about a new way of doing things."

A product's story is its design, features, images, videos, quotes from customers, tips from reviewers.
It is the sum of what people see and feel about this thing that you have created.

Why does this thing need to exist? Why does it matter? Why will people need it? Why will they love it?

The longer you work on something, the more the "what" takes over the "why". When you get wrapped up in the "what", you get ahead of people. You think everyone can see what you see. But they don't.

Earn their trust by showing that you really know your stuff or understand their needs. Or offer them something useful, connect with them in a new way, so they feel assured that they're making the right choice with your company.

Appeal to their emotions, connect with something they care about. Their worries, their fears. Every person is different, and everyone will read your story differently.

Analogies can be a useful tool in storytelling. They create a shorthand for complicated concepts.

## 3.3 Evolution versus disruption versus execution

Evolution - a small, incremental step to make something better.
Disruption - a fork on the evolutionary tree - something fundamentally new that changes the status quo, usually by taking a novel or revolutionary approach to an old problem.
Execution - doing what you have promised to do and doing it well.

Your version one product should be disruptive, not evolutionary. But disruption alone will not guarantee success. Continue to evolve, but always seek out new ways to disrupt yourself. Disruption should be important for you personally. If you've truly made something disruptive, your competition probably won't be able to replicate it quickly. Just don't overshoot. Don't try to disrupt everything at once.

As your disruptive product, process, or business model begins to gain steam with customers, your competitors will start to get worried. They'll start paying attention, they will get pissed. When companies get angry they undercut your pricing, try to embarrass you with marketing, use negative press, put in new agreements with sales to lock you out of the business. And they might sue you. If they can't innovate, they litigate. The good news is that a lawsuit means you've officially arrived (you are a real threat, and they know it).

Disruption is an extremely delicate balancing act:

- you focus on making one amazing thing but forget that it has to be part of a single, fluid experience
- you execute beautifully on everything else, but the one thing that would have differentiated your product withers away
- you change too many things too fast and regular people can't recognize or understand what you have made; you can't push people too far outside their mental model, not at first

Challenge yourself, over-deliver, create excellent solutions. If you do it right, one disruption will fuel the next. One revolution will domino another.

## 3.4 Your first adventure - and your second

When releasing V1 you have the following tools to make decisions: Vision, Customer insights, Data. Once you start iterating on an existing product, you will have experience and data, so you can use the same tools but in a different order: Data, Customer insights, Vision.

Locking yourself alone in a room to create a manifesto of your single, luminous vision looks and feels indistinguishable from completely losing your mind. Get at least one person - but preferably a small group - to bounce ideas off of. Sketch your ideas together, then fulfill them together.
================================================
FILE: books/clean-agile.md
================================================
[go back](https://github.com/pkardas/learning)

# Clean Agile: Back to Basics

Book by Robert Cecil Martin

- [Chapter 1: Introduction to Agile](#chapter-1-introduction-to-agile)
- [Chapter 2: The Reasons For Agile](#chapter-2-the-reasons-for-agile)
- [Chapter 3: Business Practices](#chapter-3-business-practices)
- [Chapter 4: Team Practices](#chapter-4-team-practices)
- [Chapter 5: Technical Practices](#chapter-5-technical-practices)
- [Chapter 6: Becoming Agile](#chapter-6-becoming-agile)
- [Chapter 7: Craftsmanship](#chapter-7-craftsmanship)
- [Chapter 8: Conclusion](#chapter-8-conclusion)
- [Afterword](#afterword)

## Chapter 1: Introduction to Agile

The Agile Manifesto was written in February 2001 in Utah by 17 software experts. Once a movement becomes popular, the name of that movement gets blurred through misunderstanding and usurpation.

When did Agile begin? More than 50 000 years ago, when humans first decided to collaborate on a common goal. The idea of choosing small intermediate goals and measuring the progress after each is too intuitive, and too human, to be considered any kind of revolution.

Agile was not the only game in town:

- Scientific Management - top-down, command-and-control approach. Big up-front planning followed by careful detailed implementation. Worked best for projects that suffered a high cost of change and solved very well-defined problems with extremely specific goals.
- Waterfall - logical descendant of Scientific Management. Even though it was not what the author was recommending, it was the concept people took away from his paper. And it dominated the next 3 decades.

It dominated, but it didn't work. How could thoroughly analyzing the problem, carefully designing a solution, and then implementing that design fail so spectacularly over and over again?

The Agile reformation began in the late 1980s. In 1995 a famous paper on Scrum was written.

The Preamble of the Agile Manifesto:

> We are uncovering better ways of developing software by doing it and helping others do it.

The Agile Manifesto:

> **Individuals and interactions** over processes and tools.
> **Working software** over comprehensive documentation.
> **Customer collaboration** over contract negotiation.
> **Responding to change** over following a plan.

The Iron Cross of project management: good, fast, cheap, done - pick any three you like, you will not have the fourth. A good manager drives a project to be good enough, fast enough, cheap enough and done as much as necessary. This is the kind of management that Agile strives to enable. Agile is a _framework_ that helps developers and managers execute this kind of pragmatic project management. However, such management is not done automatically. It is entirely possible to work within the Agile framework and still completely mismanage the project and drive it to failure.

Agile provides data. An Agile development team produces just the kinds of data that managers need in order to make good decisions:

- Velocity - how much the development team has gotten done every week.
- Burn-down chart - shows how many points remain until the next major milestone. Has a slope that predicts when the milestone will probably be reached.

Managers need this data to decide how to set the coefficients on the Iron Cross and drive the project to the best possible outcome.
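A minimal sketch (not from the book) of that projection in Python, the language used elsewhere in this repo - the velocity numbers and the 120-point backlog are made up for illustration:

```python
from statistics import mean


def iterations_to_milestone(remaining_points: int, velocities: list[int]) -> float:
    """Project how many more iterations the milestone needs, assuming
    future velocity resembles the average observed so far."""
    return remaining_points / mean(velocities)


# Illustrative data: points completed in each of the last five iterations.
velocities = [21, 34, 27, 29, 31]

# 120 points remain until the next major milestone; the burn-down slope
# (average velocity, ~28.4 points per iteration) predicts the end date.
print(iterations_to_milestone(120, velocities))  # ~4.2 iterations left
```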
Agile development is first and foremost a feedback-driven approach. Each week, each day, each hour, and even each minute is driven by looking at the results of the previous week, day, hour and minute, and then making the appropriate adjustments.

The Date (deadline) is usually fixed and is not going to change because some developers think they may not be able to make it. At the same time, the requirements are wildly in flux and can never be frozen. This is because the customers don't really know what they want. So the requirements are constantly being re-evaluated and re-thought.

The Waterfall model promised to give us a way to get our arms around this problem:

- The Analysis Phase - no real consensus on just what analysis is; the best definition: "it is what analysts do".
- The Design Phase - is where you split the project up into modules and design interfaces between those modules.
- The Implementation Phase - there is no way to successfully pretend it is done; meanwhile, the requirements are still coming.
- The Death March Phase - customers are angry, stakeholders are angry, the pressure mounts, people quit. Hell.

This can be called Runaway Process Inflation - we are going to do the thing that did not work, and do a lot more of it.

Of course Waterfall was not an absolute disaster. It did not crush every software project into rubble. But it was, and remains, a disastrous way to run a software project.

The Waterfall just makes so much sense. First, we analyze the problem, then we design the solution, and then we implement the design. Simple. Direct. Obvious. And wrong.

An Agile project begins with analysis, but it is an analysis that never ends. Time before the deadline is divided into regular increments called _iterations_ or _sprints_. The size of an iteration is usually one or two weeks. The first iteration (Iteration Zero) is used to generate a short list of features (stories). Iteration Zero is used to set up the development environment, estimate the stories and lay out the initial plan. This process of writing stories, estimating them, planning them and designing never stops. Every iteration will have some analysis and design and implementation in it. In an Agile project, we are always analyzing and estimating.

Software is not a reliably estimable process. We programmers simply do not know how long things will take. There is no way to know how complicated a task is going to be until that task is engaged and finished. After a couple of iterations we get insight into how much time will be needed, based on past iterations. This number averages out at a relatively stable velocity. After four or five iterations, we will have a much better idea when this project will be done.

We practice Agile in order to destroy hope before that hope can kill the project. Hope is the project killer. Hope is what makes a software team mislead managers about their true progress. Hope is a very bad way to manage a software project. And Agile is a way to provide an early and continuous dose of cold, hard reality as a replacement for hope.

Some folks think that Agile is about going fast. It is not. Agile is about knowing, as early as possible, just how screwed we are. The reason we want to know this as early as possible is so that we can manage the situation. Managers manage software projects by gathering data and then making the best decisions they can based on that data. Managers do this by making changes to the scope, the schedule, the staff, and the quality:

- Changing the Schedule - ask stakeholders if we can delay the project. Do this as early as possible.
- Adding Staff - in general, business is simply not willing to change the schedule. When new staff is added, productivity plummets for a few weeks as the new people suck the life out of the old people. Then, hopefully, the new people start to get smart enough to actually contribute. Of course, you need enough time, and enough improvement, to make up for the initial loss.
- Decrease Quality - everyone knows that you can go much faster by producing crap. WRONG. There is no such thing as quick and dirty. Anything dirty is slow. **The only way to go fast, is to go well**. If we want to shorten our schedule, the only option is to _increase_ quality.
- Changing Scope - if the organization is rational, then the stakeholders eventually bow their heads in acceptance and begin to scrutinize the plan. Inevitably the stakeholders will find a feature that we have already implemented and then say "It is a real shame you did that one, we sure do not need it". At the beginning of each iteration, ask the stakeholders which features to implement first.

The 20 000-foot view of Agile:

> Agile is a process wherein a project is subdivided into iterations. The output of each iteration is measured and used
> to continuously evaluate the schedule. Features are implemented in the order of business value so that the most
> valuable things are implemented first. Quality is kept as high as possible. The schedule is primarily managed by
> manipulating scope.

## Chapter 2: The Reasons For Agile

Agile is important because of professionalism and the reasonable expectations from our customers.

- Professionalism - nowadays the cost of software failure is high, therefore we need to increase our professionalism. We are surrounded by computers, and they all need to be programmed - they all need software. Nowadays, virtually nothing of significance can be done without interacting with a software system. Now our actions are putting lives and fortunes at stake.
- Reasonable Expectations - meeting expectations is one of the primary goals of Agile development:
  - we will not ship sh*t - Agile's emphasis on Testing, Refactoring, Simple Design and customer feedback is the obvious remedy for shipping bad code.
  - continuous technical readiness - the system should be technically deployable (solid enough to be deployed) at the end of every iteration.
  - stable productivity - big redesigns are horrifically expensive and seldom get deployed. Instead, developers should continuously keep the architecture, design and code as clean as possible; this keeps their productivity high and prevents the otherwise inevitable spiral into low productivity and redesign.
  - inexpensive adaptability - software: soft (easy to change), ware (product). Software was invented because we wanted a way to quickly and easily change the behavior of our machines. Developers should celebrate change because that is why we are here. Changing requirements is the name of the whole game. Our jobs depend on our ability to accept and engineer changing requirements and to make those changes relatively inexpensive. If a change to the requirements breaks your architecture, then your architecture sucks.
  - continuous improvement - the older a software system is, the better it should be. Unfortunately, it seldom happens - we make things worse with time. The Agile practices of Pairing, TDD, Refactoring, and Simple Design strongly support this expectation.
  - fearless competence - people are afraid of changing bad code: you can break it, and if it breaks it will become yours.
This fear forces you to behave incompetently. Customers, users, and managers expect _fearless competence_. They expect that if you see something wrong or dirty, you will fix it and clean it. They don't expect you to allow problems to fester and grow - they expect you to stay on top of the code, keeping it as clean and clear as possible. How to eliminate that fear? Use TDD.
  - QA should find nothing - the Agile practices support this expectation.
  - test automation - manual tests are always eventually lost. Manual tests are expensive and so are always a target for reduction. Besides, asking humans to do what machines can do is expensive, inefficient, and immoral. Every test that can be feasibly automated must be automated. Manual testing should be limited to those things that cannot be automatically validated and to the creative discipline of Exploratory Testing.
  - we cover for each other - each individual member of a software team makes sure that there is someone who can cover for him if he goes down. It is your responsibility to make sure that one or more of your teammates can cover for you.
  - honest estimates - you should provide estimates based on what you do and do not know. You can estimate in relative terms (task B should take half of the time spent on task A); you can also estimate using ranges.
  - you need to say "no" - when the answer to something is "no", then the answer is really "no". For example, if a solution to a problem cannot be found.
  - continuous aggressive learning - our industry changes quickly. We must be able to change with it. So learn, learn, learn! Learn with or without your company's help.
  - mentoring - the best way to learn is to teach. So when new people join the team, teach them - to teach others is to learn.

Customer Bill of Rights:

- You have the right to an overall plan and to know what can be accomplished when and at what cost.
  - We cannot agree to deliver fixed scopes on hard dates. Either the scopes or the dates must be soft.
- You have the right to get the most possible value out of every iteration.
  - The business has the right to expect that developers will work on the most important things at any given time, and that each iteration will provide them the maximum possible usable business value.
- You have the right to see progress in a running system, proven to work by passing repeatable tests that you specify.
- You have the right to change your mind, to substitute functionality, and to change priorities without paying exorbitant costs.
- You have the right to be informed of schedule and estimate changes, in time to choose how to reduce the scope to meet a required date. You can cancel at any time and be left with a useful working system reflecting investment to date.

Developer Bill of Rights:

- You have the right to know what is needed with clear declarations of priority.
  - Developers are entitled to precision in requirements and in the importance of those requirements. This right applies within the context of an iteration. Outside an iteration, requirements and priorities will shift and change.
- You have the right to produce high-quality work at all times.
  - The business has no right to tell developers to cut corners or do low-quality work. Or, to say this differently, the business has no right to force developers to ruin their professional reputations or violate their professional ethics.
- You have the right to ask for and receive help from peers, managers, and customers.
  - This statement gives programmers the right to communicate.
- You have the right to make and update your estimates.
  - You can change your estimate when new factors come to light. Estimates are guesses that get better with time. Estimates are never commitments.
- You have the right to accept your responsibilities instead of having them assigned to you.
  - Professionals accept work, they are not assigned work. A professional developer has every right to say "no" to a particular job or task. It may be that the developer does not feel confident in their ability to complete the task, or it may be that the developer believes the task is better suited for someone else. Or, it may be that the developer rejects the task for personal or moral reasons. Acceptance implies responsibility.

> Agile is a set of rights, expectations, and disciplines of the kind that form the basis of an ethical profession.

## Chapter 3: Business Practices

If you would like an accurate and precise estimate of a project, then break it down into individual lines of code. The time it takes you to do this will give you a very accurate and precise measure of how long it took you to build the project.

Trivariate Analysis - such estimates are composed of three numbers: best-case, nominal-case, and worst-case. These numbers are confidence numbers. The worst-case number is the amount of time within which you feel 95% confident that the task will be completed. The nominal-case has only 50% confidence, and the best-case only 5%.
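The book stops at the three confidence numbers. One common way to collapse a trivariate estimate into a single planning figure is the PERT beta weighting - an addition here, not something the book prescribes:

```python
def pert_estimate(best: float, nominal: float, worst: float) -> tuple[float, float]:
    """Collapse a trivariate estimate into an expected duration and a rough
    standard deviation, using the classic PERT weighting (an assumption here,
    not the book's method)."""
    expected = (best + 4 * nominal + worst) / 6
    std_dev = (worst - best) / 6
    return expected, std_dev


# Illustrative task: best case 2 days, nominal 4 days, worst case 12 days.
expected, sigma = pert_estimate(2, 4, 12)
print(f"{expected:.1f} days +/- {sigma:.1f}")  # 5.0 days +/- 1.7
```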
Stories and Points - a user story is an abbreviated description of a feature of the system, told from the point of view of a user. We want to delay the specification of those details as long as possible, right up to the point where the story is developed. Story points are a unit of estimated effort, not real time. They are not even estimated time - they are estimated effort.

Velocity is not a commitment. The team is not making a promise to get 30 points done during the iteration. They aren't even making the promise to try to get 30 points done. This is nothing more than their best guess as to how many points will be complete by the end of the iteration.

The Four-Quadrant Game (The Highest Return on Investment) - the stories that are valuable but cheap will be done right away. Those that are valuable but expensive will be done later. Those that are neither valuable nor expensive might get done one day. Those that are not valuable but are expensive will never be done.

Yesterday's weather - the best predictor of today's weather is yesterday's weather. The best predictor of the progress of an iteration is the previous iteration.

The project is over when there are no more stories in the deck worth implementing.

User stories are simple statements that we use as reminders of features. We try not to record too much detail when we write the story because we know that those details will likely change. Stories follow a simple set of guidelines that we remember with the acronym INVEST:

- I - Independent - they do not need to be implemented in any particular order. This is a soft requirement because there may be stories that depend on other stories. Still, we try to separate the stories so that there is little dependence.
- N - Negotiable - we want details to be negotiable between the developers and the business.
- V - Valuable - the story must have clear and quantifiable value to the business. Refactoring/Architecture/Code cleanup is never a story. A story is always something that the business values.
- E - Estimable - must be concrete enough to allow the developers to estimate it.
- S - Small - a user story should not be larger than what one or two developers can implement in a single iteration.
- T - Testable - the business should be able to articulate tests that will prove that the story has been completed.

There are a number of schemes for estimating stories:

- Flying Fingers
- Planning Poker

A spike is a meta-story, or a story for estimating a story. It is called a spike because it often requires us to develop a long but very thin slice through all the layers of the system. For example, there is a story you cannot estimate: Print PDF - you have never used the PDF library. So you write a new story called Estimate Print PDF - now you estimate that story, which is easier to estimate.

The goal of each iteration is to produce data by getting stories done. The team should focus on stories rather than tasks within stories. It is far better to get 80% of the stories done than it is to get each story 80% done. Focus on driving the stories to completion.

A story cannot be completed without the acceptance tests. If QA continues to miss the midpoint deadline, one iteration after another, then the ratio of QA engineers to developers is likely wrong. After the midpoint, if all the acceptance tests are done, QA should be working on the tests for the next iteration. The definition of done is this: acceptance tests pass.

If we see a positive slope in velocity, it likely does not mean that the team is actually going faster. Rather, it probably means that the project manager is putting pressure on the team to go faster. As that pressure builds, the team will unconsciously shift the value of their estimates to make it appear that they are going faster. This is simple inflation. The points are a currency, and the team is devaluing them under external pressure. The lesson is that velocity is a measurement, not an objective. Don't put pressure on the thing you are measuring. An estimate is not a promise, and the team has not failed if the actual velocity is lower.

The practice of Small Releases suggests that a development team should release their software as often as possible. The new goal is Continuous Delivery - the practice of releasing the code to production after every change.

Acceptance Tests - requirements should be specified by the business.

BDD - Behavior-Driven Development - the goal is to remove the techie jargon from the tests and make the tests appear more like specifications that businesspeople would appreciate. At first, this was just another attempt at formalizing the language of testing, in this case using 3 special adverbs: Given, When, and Then.
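A sketch of how the three adverbs read in a plain pytest test - the `Account` class and the scenario are invented for illustration, not taken from the book:

```python
class Account:
    """Hypothetical domain object, defined inline to keep the example self-contained."""

    def __init__(self, balance: int) -> None:
        self.balance = balance

    def withdraw(self, amount: int) -> None:
        if amount > self.balance:
            raise ValueError("insufficient funds")
        self.balance -= amount


def test_withdrawal_reduces_balance():
    # Given an account with a balance of 100
    account = Account(balance=100)
    # When the customer withdraws 40
    account.withdraw(40)
    # Then the remaining balance is 60
    assert account.balance == 60
```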
## Chapter 4: Team Practices

A metaphor can provide a vocabulary that allows the team to communicate effectively. On the other hand, some metaphors are silly to the point of being offensive to the customer. DDD solved the metaphor problem. Eric Evans coined the term _Ubiquitous Language_. What the team needs is a model of the problem domain, which is described by a vocabulary that everyone (the programmers, QA, managers, customers, users) agrees on. The Ubiquitous Language is used in all parts of the project. It is a thread of consistency that interconnects the entire project during every phase of its lifecycle.

A software project is not a marathon, not a sprint, nor a sequence of sprints. In order to win, you must pace yourself. If you leap out of the blocks and run full speed, you will run out of energy long before you cross the finish line. You must run at a Sustainable Pace. If you try to run faster than the pace you can sustain, you will have to slow down and rest before you reach the finish line. Managers may ask you to run faster than you should. You must not comply. It is your job to husband your resources to ensure that you endure to the end.

> Working overtime is not a way to show your dedication to your employer. What it shows is that you are a bad planner,
> that you agree to deadlines to which you shouldn't agree, that you make promises you shouldn't make, that you are a
> manipulable laborer and not a professional. This is not to say that all overtime is bad, nor that you should never
> work overtime. There are extenuating circumstances for which the only option is to work overtime. But they should be
> extremely rare. And you must be aware that the cost of that overtime will likely be greater than the time you save on
> the schedule.

The most precious ingredient in the life of a programmer is sufficient sleep. Make sure you know how many hours of sleep your body needs, and then prioritize those hours. Those hours will more than pay for themselves.

No one owns the code in an Agile project. The code is owned by the team as a whole. Any member of the team can check out and improve any module in the project at any time. The team owns the code collectively. Collective Ownership does not mean that you cannot specialize. However, even as you specialize, you must also generalize. Divide your work between your specialty and other areas of the code. Maintain your ability to work outside your specialty.

The continuous build should never break.

Standup Meeting:

- This meeting is optional. Many teams get by just fine without one.
- It can be held less often than daily. Pick the schedule that makes sense to you.
- It should take ~10 minutes, even for large teams.
- This meeting follows a simple formula. The basic idea is that the team members stand in a circle and answer 3 questions:
  1. What did I do since the last meeting?
  2. What will I do until the next meeting?
  3. What is in my way?
  4. [Optional] Whom do you want to thank?

No discussion. No posturing. No deep explanations. No complaints. Everybody gets 30 seconds to answer those 3 questions.

## Chapter 5: Technical Practices

Without TDD, Refactoring, Simple Design and Pair Programming, Agile becomes an ineffective, flaccid shell of what it was intended to be.

TEST-DRIVEN DEVELOPMENT. Every required behavior should be entered twice: once as a test, and then again as production code that makes the test pass. The 3 rules of TDD:

1. Do not write any production code until you have first written a test that fails due to the lack of that code.
2. Do not write more of a test than is sufficient to fail - and failing to compile counts as a failure.
3. Do not write more production code than is sufficient to pass the currently failing test.

The tests are a form of documentation that describes the system being tested. This documentation is written in a language that the programmers know fluently. It is utterly unambiguous, it is so formal it executes, and it cannot get out of sync with the application code. The tests are the perfect kind of documentation for programmers: code.

Remember that function that is hard to test after the fact? The function is hard to test because you did not design it to be easy to test. You wrote the code first, and you are now writing the tests as an afterthought. By writing the tests first, you will decouple the system in ways that you had never thought about before. The whole system will be testable; therefore, the whole system will be decoupled.
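A minimal sketch of one pass through the three rules with pytest (the test runner used elsewhere in this repo); the `Stack` example is hypothetical:

```python
# Red (rules 1 and 2): this test is written before any production code exists.
# With no Stack class yet, it cannot even run - in Python, a NameError plays
# the role of "failing to compile".
def test_new_stack_is_empty():
    assert Stack().is_empty()


# Green (rule 3): write only enough production code to pass the failing test.
# Anything more elaborate than this hard-coded answer would violate rule 3.
class Stack:
    def is_empty(self) -> bool:
        return True


# The next failing test (e.g. that pushing makes the stack non-empty) is what
# forces `return True` to grow into real logic.
```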
REFACTORING. Refactoring is the practice of improving the structure of the code without altering the behavior, as defined by the tests. In other words, we make changes to the names, the classes, the functions and the expressions without breaking any of the tests.

Red/Green/Refactor:

1. We create a test that fails.
2. Then we make the test pass.
3. Then we clean up the code.
4. Return to step 1.

The word Refactoring should never appear on a schedule. Refactoring is not the kind of activity that appears on a plan. We do not reserve time for refactoring. Refactoring is simply part of our minute-by-minute, hour-by-hour approach to writing software.

Sometimes the requirements change in such a way that you realize the current design and architecture of the system is suboptimal, and you need to make a significant change to the structure of the system. Such changes are made within the Red/Green/Refactor cycle. We do not create a project specifically to change the design. We do not reserve time in the schedule for such large refactorings. Instead, we migrate the code one small step at a time, while continuing to add new features during the normal Agile cycle.

SIMPLE DESIGN. The practice of Simple Design is one of the goals of Refactoring. Simple Design is the practice of writing only the code that is required, with a structure that keeps it simplest, smallest, and most expressive. Rules of Simple Design:

1. Pass all the tests.
2. Reveal the intent - it should be easy to read and self-descriptive. This is where we apply many of the simpler and more cosmetic refactorings. We also split large functions into smaller, better-named functions.
3. Remove duplication.
4. Decrease elements - once we have removed all the duplication, we should strive to decrease the number of structural elements, such as classes, functions, variables.

The more complex the design, the greater the cognitive load placed on the programmers. That cognitive load is Design Weight. The greater the weight of that design, the more time and effort are required for the programmers to understand and manipulate the system.
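A small before/after sketch of rules 2 and 3, with invented order data - equivalent logic, restructured to reveal intent and remove the duplicated subtotal expression:

```python
# Before: passes its tests, but reveals nothing about the intent.
def chk(o):
    return o["t"] * o["q"] - (o["t"] * o["q"]) * (0.1 if o["q"] > 100 else 0)


# After: the same calculation (keys renamed for readability) with named
# constants, a named subtotal, and the duplication gone.
BULK_THRESHOLD = 100
BULK_DISCOUNT = 0.1


def order_total(order: dict) -> float:
    subtotal = order["unit_price"] * order["quantity"]
    discount = BULK_DISCOUNT if order["quantity"] > BULK_THRESHOLD else 0
    return subtotal * (1 - discount)
```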
PAIR PROGRAMMING. Pairing is the act of two people working together on a single programming problem. Any configuration is fine (the same workspace, sharing the screen, keyboard, ping-pong, ...). We pair so that we behave like a team. When a member of a team goes down, the other team members cover the hole left by that member and keep making progress towards the goal.

**Pairing is the best way, by far, to share knowledge between team members and prevent knowledge silos from forming. It is the best way to make sure that nobody on the team is indispensable.**

The word "pair" implies that there are just 2 programmers involved in a pairing session. While this is typically true, it is not a rule. Generally, managers are pleased to see programmers collaborating and working together. It creates the impression that work is being done.

**Never, ever, ever, ask for permission to pair. Or test. Or refactor. Or... You are the expert. You decide.**

## Chapter 6: Becoming Agile

Agile Values:

1. Courage - it is reckless to conform to a schedule by sacrificing quality. The belief that quality and discipline increase speed is a courageous belief, because it will constantly be challenged by powerful but naive folks who are in a hurry.
2. Communication - a team that sits together and communicates frequently can work miracles. We value direct and frequent communication that crosses channels. Face-to-face, informal, interpersonal conversations.
3. Feedback - maximize the frequency and quantity of feedback. Feedback loops allow us to determine when things are going wrong early enough to correct them. They provide massive education about the consequences of earlier decisions.
4. Simplicity - the number of problems should be reduced to a minimum, and indirection kept to a minimum. Solutions can be simple. This applies to the software, but it also applies to the team. Passive aggression is indirection. Keep the code simple. Keep the team simpler.

These values are diametrically opposed to the values of large organisations that have invested heavily in middle-management structures that value safety, consistency, command-and-control, and plan execution. It is not really possible to transform such an organisation to Agile.

Agile coaches are members of the team whose role is to defend the process within the team. In the heat of development, developers may be tempted to go off process. Perhaps they inadvertently stop pairing, stop refactoring, or ignore failures in the continuous build. The coach acts as the team's conscience, always reminding the team of the promises they made to themselves and the values they agreed to hold. This role typically rotates from one team member to the next on an informal schedule and based on need. A mature team working steadily along does not require a coach. On the other hand, a team under some kind of stress (schedule, business or interpersonal) may decide to ask someone to fill the role temporarily.

Every member of an Agile team needs to understand the values and techniques of Agile. Therefore, if one member of the team is trained, all members of the team should be trained.

Agile is for small- to medium-sized teams. Period. It works well for such teams. Agile was never intended for large teams. The problem of large teams is a problem of societies and civilizations. And large teams are a solved problem. Agile was invented because we did not know how to effectively organize a relatively small group of programmers to be effective. Software development needed its own process because software is really like nothing else. The answer to the question of Agile in the large is simply to organize your developers into small Agile teams, then use standard management and operations research techniques to manage those teams.

Great tools do the following:

- Help people accomplish their objectives
- Can be learned "well enough" quickly
- Become transparent to users
- Allow adaptation and exaptation
- Are affordable

Git is an example of a great tool. Your team should establish the pattern of work compatible with their specific context first, and then consider using tools that support their workflow. Workers use and control tools; tools don't control and use people. You don't want to get locked into other people's process flows.

ALM (Agile Lifecycle Management) systems - despite being feature-rich and commercially successful, ALM tools utterly fail at being great:

- ALMs tend to be complicated, usually demanding up-front training.
- These tools often require constant attention.
- ALM tools aren't always easily adapted.
- ALM tools can be expensive.
- ALM tools rarely work the way your team does, and often their default mode is at odds with Agile methods.
For example, many ALM tools assume that team members have individual work assignments, which makes them nearly unusable for teams who work together in a cross-functional way.

You can try different forms of Agile practices and check which one is the most relevant to your team's needs:

- Kanban - making the work visible, limiting work in progress and pulling work through the system.
- Scrum and XP - short daily meetings, a product owner, a process facilitator (Scrum Master), retrospectives, a cross-functional team, user stories, small releases, refactoring, writing tests first, and pair programming.
- Align team events - when team events across multiple teams (standups, retrospectives) are aligned in time, it is possible to roll up daily and systemic impediments via an escalation tree.
- Escalation trees - if it makes sense to always work on items that produce the highest value, then it makes sense to escalate impediments immediately via a well-defined escalation path.
- Regular interteam interaction - regular interaction between the Scrum Masters, Product Owners and team members who are working together toward a common deliverable.
- Portfolio Kanban - sets work-in-progress limits at the initiative level in order to ensure that the organization is focused on the highest-value work at all times.
- Minimum Viable Increments - what is the shortest path to producing the highest value in the shortest time. A growing number of organizations are taking this to the extreme by implementing Continuous Delivery - releasing small updates on a frequent basis, sometimes as frequently as multiple times per day.

Enablers of multiteam coordination:

- SOLID - especially useful for simplifying multiteam coordination by dramatically reducing dependencies.
- Small, valuable user stories - limit the scope of dependencies, which simplifies multiteam coordination.
- Small, frequent releases - whether these releases are delivered to the customer or not, the practice of having a releasable product across all the teams involved helps to surface coordination and architectural issues so that the root cause can be found and addressed.
- Continuous Integration - calling for integration across the entire product after every check-in.
- Simple Design - one of the hardest practices to learn and apply, because it is one of the most counter-intuitive practices.

When coordinating the work of many teams, monolithic, centralized, preplanned architectures create massive dependencies between the teams that tend to force them to work in lock step, thus defeating much of the promise of Agile. Simple Design, especially when used with practices such as a microservices architecture, enables Agility in the large.

## Chapter 7: Craftsmanship

Many companies misunderstood Agile. Managers are willing to push developers to work faster and are using the full transparency of the process to micromanage them. Developers are pushed hard to fit their estimates into the imposed milestones. Failing to deliver all story points in a sprint means the developers must work harder in the next sprint to make up the delay. If the product owner thinks developers are spending too much time on things like automated tests, refactoring, or pairing, they simply tell them to stop doing it. Strategic technical work has no place in _their_ Agile process. There is no need for architecture or design. The order is to simply focus on the highest-priority item in the backlog and get it done as fast as possible.
This approach results in a long sequence of iterative tactical work and accumulation of technical debt. Bugs accumulate, delivery time goes up, people start to blame one another.

> Companies are still not mature enough to understand that technical problems are in fact business problems.

A group of developers met in November 2008 in Chicago to create a new movement: Software Craftsmanship. Manifesto: As aspiring Software Craftsmen, we are raising the bar of professional software development by practicing it and helping others learn the craft. Through this work we have come to value:

- Not only working software, but also well-crafted software.
- Not only responding to change, but also steadily adding value.
- Not only individuals and interactions, but also a community of professionals.
- Not only customer collaboration, but also productive partnerships.

The Software Craftsmanship manifesto describes an ideology, a mindset. It promotes professionalism through different perspectives.

**Well-crafted software** - code that is well-designed and well-tested. It is code that we are not scared to change and code that enables the business to react fast. It is code that is both flexible and robust.

**Steadily adding value** - no matter what we do, we should always be committed to continuously providing increasing value to our clients and customers.

**A community of professionals** - we are expected to share and learn with each other, raising the bar of our industry. We are responsible for preparing the next generation of developers.

**Productive partnership** - we will have a professional relationship with our clients and employers. We will always behave ethically and respectfully, advising and working with our clients and employers in the best way possible. We will expect a relationship of mutual respect and professionalism. We will look at our work not as something we need to do as part of a job but as a professional service we provide. We will take ownership of our own careers, investing our own time and money to get better at what we do. Craftspeople strive to do the best job they can, not because someone is paying, but based on a desire to do things well.

Developers should not ask for authorization to write tests. They should not have separate tasks for unit testing or refactoring. These technical activities should be factored into the development of any feature. They are not optional. Managers and developers should only discuss what is going to be delivered and when, not how. Every time developers volunteer details of how they work, they are inviting managers to micromanage them. Developers should be able to clearly describe how they work and the advantages of working that way to whomever is interested. What developers should not do is let other people decide how they work. Conversations between developers and business should be about why, what and when - not how.

Craftsmanship promotes software development as a profession. A profession is part of who we are. A job is a thing that we do but is not part of who we are. A profession is something we invest in. It is something we want to get better at. We want to gain more skills and have a long-lasting and fulfilling career. Combining Agile and Craftsmanship is the perfect way to achieve business agility.

## Chapter 8: Conclusion

This book covered the basics of Agile.
## Afterword

Ask the developers in an "Agile organization" what Agile is, and you will likely get a very different answer than if you ask anyone beyond the level of a software development manager. Developers understand Agile to be a methodology for streamlining the development process and for making software development more predictable, more practicable, and more manageable. Many developers are blissfully unaware of how management uses the metrics and data produced by the implementation of Agile practices.

================================================
FILE: books/clean-code.md
================================================

[go back](https://github.com/pkardas/learning)

# Clean Code: A Handbook of Agile Software Craftsmanship

Book by Robert Cecil Martin

- [Chapter 1: Clean Code](#chapter-1-clean-code)
- [Chapter 2: Meaningful names](#chapter-2-meaningful-names)
- [Chapter 3: Functions](#chapter-3-functions)
- [Chapter 4: Comments](#chapter-4-comments)
- [Chapter 5: Formatting](#chapter-5-formatting)
- [Chapter 6: Objects and Data Structures](#chapter-6-objects-and-data-structures)
- [Chapter 7: Error Handling](#chapter-7-error-handling)
- [Chapter 8: Boundaries](#chapter-8-boundaries)
- [Chapter 9: Unit Tests](#chapter-9-unit-tests)
- [Chapter 10: Classes](#chapter-10-classes)
- [Chapter 11: Systems](#chapter-11-systems)
- [Chapter 12: Emergence](#chapter-12-emergence)
- [Chapter 13: Concurrency](#chapter-13-concurrency)
- [Chapter 17: Smells and Heuristics](#chapter-17-smells-and-heuristics)

## Chapter 1: Clean Code

- ugly code is expensive - take your time to write good code
- bad code is the programmer's fault, not the PO's, the manager's or anyone else's
- bad code is like a building with broken windows - people see an ugly building and stop caring
- code is like prose, code should look like you care
- make the language look like it was made for the problem
- code rots quickly

## Chapter 2: Meaningful names

A variable name should answer all the questions. It should tell why it exists. If a name requires a comment, it does not reveal its intent. Names should be pronounceable. One-letter variables are hard to `grep` in the code - they should be used ONLY as local variables inside short methods. The length of a name should correspond to the size of its scope. Avoid encodings.

> Difference between a smart programmer and a professional programmer is that the professional programmer understands that **clarity is king**.

Don't be funny 😔 People tend to forget jokes, so people will forget the true meaning of a variable. Choose clarity over entertainment. Do not use slang or culture-dependent names. Pick one word per concept, e.g. `get` instead of `fetch`, `retrieve`, ...

## Chapter 3: Functions

Functions are the first line of organisation in any program. Functions should be small. No more than 2-3 indents.

> Functions should do one thing. They should do it well. They should do it only.

The reason we write functions is to decompose a larger concept. A function should not mix levels of abstraction.

> You know you are working on clean code when each routine turns out to be pretty much what you expected.

Don't be afraid to make a name long. The more function arguments, the worse - difficulties with testing. Passing a boolean flag to a function is extremely ugly. Grouping arguments into objects seems like cheating, but it is not. Functions should have no side effects.

*Command Query Separation* - functions should either do something or answer something, but not both. Exceptions are preferred over error codes.
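A minimal Python sketch of both ideas - the names (`Mailbox`, `pop_message`, `EmptyMailboxError`) are invented for illustration, not from the book. A query answers something, a command does something, and a failure raises an exception instead of returning an error code:

```python
class EmptyMailboxError(Exception):
    """Raised instead of returning an error code such as -1 or None."""


class Mailbox:
    def __init__(self, messages: list[str]):
        self._messages = messages

    # Query - answers something, changes nothing.
    def has_messages(self) -> bool:
        return bool(self._messages)

    # Command - does something; on failure it raises instead of
    # returning a sentinel the caller could forget to check.
    def pop_message(self) -> str:
        if not self._messages:
            raise EmptyMailboxError
        return self._messages.pop(0)


mailbox = Mailbox(["hello"])
if mailbox.has_messages():        # ask first (query)...
    print(mailbox.pop_message())  # ...then act (command)
```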
Suggestion: extract exception handling to a separate function.

*Don't repeat yourself* - duplication may be the root of all evil in software. Database normal forms were formed to eliminate duplication in data, OOP concentrates the code, etc.

> Writing software is like any other kind of writing. When you write a paper or article, you get your thoughts down first, then you massage it until it **reads well**.

> The art of programming is, and always has been, the art of language design.

## Chapter 4: Comments

Comments are usually bad, they mean you failed to express yourself in code. IMO: the best comments are the ones that explain why things were done in a particular way. Don't put historical discussions or irrelevant details into the comments.

## Chapter 5: Formatting

Code formatting is important. Visual design of the code is important. Variables should be declared in places that are "well-known for everybody". Functions should show natural flow -> top-down. Another matter is alignment, e.g. of test cases in parametrised tests - useful there, however aligning variable declarations is overkill. In any case, a team should agree upon a single formatting style.

## Chapter 6: Objects and Data Structures

Hiding implementation is about abstractions. The Law of Demeter - a module should not know about the innards of the objects it manipulates. If class *C* has a method *f*, method *f* should call only the methods of: *C*, objects created by *f*, objects passed as arguments to *f*, or objects held in an instance variable of *C*.

Train wreck: `ctxt.getOptions().getScratchDir().getAbsolutePath()` - a bunch of coupled train cars. Does it violate the Law of Demeter? `ctxt` contains options, which contain a scratch directory, which has an absolute path - a lot of knowledge. However, in this case the law does not apply, because these are data structures with no behaviour. It would still be good to hide the structure of `ctxt`, e.g.: `ctxt.getScratchDirectoryOption().getAbsolutePath()`.

Data Transfer Objects - a class with public variables and no functions, e.g. for communicating with the database. Objects expose behaviour and hide data; data structures expose data and have no significant behaviour.

## Chapter 7: Error Handling

Error handling is important, but if it obscures logic, it is wrong. Exceptions are preferred over return codes - return codes clutter the caller with unnecessary code. `try` blocks are like transactions; `catch` has to leave the program in a consistent state. Error messages need to be informative - mention the operation that failed and the type of failure. It might be a good idea to wrap a library's errors with your own exceptions - this makes the library easily replaceable.

## Chapter 8: Boundaries

How to keep the boundaries of our system clean - e.g. when using external libraries:

- when working with collections, wrap them with an object and provide only the required functionality
- write learning tests - tests written to explore and understand an API
- our code shouldn't know too many details about a 3rd-party library - use an ADAPTER that converts from our perfect interface to the provided interface

## Chapter 9: Unit Tests

The Three Laws of TDD:

- You may not write production code until you have written a failing unit test
- You may not write more of a unit test than is sufficient to fail, and not compiling is failing
- You may not write more production code than is sufficient to pass the currently failing test

Test code is just as important as production code. It is not a second-class citizen. It must be kept as clean as production code.
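A tiny pytest-flavoured illustration of the three laws above - `add` is a made-up function, not an example from the book. First write just enough test to fail (before `add` exists, the test fails; failing to compile/import counts as failing), then just enough production code to pass:

```python
# Law 1 and 2: the smallest failing test comes first.
def test_add():
    assert add(2, 3) == 5


# Law 3: only enough production code to make the failing test pass.
def add(a: int, b: int) -> int:
    return a + b
```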
The Build-Operate-Check pattern - each test is split into three parts:

1. build up the test data
2. operate on the test data
3. check that the operation yielded the expected results

Test code must be simple, succinct and expressive; however, it doesn't need to be as efficient as production code. One test should test a single concept.

Clean tests follow 5 rules - FIRST:

- F - Fast - tests should run quickly; if they don't, you won't want to run them frequently.
- I - Independent - tests should not depend on each other; one test should not set up conditions for the next test.
- R - Repeatable - tests should be repeatable in any environment (office, home, train without network); if they are not, you will have an excuse for why they fail.
- S - Self-Validating - tests should have a boolean output: they either pass or fail.
- T - Timely - tests need to be written in a timely fashion, just before the production code.

## Chapter 10: Classes

The first rule is that classes should be small. The second rule is that they should be smaller than that. Naming is probably the best way of determining class size. If we cannot derive a concise name for a class, it is likely too large.

The Single Responsibility Principle - a class or module should have one, and only one, reason to change.

Cohesion - classes should have a small number of instance variables, and each of a class's methods should manipulate one or more of those variables.

Open-Closed Principle - a class should be open for extension but closed for modification.

Dependency Inversion Principle - our classes should depend upon abstractions, not on concrete details.

## Chapter 11: Systems

It is a myth that we can get systems "right the first time". Instead, we should implement only today's stories, then refactor and expand the system to implement new stories tomorrow. This is the essence of iterative and incremental agility. Use the simplest thing that can possibly work.

## Chapter 12: Emergence

According to Kent, a design is simple if it follows these rules:

- runs all tests - the system needs to be testable; if this cannot be achieved, the system should not be released - all tests need to pass
- contains no duplication
- expresses the intent of the programmer - the clearer the code, the less time others will have to spend understanding it (small functions and classes, good names)
- minimises the number of classes and methods - the least important rule; the rules above matter more, however the overall goal should be to keep the system small

Can a set of practices replace experience? No. On the other hand, practices are a crystallised form of many decades of experience of many authors.

## Chapter 13: Concurrency

Concurrency is a decoupling strategy. It helps to decouple what gets done from when it gets done. In single-threaded apps, what and when are strongly coupled.

Concurrency Defence Principles:

- Single Responsibility Principle - concurrency-related code should be kept separate from other code
- limit the access to any data that may be shared
- a good way of avoiding shared data is to avoid sharing data in the first place - use copies of data, collect results from multiple threads and merge them
- threads should be as independent as possible
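A minimal sketch of the "use copies of data" principle above - the worker/chunk names are invented for illustration. Each thread operates on its own copy of a slice; only the final merge touches shared state:

```python
import threading

numbers = list(range(100))
results = []
results_lock = threading.Lock()


def worker(chunk: list[int]) -> None:
    # Each thread works only on its own copy - no shared mutable input.
    total = sum(chunk)
    with results_lock:  # only the merge step needs synchronisation
        results.append(total)


threads = [
    threading.Thread(target=worker, args=(numbers[i:i + 25].copy(),))
    for i in range(0, 100, 25)
]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(sum(results))  # 4950 - same as summing the list directly
```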
Java supports thread-safe collections, e.g. ConcurrentHashMap. There are other classes to support advanced concurrency: ReentrantLock - a lock that can be acquired and released; Semaphore - a classic lock with a count; CountDownLatch - a lock that waits for a number of events before releasing all threads waiting on it.

Basic definitions:

- Bound Resources - resources of a fixed size or number used in a concurrent environment, e.g. database connections
- Mutual Exclusion - only one thread can access shared data or a shared resource at a time
- Starvation - thread(s) prohibited from proceeding for an excessively long time or forever
- Deadlock - two or more threads waiting for each other to finish
- Livelock - threads in lockstep, each trying to do work but finding another "in the way"; threads continue trying to make progress but are unable to

Execution models:

- producer-consumer - one or more producer threads create some work and place it in a queue; one or more consumer threads acquire that work from the queue and complete it
- readers-writers - a writer waits until there are no readers before performing an update; if there is a continuous stream of readers, writers will starve
- dining philosophers - a hungry philosopher needs 2 forks before accessing the food; after eating he releases the forks and waits until he is hungry again. There are a number of solutions to this problem.

The `synchronized` keyword introduces a lock in Java. Locks are expensive, so use them carefully, and keep synchronised sections small. Graceful shutdown is hard to get correct. Think about it early and get it working early.

General tips:

- get your non-threaded code working first
- make thread-based code pluggable (one thread, n threads, ...)
- run with more threads than processors

## Chapter 17: Smells and Heuristics

Comments:

- Metadata should not appear in comments (author, modification date). Comments should be reserved for technical notes only.
- Do not write comments that will become obsolete.
- Do not paraphrase code.
- Be brief and correct.
- Instead of commenting out code - delete it.

Environment:

- You should be able to check out the system with one simple command.
- You should be able to run all unit tests with just one command.

Functions:

- Functions should have a small number of arguments; no argument is best. More than 3 arguments is very questionable and should be avoided.
- Output arguments are counterintuitive - readers expect arguments to be inputs, not outputs. If a function must change the state of something, have it change the state of the object it is called on.
- Flag arguments should be avoided (boolean flags) - they loudly declare that the function is doing multiple things.
- Methods that are never called should be removed. Dead code is wasteful.
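A quick sketch of the flag-argument smell from the list above - the `render` functions are invented for illustration. The boolean flag declares that one function does two things, so it is split in two:

```python
# Smell: the boolean flag loudly declares that render() does two things.
def render(page: str, as_pdf: bool) -> str:
    if as_pdf:
        return f"<pdf>{page}</pdf>"
    return f"<html>{page}</html>"


# Better: one function per behaviour, and the call site reads plainly.
def render_html(page: str) -> str:
    return f"<html>{page}</html>"


def render_pdf(page: str) -> str:
    return f"<pdf>{page}</pdf>"
```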
General:

- The ideal source file should contain one, and only one, language (not a mix of, for example, Java + JavaScript snippets + English comments).
- A function / class should implement the behaviours that another programmer could reasonably expect.
- Check every boundary condition.
- No duplication - perhaps the most important rule. Duplicated code means a missed opportunity for abstraction. Codd's normal forms are a strategy for eliminating duplication in data.
- It is important to create abstractions that separate higher-level general concepts from lower-level detailed concepts.
- High-level concepts should be independent of low-level details.
- A well-defined interface does not offer very many functions to depend upon, so coupling is low. Good software engineers learn to limit what they expose at the interfaces of their classes and modules.
- Get rid of dead code - code that is never executed.
- Variables and functions should be defined close to where they are used.
- Use consistent naming.
- Keep source code organised and free of clutter.
- Things that don't depend upon each other should not be artificially coupled.
- Feature envy - the methods of a class should be interested in the variables and functions of the class they belong to, not the variables and functions of other classes.
- Code should be as expressive as possible.
- Code should be placed where a reader would naturally expect it to be (the principle of least surprise).
- Consider whether a function should be static or not.
- Variables should have meaningful names; use intermediate variables when performing difficult calculations.
- Function names should say what they do - if you can't understand what a function does by reading the call, change the name.
- Polymorphism is preferred over if / else or switch / case statements.
- Follow code standards.
- Replace magic numbers with named constants.
- Be precise, use appropriate data structures.
- Encapsulate conditionals - boolean logic is hard to understand without seeing it in context; extract functions that explain the intent of the conditional.
- Avoid negative conditionals - they are harder to understand.
- Functions should do one thing.
- Encapsulate boundary conditions.
- The statements within a function should all be written at the same level of abstraction.
- Keep configurable data at high levels.
- Law of Demeter - we don't want a single module to know much about its collaborators.

Names:

- Choose descriptive names. Names in software are 90% of what makes software readable.
- Choose names at the appropriate level of abstraction. Don't pick names that communicate implementation details.
- Use standard nomenclature where possible.
- Use unambiguous names.
- Names should describe side effects.

Tests:

- Use a coverage tool.
- Don't skip trivial tests.
- Test boundary conditions.
- Tests should be fast.

================================================
FILE: books/coaching-agile-teams.md
================================================

[go back](https://github.com/pkardas/learning)

# Coaching Agile Teams

Book by Lyssa Adkins

- [1. Will I be a Good Coach?](#1-will-i-be-a-good-coach)

## 1. Will I be a Good Coach?

If teams are to have these kinds of stellar experiences, they need to leverage agile to the full competitive advantage it was meant to provide. Agile coaching matters because it helps with both producing products that matter in a real, complex and uncertain world, and adding meaning to people's work lives. Agile is easy to get going yet hard to do well.

Imagine a team that admits mistakes, reinforces their shared values, forgives one another, and moves on. Do you think such a team would come up with astonishing ideas?
An agile (or Scrum) coach is:

- someone who appreciates the depths of agile practices and principles and can help teams appreciate them too
- someone who has faced big dragons - organizational impediments - and has become a coach to managers and other outsiders in the course of addressing them
- someone who can help management at all levels of the organization see the benefits of working agile
- someone who has brought in ideas from professional facilitation, coaching, conflict management, mediation, theater and more to help the team become a high-performance team

Native wiring for coaching:

- ability to "read a room" - to read the emotion in the air and know whether all is good
- care about people more than products
- cultivate curiosity
- believe that people are basically good
- they know that plans fall apart, so they act in the moment with the team
- any group of people can do good things
- it drives them crazy when someone says "yeah, I know, it's a waste of time, but that's how we do it here"
- chaos and destruction are simply building blocks for something better
- they risk being wrong

================================================
FILE: books/code-complete.md
================================================

[go back](https://github.com/pkardas/learning)

# Code Complete: A Practical Handbook of Software Construction

Book by Steve McConnell

- [Chapter 1: Software Construction](#chapter-1-software-construction)
- [Chapter 2: Metaphors for a Richer Understanding of Software Development](#chapter-2-metaphors-for-a-richer-understanding-of-software-development)
- [Chapter 8: Defensive Programming](#chapter-8-defensive-programming)
- [Chapter 20: The Software-Quality Landscape](#chapter-20-the-software-quality-landscape)
- [Chapter 21: Collaborative Construction](#chapter-21-collaborative-construction)
- [Chapter 22: Developer Testing](#chapter-22-developer-testing)
- [Chapter 24: Refactoring](#chapter-24-refactoring)
- [Chapter 25: Code-Tuning Strategies](#chapter-25-code-tuning-strategies)
- [Chapter 32: Self-Documenting Code](#chapter-32-self-documenting-code)
- [Chapter 33: Personal Character](#chapter-33-personal-character)
- [Chapter 34: Themes in Software Craftsmanship](#chapter-34-themes-in-software-craftsmanship)

## Chapter 1: Software Construction

Construction - the process of building (planning, designing, checking the work). Construction is mostly coding and debugging but also involves designing, planning, unit testing, ... It is the centre of the software development process, and the only activity that is guaranteed to be done (planning might be imperfect, etc.).

## Chapter 2: Metaphors for a Richer Understanding of Software Development

Metaphors contribute to a greater understanding of software-development issues - the paper-writing metaphor, the farming metaphor, etc.

## Chapter 8: Defensive Programming

Protecting yourself from the "cruel world of incorrect data". Use assertions to document assumptions made in the code. Guidelines:

- use assertions for conditions that should never occur; this is not error-checking code. On an error the program should take corrective action; on an assertion failure the source code should be updated.
- no executable code in asserts:
  - bad: `assert foo(), ...`
  - good: `result = foo(); assert result, ...`
- use asserts to document and verify preconditions (before executing the routine) and postconditions (after executing the routine)
- for high robustness: failed assertions should still be handled anyway
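A small sketch of the precondition/postcondition guidelines in Python - `normalise_weights` is a made-up routine for illustration. Assumptions are documented at entry and exit, and the asserted values are computed first so no real work happens inside the `assert` itself:

```python
def normalise_weights(weights: list[float]) -> list[float]:
    # Preconditions - document assumptions; this is not error checking,
    # these conditions should never occur in a correct program.
    assert weights, "expected a non-empty list of weights"
    total = sum(weights)
    assert total > 0, "expected a positive total weight"

    normalised = [w / total for w in weights]

    # Postcondition - compute the result first, then assert on it,
    # so the assert contains no executable work of its own.
    result_total = sum(normalised)
    assert abs(result_total - 1.0) < 1e-9
    return normalised


print(normalise_weights([1.0, 3.0]))  # [0.25, 0.75]
```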
Error handling:

- return a neutral value - 0, empty string, ...
- substitute the next piece of valid data - for example, when processing a stream of data from a sensor (e.g. temperature), you may want to skip the missing value and wait for the next one
- return the same answer as the previous time - some data does not change dramatically in time, so it is okay to return the last correct value
- substitute the closest legal value - for example, a reversing car does not show a negative speed value but instead shows 0 (the closest legal value)
- log a warning message on incorrect data
- return an error code - report that an error has been encountered and trust that some other routine higher up will handle it
- call a centralised error-processing routine - the disadvantage is that the entire program is coupled to the mechanism
- display an error message to the user - warning: don't share too much with the user, an attacker may use this information
- shut down - useful in safety-critical applications

While handling errors you need to choose between robustness (do something to keep the software alive) and correctness (ensuring the data is always correct). Once an approach is selected, it should be coherent across the system.

Exceptions:

- they eliminate the possibility of errors going unnoticed
- throw only for truly exceptional situations - for situations that cannot be addressed otherwise
- if an exception can be handled locally - handle it locally
- avoid exceptions in constructors, because if an exception happens there, the destructor might not be called - resource leak!
- include all the information that led to the exception
- avoid empty catch blocks
- standardise the project's use of exceptions

Barricades:

- similar to having isolated compartments in the hull of a ship - damaged parts are isolated
- use validation classes that are responsible for cleaning the data - assume data is unsafe and you need to sanitise it

*Offensive programming* - exceptional cases should be handled in a way that makes them obvious during development and recoverable when production code is running. During development you want errors to be as visible as possible; in production they should be handled gracefully.

## Chapter 20: The Software-Quality Landscape

There are many quality metrics: correctness, usability, efficiency, reliability, integrity, adaptability, accuracy, robustness - these are metrics important to the user. For a programmer, more important metrics are: maintainability, flexibility, portability, reusability, readability, testability, understandability.

*Techniques for Improving Software Quality*: set software-quality objectives, perform quality-assurance activities, prototyping.

Defect-detection techniques: design reviews, code reviews, prototyping, unit tests, integration tests, regression tests, ... even all of them combined will not detect all the issues.

> Most studies have found that inspections are cheaper than testing. A study at the Software Engineering Laboratory found that code reading detected about 80% more faults per hour than testing.

Cost of detection is only one part. There is also the cost of fixing the issues. The longer a defect remains in the system, the more expensive it becomes to remove.

Recommended combination: formal inspections of all requirements, architecture and design -> modeling / prototyping -> code reading -> testing.

Remember: improving quality reduces development cost.
## Chapter 21: Collaborative Construction

> IBM found that each hour of inspection prevented about 100 hours of related work (testing and defect correction).

> Reviews cut the errors by over 80%.

> Reviews create a venue for more experienced and less experienced programmers to communicate about technical issues.

Collective ownership - code is owned by the group rather than by individuals, and can be accessed and modified by various members.

Guide on pair programming:

- it will not be effective if you argue about styling conventions
- don't let it turn into watching - the person without the keyboard should be an active participant
- sometimes it is better to discuss something on the whiteboard and then go programming solo
- rotate pairs
- match the other's pace - the fast learner needs to slow down
- don't force people who don't like each other to pair
- no pairing between newbies

Nice idea: for discussing a design, everyone should come with a prepared list of potential issues. It is good to assign perspectives - maintainer, coder, user, designer. The author should play a minor role in such a discussion and only present the overview. A reviewer can be anyone other than the author - tester, developer. Management should not be present at the meeting; however, they should be briefed with the results after the discussion. A design review cannot be used for performance appraisals. The group should be focused on identifying defects. The goal of this meeting is not to explore alternatives or debate who is right and who is wrong.

> NASA's Software Engineering Laboratory found that code reading detected about 3.3 defects per hour of effort. Testing detected 1.8 errors per hour.

## Chapter 22: Developer Testing

> You must hope to find errors in your code. Such hope might seem like an unnatural act, but you should hope that it's you who finds the errors and not someone else.

Why TDD:

- it is the same effort to write test cases before and after
- you detect defects earlier, and you can correct them more easily
- it forces you to think a little about the requirements and design before writing code
- it exposes requirements problems sooner

Developers tend to write *clean tests* rather than test for all the ways code breaks. A developer's testing isn't sufficient to provide adequate quality assurance.

General Principle of Software Quality: improving quality improves the development schedule and reduces development cost.

## Chapter 24: Refactoring

The Cardinal Rule of Software Evolution: evolution should improve the internal quality of the program.
Signs / smells that indicate refactoring is needed:

- code duplication
- you need to make parallel changes
- too long routine
- too long loop or too deeply nested
- poor class cohesion - a class takes ownership of many unrelated responsibilities
- too many parameters
- changes require parallel modifications to multiple classes
- related data not organised into classes
- overloaded primitive data type
- a class doesn't do much - sometimes the result of refactoring is that an old class doesn't have much to do
- tramp data - one routine just passes data through to another
- one class knows too much about the other
- poor names
- public data members - in general a bad idea
- a subclass uses only a small percentage of its parent's routines
- comments used to explain bad code - "don't comment bad code, rewrite it"
- setup code needed before a routine call
- code that "seems like it might be needed one day" - programmers are rather bad at guessing what functionality might be needed someday; *designing ahead* introduces unnecessary complexity

Data-Level Refactoring (a small before/after sketch follows below):

- replace a magic number with a named constant
- give a variable an informative name
- inline expressions
- replace an expression with a routine
- convert a primitive data type to a class
- encapsulate returned collections

Statement-Level Refactoring:

- decompose boolean expressions - use variables that help document the meaning of the expression
- move a boolean expression into a well-named function
- return as soon as you know the return value

Routine-Level Refactoring:

- inline simple routines
- convert a long routine into a class
- separate query operations from modification operations
- combine similar routines by parametrizing them; if a routine depends on the parameter passed in, consider splitting the routine
- pass a whole object instead of specific fields; however, if you are creating an object just to pass it to a routine, consider changing the routine to take only specific fields
- a routine should return the most specific object (mostly applicable to iterators, collections, ...)

Class Implementation Refactoring:

- extract specialised code into a subclass - if a class has code that is used by only a subset of its instances
- combine similar code into a superclass - if at least 2 classes have similar code

Class Interface Refactoring:

- eliminate classes not doing much
- hide a delegate - A calling B, A calling C, when really class A should call class B and class B should call class C; or remove the middleman - remove B and make A call C directly
- hide routines that are not intended to be used outside the class
- encapsulate unused routines - if you use only a small portion of a class's interface

Refactoring might cause a lot of harm if misused:

- refactorings should be small
- one refactoring at a time
- make a list of needed steps
- make a parking lot - in the middle of a refactoring you might think about another refactoring, and another, and so on; for changes that aren't required immediately, save a list of TODO changes
- check the IDE / compiler / other tools' errors
- refactored code should be retested, and the programmer should also add more test cases
- be careful about small refactorings, because they tend to introduce more bugs than big ones
- adjust your approach based on the risk of the refactoring - some changes are more dangerous than others

Refactoring refers to making changes in working code that do not affect the program's behaviour. Programmers who are tweaking broken code aren't refactoring - they are hacking.
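A minimal sketch combining a few of the refactorings above (replace a magic number with a named constant, decompose a boolean expression, return as soon as you know the return value) - the `can_withdraw` example is invented for illustration:

```python
# Before: magic numbers and an opaque boolean expression.
def can_withdraw_before(balance: float, amount: float, vip: bool) -> bool:
    if (balance - amount >= -500 and vip) or (balance - amount >= 0 and not vip):
        return True
    else:
        return False


# After: named constants and a well-named intermediate variable,
# returning as soon as the value is known.
VIP_OVERDRAFT_LIMIT = -500
STANDARD_LIMIT = 0


def can_withdraw(balance: float, amount: float, vip: bool) -> bool:
    remaining = balance - amount
    allowed_limit = VIP_OVERDRAFT_LIMIT if vip else STANDARD_LIMIT
    return remaining >= allowed_limit


# Behaviour is unchanged - the essence of a refactoring.
assert can_withdraw_before(100, 400, vip=True) == can_withdraw(100, 400, vip=True)
```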
There are many strategies for deciding where refactoring should start. For example, whenever you are adding a routine you could refactor its neighbours, or do the same when adding a class, or refactor error-prone modules, the most complex modules, etc.

## Chapter 25: Code-Tuning Strategies

Code tuning is one way of improving a program's performance. You can often find other ways to improve performance - faster and without harm to the code.

> More computing sins are committed in the name of efficiency (without necessarily achieving it) than for any other single reason - including blind stupidity ~ Wulf

Efficiency can be seen from many viewpoints:

- requirements - TRW required sub-second response time, which led to a highly complex design and cost ~100M $; analysis determined users would be satisfied with 4-second responses 90% of the time, and modifying the response-time requirement reduced cost by ~70M $. Before you invest time solving a performance problem, make sure you are solving a problem that needs to be solved.
- design - sometimes a program's design makes it difficult to build a high-performance system; other designs make it hard not to.
- class and routine design - on this level, algorithms and data structures matter.
- OS interactions - you might not be aware the compiler generated code using heavy OS calls.
- code compilation - good compilers turn good high-level-language code into optimised machine code.
- hardware - sometimes the cheapest and best way to improve a program's performance is to buy new hardware.
- code tuning - small-scale changes that affect a single class, a routine or just a few lines of code, and make it run more efficiently.

Some sources say you can multiply improvements on each of the six levels, achieving a performance improvement of a million-fold. Code tuning is not the most effective way to improve performance! Writing micro-efficient code does not prove you are cool. Efficient code isn't necessarily better.

The Pareto Principle: also known as the 80/20 rule - you can get 80% of the result with 20% of the effort. Working toward perfection might prevent completion. Complete it first, and then perfect it. The part that needs to be perfect is usually small.

False statement: "Reducing the lines of code in a high-level language improves the speed or size of the resulting machine code":

```
# This is slower:
for i = 1 to 10
    a[i] = i

# This is faster:
a[1] = 1
a[2] = 2
...
a[10] = 10
```

It is also impossible to identify performance bottlenecks before the program is working completely, hence "you should optimise as you go" is false as well. Also, premature optimisation is the root of all evil, because you are missing perspective. Compilers are really powerful; however, they are better at optimising straightforward code than at optimising tricky code. So, design the application properly, write clear code and the compiler will do the rest :)

Sources of inefficiency:

- I/O operations - if possible, store data in memory
- paging - an operation that causes the OS to swap pages of memory is much slower than an operation that works on only one page of memory
- system calls - calls to system routines are expensive (context switch, saving app state, recovering kernel state); avoid system calls, write your own routines using the small part of the functionality offered by a system routine, or work with the system vendor to improve performance
- interpreted languages - :(
- errors - errors in code can be another source of performance problems
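Code-tuning decisions should be driven by measurement rather than intuition; a minimal sketch with the standard `timeit` module - the two variants being compared are invented for illustration:

```python
import timeit

# Two candidate implementations - intuition says the second should win,
# but only a measurement can confirm it on your machine and runtime.
setup = "data = list(range(1000))"
concat = "s = ''.join(str(x) for x in data)"
concat_map = "s = ''.join(map(str, data))"

print(timeit.timeit(concat, setup=setup, number=1000))
print(timeit.timeit(concat_map, setup=setup, number=1000))
```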
Experience doesn't help with optimisation. A person's experience might have come from an old machine, language or compiler. You can never be sure about the effect of an optimisation until you measure the effect.

## Chapter 32: Self-Documenting Code

Unit development folder - an informal document that contains notes used by a developer during construction; its main purpose is to provide a trail of design decisions that aren't documented elsewhere.

Detailed-design document - a low-level design document; it describes class-level or routine-level design decisions.

Internal documentation (within the program) is the most detailed kind of documentation. The main contributor to code-level documentation isn't comments, but good programming style: good variable names, clear layout and minimisation of control-flow and data-structure complexity.

> **Good comments don't repeat the code or explain it. They clarify its intent. Comments should explain, at a higher level of abstraction than the code, what you are trying to do.**

Kinds of comments:

- repeat of the code - the comment gives no additional information
- explanation of the code - the code is so complicated it needs to be explained; make the code better instead of adding comments
- **summary of the code** - very useful when someone other than the code's original author tries to modify the code
- **description of the code's intent** - an IBM study says "understanding the programmer's intent is the most difficult problem"
- **information that cannot be expressed by the code itself** - for example a copyright notice, notes about the design, references to requirements

The 3 types of acceptable comments are highlighted above. Effective commenting shouldn't be time-consuming. Guidelines for effective commenting:

- if the commenting style is too fancy, it very likely becomes annoying to maintain
- write pseudocode in comments
- performance is not a good reason for avoiding commenting (in some languages comments slow down execution / compilation) - the usual solution is to pass the code through a tool stripping comments before release

End-line comments pose several problems and should be avoided - it is hard to write a meaningful comment in one line, and there is not much space on the right side of the screen.

The code itself is always the first documentation you should check. If the code is not good enough, look for comments. Comments should avoid abbreviations. Comments should justify violations of good programming style. Don't comment tricky code - rewrite it. If something is tricky for you, for others it might be incomprehensible.

> Make your code so good that you don't need comments, and then comment it to make it even better.

Commenting data declarations:

- comment the units
- comment the range of allowable numeric values
- use enumerated types to express coded meanings
- comment limitations on input data, use assertions
- if a variable is used as a bit field, explain every bit
- if you have comments that refer to a specific variable, make sure the comment stays updated after a variable name change

Keep comments close to the code they describe. Describe the design approaches, limitations, usage assumptions and so on. Do not document implementation details in the interface.

## Chapter 33: Personal Character

The best programmers are the people who realise how small their brains are.
The purpose of many good programming practices is to reduce the load on your grey cells:

- decomposing - makes a system simpler to understand
- reviews, inspections and tests - our intellectual capacity is limited, so we augment it with someone else's
- short routines reduce the load on our brains
- writing programs in terms of the problem domain rather than in terms of low-level implementation details reduces mental workload
- conventions free the brain from the relatively mundane aspects of programming

How to exercise curiosity and make learning a priority?

- If your workload consists entirely of short-term assignments that don't develop your skills, be dissatisfied. Half of what you need to know will be outdated in three years. You are not learning, you are turning into a dinosaur. If you can't learn at your job, find a new one.
- Experiment if you don't understand something. Make mistakes and learn from each one. Making a mistake is no sin. Failing to learn from a mistake is.
- Read about problem-solving, don't reinvent the wheel.
- Study the work of great programmers - it is not about reading 500-line source files but, for example, about studying high-level design.
- Read books - one book is more than most programmers read each year.
- Affiliate with other professionals.
- Set up a professional development plan.

Mature programmers are honest, which means: you refuse to pretend you are an expert when you are not, you admit your mistakes, you provide realistic estimates, you understand your program.

Writing readable code is part of being a team player. As a readability guideline, keep the person who has to modify your code in mind. Programming is communicating with another programmer first and communicating with the computer second.

To stay valuable, you have to stay current. For young, hungry programmers, this is an advantage. Older programmers sometimes feel they have already earned their stripes and resent having to improve themselves year after year.

Good habits matter because most of what you do as a programmer you do without consciously thinking about it.

## Chapter 34: Themes in Software Craftsmanship

There are many intellectual tools for handling computer-science complexity:

- dividing a system into subsystems at the architecture level, so that the brain can focus on a smaller part of the system at one time
- careful interface definition - preserving the abstraction represented by the interface, so that the brain doesn't have to remember arbitrary details
- avoid global data
- avoid deep inheritance hierarchies
- carefully define the error-handling strategy
- prevent the creation of monster classes
- keep functions short
- use self-explanatory names
- minimise the number of parameters passed to a routine
- use conventions

The points above decrease the amount of mental resources you need in order to understand the code. Abstraction is a particularly powerful tool for managing complexity. Fred Brooks said that the biggest single gain ever made in computer science was the jump from machine language to higher-level languages - it freed programmers from worrying about the detailed quirks of individual pieces of hardware and allowed them to focus on programming. Reducing complexity is arguably the most important key to being an effective programmer.

Collective ability isn't simply the sum of the team members' individual skills. The way people work together determines whether abilities sum up or subtract from each other.
In the real world, requirements are never stable. In order to build software more flexibly, use an incremental approach - plan to develop the program in several iterations.

Write readable code because it helps other people read the code. The computer doesn't care if code is readable. A professional programmer writes readable code. Even if you think you are the only one who will read your code, in reality chances are good that someone else will need to modify it. One study found that 10 generations of maintenance programmers work on an average program before it gets rewritten.

If your language doesn't support some mechanism (e.g. a missing `assert`), do not hesitate to implement it on your own.

At the highest level, you shouldn't have any idea how the data is stored. Suggested levels of abstraction:

4. high-level problem-domain terms
3. low-level problem-domain terms
2. low-level implementation structures
1. programming-language structures and tools
0. operating-system operations and machine instructions

================================================
FILE: books/comic-agile.md
================================================

[go back](https://github.com/pkardas/learning)

# Comic Agilé

Book by Luxshan Ratnaravi, Mikkel Noe-Nygaard

- [1: Transformation](#1-transformation)
- [4: Team](#4-team)
- [6: Miscellaneous](#6-miscellaneous)

## 1: Transformation

Instead of taking a waterfall approach to your agile transformation, take an iterative one and grow the scope organically. Focus on changing the organizational culture to align with an agile one.

Product Owners don't dictate anything just because they are accountable for maximizing value through an effective product backlog. The entire Scrum Team collaborates on creating a plan for the next Sprint.

Assess the psychological safety in your organization. If it is too low, seek to make working agreements where blameless post-mortems are part of them, so you can create a culture of promoting healthy conflicts and celebration of mistakes (and learning from them). Help your managers in demanding more psychological safety from their superiors, as that is a prerequisite for the managers creating it for you.

If you only partly adopted the agile way of working, the scope and time might be fixed, so the only parameter the teams can really vary is how much technical debt to create.

## 4: Team

Team Velocity - the velocity is only for the team. If management doesn't get that, educate them on the purpose and nature of velocity.

Technical Debt - if your PO doesn't get the importance of reducing technical debt, you need to educate them - spending some time now on reducing technical debt will most likely decrease the time-to-market of new features.

Avoid external participants in the team's retrospective (lack of trust towards externals).

Use simple tools for building an agile culture; by taking a just-enough approach to your tooling, you free up energy to focus on the needed behavioral changes.

DevOps is not just about tools, testing and CI/CD pipelines - it is more about culture, breaking down silos and aligning cross-functional teams to the paths of value delivery.

A WIP limit should create a pull system in the team's flow. This should then bring a conversation about collaboration and the knowledge sharing needed to ensure that the entire team can actually swarm around each PBI (Product Backlog Item).

Mob Programming - working collaboratively in groups of 3+ to deliver high-quality software and/or share knowledge between the developers in the mob.
The Driver controls the keyboard; the Navigators are thinking, discussing, reviewing and reflecting. The roles are interchanged.

Stability is the foundation for building the trust needed to become high-performing teams. If a team keeps changing, they will have difficulties moving up Tuckman's phases - forming, storming, norming, performing.

## 6: Miscellaneous

In the spirit of openness, you don't have to wait for the Retrospective to bring up potential improvements to your ways of working.

Companies with diverse leadership are 45% more likely to grow their market share and 70% more likely to capture new markets compared to companies with "non-diverse" leadership. Behavioral diversity is the other half of the equation, which includes:

- ensuring everyone is heard
- making it safe to propose novel ideas
- giving team members decision-making authority
- sharing credit for success
- giving actionable feedback
- implementing feedback from the team

================================================
FILE: books/cracking-coding-interview/Dockerfile
================================================

FROM python:3.10.4

WORKDIR /src

ENV PYTHONPATH "${PYTHONPATH}:/src"

COPY requirements.txt .
RUN pip install -r requirements.txt

COPY src/ src/

================================================
FILE: books/cracking-coding-interview/docker-compose.yml
================================================

version: "3.9"

services:
  interview:
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./:/src

================================================
FILE: books/cracking-coding-interview/notes.md
================================================

[go back](https://github.com/pkardas/learning)

# Cracking the Coding Interview: 189 Programming Questions and Solutions

Book by Gayle Laakmann McDowell

Code here: [click](.)

- [Chapter 1: The Interview Process](#chapter-1-the-interview-process)
- [Chapter 2: Behind the Scenes](#chapter-2-behind-the-scenes)
- [Chapter 3: Special situations](#chapter-3-special-situations)
- [Chapter 4: Before the Interview](#chapter-4-before-the-interview)
- [Chapter 5: Behavioral Questions](#chapter-5-behavioral-questions)

## Chapter 1: The Interview Process

Assessment of a candidate's performance:

- Analytical skills: Did you need much help to solve the problem? How optimal was your solution? How long did it take you to arrive at a solution?
- Coding skills: Were you able to successfully translate your algorithm to reasonable code? Was it clean and well-organized? Did you think of potential errors? Did you use good style?
- Technical knowledge: Do you have a strong foundation in computer science and the relevant technologies?
- Experience: Have you made good technical decisions in the past? Have you built interesting, challenging projects? Have you shown drive, initiative, and other important factors?
- Culture fit: Do your personality and values fit with the company and team? Did you communicate well with your interviewer?

False negatives are acceptable. Some good candidates are rejected. The company is out to build a great set of employees. They can accept that they miss out on some good people. The company is far more concerned with false positives: people who do well in an interview but are not in fact very good.

Basic data structure and algorithm knowledge is useful. It is a good proxy. These skills are not hard to learn, but are well-correlated with being a good developer. Also, it is hard to ask problem-solving questions that don't involve algorithms and data structures.
Your interviewer develops a feel for your performance by comparing you to other people. Getting a hard question isn't a bad thing. When it is harder for you, it is harder for everyone.

If you haven't heard back from a company within 3-5 business days after the interview, check in with your recruiter. You can almost always re-apply to a company after getting rejected. Typically, you have to wait 6-12 months.

## Chapter 2: Behind the Scenes

The "bar raiser" interviewer is charged with keeping the interview bar high. This person has significant experience with interviews and veto power in the hiring decision.

## Chapter 3: Special situations

**Experienced candidates.** More experienced engineers might see slightly less focus on algorithm questions. Some interviewers might hold experienced candidates to a somewhat lower standard - after all, it has been years since these candidates took an algorithms class. Others, though, hold experienced candidates to a higher standard. On average, it balances out. The exception to this rule is system design and architecture questions, where performance is evaluated with respect to your experience level.

Personality fit: typically assessed by how you interact with your interviewer. Establishing a friendly, engaging conversation with your interviewers is your ticket to many job offers.

**For interviewers.**

- Don't actually ask the exact questions in here (this book). You can ask questions similar to these. Some candidates are reading this book. Your goal is to test their problem-solving skills, not their memorization skills.
- Ask Medium and Hard problems. When you ask questions that are too easy, performance gets clustered together.
- Use hard questions, not hard knowledge. If your question expects obscure knowledge, ask yourself: is this truly an important skill? Most won't remember Dijkstra's algorithm or the specifics of how AVL trees work.
- Avoid "scary" questions. Some questions intimidate candidates because they seem to involve specialized knowledge, even if they really don't - math or probability, low-level knowledge, system design or scalability, proprietary systems (e.g. Google Maps). If you are going to ask a question that sounds "scary", make sure you reassure candidates that it doesn't require the knowledge they think it does.
- Offer positive reinforcement. You want candidates to feel comfortable. A candidate who is nervous will perform poorly, and it doesn't mean that they aren't good. Moreover, a good candidate who has a negative reaction to you or to the company is less likely to accept an offer - and they may dissuade their friends from interviewing/accepting as well. No matter how poorly a candidate is doing, there is always something they got right. Find a way to infuse some positivity into the interview.
- Coach your candidates.
  - Many candidates don't use an example to solve a question. Guide them.
  - Some candidates take a long time to find a bug because they use an enormous example. They didn't realize it would be more efficient to analyze their code conceptually first, or that a small example would work nearly as well. Guide them.
  - If they dive into code before they have an optimal solution, pull them back and focus them on the algorithm.
  - If they get nervous and stuck and aren't sure where to go, suggest that they walk through the brute-force solution and look for areas to optimize.
  - Remind them that they can start off with a brute-force solution. Their first solution doesn't have to be perfect.
  - If they want silence, give them silence. If your candidate needs this, give your candidate time to think.
- Know your mode: sanity check, quality, specialist, and proxy.
  - Sanity Check - easy problem-solving or design questions. They assess a minimum degree of competence. You can use them early in the process.
  - Quality Check - more challenging questions, designed to be more rigorous and make a candidate think.
  - Specialist Questions - test knowledge on specific topics, e.g. Java or machine learning.
  - Proxy Knowledge - knowledge that is not quite at the specialist level, but that you would expect a candidate at their level to know.

## Chapter 4: Before the Interview

If you are smart, you can code, and you can prove that, you can land your interview. Resume screeners want to know that you are smart and that you can code. You should prepare your resume to highlight these 2 things. Think twice before cutting technical lines in order to make space for your non-technical hobbies.

Keep your resume short, max. 1.5-2 pages. Long resumes are not a reflection of having tons of experience; they are a reflection of not understanding how to prioritize content. A resume should not include a full history of every role you have ever had. Include only the relevant positions - the ones that make you a more impressive candidate.

For each role, try to discuss your accomplishments with the following approach: "_Accomplished X by implementing Y, which led to Z_". Not everything will fit into this approach, but the principle is the same: what you did, how you did it, and what the results were.

## Chapter 5: Behavioral Questions

Ensure that you have one to three projects that you can talk about in detail. You should be able to discuss the technical components in depth. These should be projects where you played a central role.

What are your weaknesses? A good answer conveys a real, legitimate weakness but emphasises how you work to overcome it.

What questions should you ask the interviewer?

- Genuine Questions: the questions you actually want to know the answer to.
- Insightful Questions: questions that demonstrate your knowledge or understanding of technology. These typically require advance research about the company.
- Passion Questions: questions designed to demonstrate your passion for technology. They show that you are interested in learning and will be a strong contributor to the company. E.g.: "I am very interested in scalability, and I would love to learn more about it. What opportunities are there at this company to learn about this?"

Be specific, not arrogant. How do you make yourself sound good without being arrogant? Be specific. Specificity means giving just the facts and letting the interviewer derive an interpretation. Stay light on details and just state the key points. Your interviewer can ask for more details.

Focus on yourself, not your team. More "I", less "we".

Give structured answers:

1. Nugget first - start your response with a "nugget" that succinctly describes what your response will be about.
2. S.A.R. (Situation, Action, Result) - start off outlining the situation, then explain the actions you took, and lastly describe the result.

Tell me about yourself - suggested structure:

1. Current role (headline only)
2. College
3. Post college & onwards (job, technologies)
4. Current role (more details)
5. Outside of work (hobbies)
Wrap up (what are you looking for) ================================================ FILE: books/cracking-coding-interview/requirements.txt ================================================ pytest==7.1.2 ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/check_permutation.py ================================================ import pytest def check_permutation_sets(string: str, potential_permutation_string: str) -> bool: return len(string) == len(potential_permutation_string) and set(string) == set(potential_permutation_string) def check_permutation_sort(string: str, potential_permutation_string: str) -> bool: return sorted(string) == sorted(potential_permutation_string) def check_permutation_array(string: str, potential_permutation_string: str) -> bool: if len(string) != len(potential_permutation_string): return False url_array = [0] * 128 for ch in string: url_array[ord(ch)] += 1 for ch in potential_permutation_string: url_array[ord(ch)] -= 1 if url_array[ord(ch)] < 0: return False return True @pytest.mark.parametrize("string, potential_permutation_string, is_permutation", [ # @formatter:off ("god", "dog", True), ("god", "dod", False), ("god", "dogg", False), ("cat belongs to ala", "ala belongs to cat", True), ("interview questions", "interviews question", True), ("interview questions", "interview question", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ check_permutation_sets, check_permutation_sort, check_permutation_array, ]) def test_algorithm(function, string, potential_permutation_string, is_permutation): assert function(string, potential_permutation_string) == is_permutation ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/is_unique.py ================================================ import pytest def check_if_has_unique_characters_pythonic(string: str) -> bool: return len(set(string)) == len(string) def check_if_has_unique_characters_ascii(string: str) -> bool: boolean_array = [False] * 128 for ch in string: int_ch = ord(ch) if boolean_array[int_ch]: return False boolean_array[int_ch] = True return True def check_if_has_unique_characters_no_structures(string: str) -> bool: for i, ch_0 in enumerate(string): for ch_1 in string[i + 1:]: if ch_0 == ch_1: return False return True def check_if_has_unique_characters_no_structures_sort(string: str) -> bool: sorted_string = sorted(string) for i in range(len(sorted_string) - 1): if sorted_string[i] == sorted_string[i + 1]: return False return True @pytest.mark.parametrize("string, has_all_unique_chars", [ # @formatter:off ("qwerty", True), ("", True), ("qqwert", False), ("qwertt", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ check_if_has_unique_characters_pythonic, check_if_has_unique_characters_ascii, check_if_has_unique_characters_no_structures, check_if_has_unique_characters_no_structures_sort, ]) def test_algorithm(function, string, has_all_unique_chars): assert function(string) == has_all_unique_chars ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/one_away.py ================================================ import pytest def is_one_edit_away_pythonic(string: str, edit: str) -> bool: if abs(len(string) - len(edit)) > 1: return False if string in edit or edit in string: return True return len(set(string) - set(edit)) <= 1 def is_one_edit_away_loop(string: str, edit: str) -> bool: if abs(len(string) - 
len(edit)) > 1: return False shorter_text, longer_text = string if len(string) < len(edit) else edit, string if len(string) >= len(edit) else edit shorter_i, longer_i = 0, -1 edit_found = False while shorter_i < len(shorter_text) and longer_i < len(longer_text): longer_i += 1 if shorter_text[shorter_i] == longer_text[longer_i]: shorter_i += 1 continue if edit_found: return False if len(string) == len(edit): shorter_i += 1 edit_found = True return True @pytest.mark.parametrize("string, edit, expected_result", [ # @formatter:off ("pale", "ple", True), ("pale", "ale", True), ("ale", "pale", True), ("pales", "pale", True), ("pale", "bale", True), ("pale", "bake", False), ("pale", "ba", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ is_one_edit_away_pythonic, is_one_edit_away_loop ]) def test_algorithm(function, string, edit, expected_result): assert function(string, edit) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/palindrome_permutation.py ================================================ from collections import Counter import pytest def is_palindrome_permutation_pythonic(string: str) -> bool: raw_string = string.replace(' ', '') letter_frequency = Counter(raw_string) if len(raw_string) % 2 == 0: return all(frequency % 2 == 0 for frequency in letter_frequency.values()) else: return sum(1 for frequency in letter_frequency.values() if frequency == 1) <= 1 def is_palindrome_permutation_counter(string: str) -> bool: raw_string = string.replace(' ', '') letter_frequency = Counter() num_of_odd = 0 for ch in raw_string: letter_frequency[ch] += 1 if letter_frequency[ch] % 2 == 1: num_of_odd += 1 else: num_of_odd -= 1 return num_of_odd <= 1 @pytest.mark.parametrize("string, expected_result", [ # @formatter:off ("tact coa", True), ("kamil slimak", True), ("slimakkamil ", True), ("aaaaaab", True), ("aaa", True), ("aaaaacb", False), ("abc", False), ("slimakoamil ", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ is_palindrome_permutation_pythonic, is_palindrome_permutation_counter ]) def test_algorithm(function, string, expected_result): assert function(string) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/rotate_matrix.py ================================================ from typing import List import pytest def rotate_matrix_list_comprehension(matrix: List[List[int]]) -> List[List[int]]: size = len(matrix) return [ [matrix[col][row] for col in reversed(range(size))] for row in range(size) ] def rotate_matrix_zip(matrix: List[List[int]]) -> List[List[int]]: return [list(reversed(row)) for row in zip(*matrix)] @pytest.mark.parametrize("matrix, rotated_matrix", [ ([[1, 2], [3, 4]], [[3, 1], [4, 2]]), ([[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[7, 4, 1], [8, 5, 2], [9, 6, 3]]), ([[1, 2, 3, 8], [4, 5, 6, 8], [7, 8, 9, 8], [8, 8, 8, 8]], [[8, 7, 4, 1], [8, 8, 5, 2], [8, 9, 6, 3], [8, 8, 8, 8]]) ]) @pytest.mark.parametrize("function", [ rotate_matrix_zip, rotate_matrix_list_comprehension ]) def test_algorithm(function, matrix, rotated_matrix): assert function(matrix) == rotated_matrix ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/string_compression.py ================================================ from dataclasses import dataclass import pytest def compress_string(text: str) -> str: @dataclass class Compressed: char: str freq: int 
compressed = [] for ch in text: if compressed and ch == compressed[-1].char: compressed[-1].freq += 1 else: compressed.append(Compressed(char=ch, freq=1)) return ''.join(f"{c.char}{c.freq}" for c in compressed) if len(compressed) * 2 < len(text) else text @pytest.mark.parametrize("text, expected_result", [ # @formatter:off ("a", "a"), ("aabb", "aabb"), ("aaaa", "a4"), ("aabbb", "a2b3"), ("aabbbaa", "a2b3a2"), # @formatter:on ]) def test_algorithm(text, expected_result): assert compress_string(text) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/string_rotation.py ================================================ import pytest def is_rotated(string: str, rotated_string: str) -> bool: return len(string) == len(rotated_string) and rotated_string in string * 2 @pytest.mark.parametrize("string, rotated_string, expected_result", [ # @formatter:off ("", "", True), ("waterbottle", "erbottlewat", True), ("dog", "gdo", True), ("dog", "dogdo", False), ("dog", "godd", False), ("dog", "go", False), # @formatter:on ]) def test_algorithm(string, rotated_string, expected_result): assert is_rotated(string, rotated_string) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/urlify.py ================================================ import pytest def urlify_pythonic(url: str) -> str: return ' '.join(url.split()).replace(' ', "%20") def urlify_array(url: str) -> str: result_url = "" last_appended_character = None for ch in url: if ch == ' ' and last_appended_character is None: # Do not duplicate '%20' in the URL continue elif ch == ' ' and last_appended_character: last_appended_character = None result_url += "%20" else: last_appended_character = ch result_url += ch if last_appended_character is None: return result_url[:-3] return result_url @pytest.mark.parametrize("url, expected_url", [ # @formatter:off ("Mr John Smith", "Mr%20John%20Smith"), ("Mr John Smith", "Mr%20John%20Smith"), (" Mr John Smith", "Mr%20John%20Smith"), ("Mr John Smith ", "Mr%20John%20Smith"), ("Mr ", "Mr"), ("M ", "M"), (" ", ""), ("", ""), # @formatter:on ]) @pytest.mark.parametrize("function", [ urlify_pythonic, urlify_array, ]) def test_algorithm(function, url, expected_url): assert function(url) == expected_url ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/zero_matrix.py ================================================ from typing import List import pytest def nullify_loop(matrix: List[List[int]]) -> List[List[int]]: height, width = len(matrix), len(matrix[0]) columns, rows = set(), set() for row in range(height): for col in range(width): if matrix[row][col] == 0: columns.add(col) rows.add(row) return [ [ 0 if row in rows or col in columns else matrix[row][col] for col in range(width) ] for row in range(height) ] def nullify_in_place(matrix: List[List[int]]) -> List[List[int]]: height, width = len(matrix), len(matrix[0]) def nullify_column(pos: int) -> None: for i in range(height): matrix[i][pos] = 0 def nullify_row(pos: int) -> None: matrix[pos] = [0] * width col_start = 0 for row in range(height): for col in range(col_start, width): if matrix[row][col] == 0: nullify_row(row) nullify_column(col) col_start = col + 1 break return matrix @pytest.mark.parametrize("matrix, rotated_matrix", [ ([[0, 2], [3, 4]], [[0, 0], [0, 4]]), ([[1, 2, 3, 4], [1, 0, 3, 4], [1, 2, 3, 0]], [[1, 0, 3, 0], [0, 0, 0, 0], [0, 
0, 0, 0]]) ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/delete_middle_node.py ================================================ import pytest from linked_list import ( LinkedList, Node, ) def delete_middle_node(node: Node) -> None: assert node.next, "node is not the last node in the linked list" node.data = node.next.data node.next = node.next.next @pytest.mark.parametrize("values, node, expected_result", [ # @formatter:off ([1, 2, 3, 4], 2, [1, 3, 4]), ([1, 2, 3, 4], 3, [1, 2, 4]), # @formatter:on ]) def test_algorithm(values, node, expected_result): linked_list = LinkedList(values) delete_middle_node(linked_list.node_for_value(node)) assert linked_list.values == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/intersection.py ================================================ from typing import Optional import pytest from linked_list import ( LinkedList, Node, ) def intersection(list_0: LinkedList, list_1: LinkedList) -> Optional[Node]: if list_0.tail != list_1.tail: return None l0_node, l1_node = list_0.head, list_1.head l0_len, l1_len = list_0.length, list_1.length # Advance pointers when lists have different size: if l0_len > l1_len: for i in range(l0_len - l1_len): l0_node = l0_node.next if l0_len < l1_len: for i in range(l1_len - l0_len): l1_node = l1_node.next while l0_node and l1_node: if l0_node == l1_node: return l0_node l0_node = l0_node.next l1_node = l1_node.next assert False, "Loop above must finish the program" l0 = LinkedList([3, 1, 5, 9]) l1 = LinkedList([4, 6]) tail = LinkedList([7, 2, 1]).head l4 = LinkedList([3, 1, 5, 9, 7, 2, 1]) l5 = LinkedList([4, 6, 7, 2, 1]) @pytest.mark.parametrize("list_0, list_0_tail, list_1, list_1_tail, expected_result", [ # @formatter:off (l0, tail, l1, tail, tail), (l4, None, l5, None, None) # @formatter:on ]) def test_algorithm(list_0, list_0_tail, list_1, list_1_tail, expected_result): list_0.tail.next = list_0_tail list_1.tail.next = list_1_tail assert intersection(list_0, list_1) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/linked_list.py ================================================ from typing import ( List, Optional, ) import pytest class Node: def __init__(self, data: int) -> None: self.next = None self.data = data class LinkedList: def __init__(self, data: List[int]) -> None: self.head = None for val in data: self.append(val) @property def values(self) -> List[int]: result, current = [], self.head while current: result.append(current.data) current = current.next return result @property def tail(self) -> Optional[Node]: node = self.head while node and node.next: node = node.next return node @property def length(self) -> int: return len(self.values) def node_for_value(self, val: int) -> Optional[Node]: node = self.head while node: if node.data == val: return node node = node.next return None def append(self, data: int) -> None: self.head = append(self.head, data) def delete(self, data: int) -> None: self.head = delete(self.head, data) def delete(head: Optional[Node], data: int) -> Optional[Node]: node = head if not node: return None if head.data == data: return head.next while node.next: if node.next.data == data: node.next = node.next.next break
node = node.next return head def append(head: Optional[Node], data: int) -> Optional[Node]: if not head: return Node(data) current, end = head, Node(data) while current.next: current = current.next current.next = end return head @pytest.mark.parametrize("values", [ [], [1], [1, 2], [1, 2, 3], ]) def test_append(values): assert LinkedList(values).values == values @pytest.mark.parametrize("values, to_delete, expected_result", [ # @formatter:off ([], 0, []), ([1], 0, [1]), ([1], 1, []), ([1, 2], 1, [2]), ([1, 2], 2, [1]), ([1, 2, 3], 2, [1, 3]), # @formatter:on ]) def test_delete(values, to_delete, expected_result): linked_list = LinkedList(values) linked_list.delete(to_delete) assert linked_list.values == expected_result @pytest.mark.parametrize("values, value, expected_node_val", [ # @formatter:off ([1, 2, 3, 4], 2, 2), ([1, 2, 3, 4], 5, None) # @formatter:on ]) def test_node_for_value(values, value, expected_node_val): node = LinkedList(values).node_for_value(value) assert node.data if node else node == expected_node_val @pytest.mark.parametrize("values, expected_tail", [ # @formatter:off ([], None), ([1], 1), ([1, 2], 2), # @formatter:on ]) def test_tail(values, expected_tail): tail = LinkedList(values).tail assert tail.data == expected_tail if expected_tail else tail is None ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/loop_detection.py ================================================ from typing import Optional import pytest from linked_list import ( LinkedList, Node, ) def get_loop(linked_list: LinkedList) -> Optional[Node]: slow, fast = linked_list.head, linked_list.head def get_loop_head(): nonlocal slow, fast slow = linked_list.head while slow != fast: slow = slow.next fast = fast.next return fast while fast and fast.next: slow = slow.next fast = fast.next.next if slow == fast: return get_loop_head() return None l0 = LinkedList([1, 2, 3, 4, 5]) l0.node_for_value(5).next = l0.node_for_value(3) l1 = LinkedList([1, 2, 3, 4, 5]) @pytest.mark.parametrize("linked_list, expected_result", [ # @formatter:off (l0, l0.node_for_value(3)), (l1, None), # @formatter:on ]) def test_algorithm(linked_list, expected_result): assert get_loop(linked_list) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/palindrome.py ================================================ import pytest from linked_list import ( LinkedList, Node, ) def is_palindrome_simple(linked_list: LinkedList) -> bool: values = linked_list.values return values == values[::-1] def is_palindrome_reverse(linked_list: LinkedList) -> bool: def reverse_list() -> Node: head, node = None, linked_list.head while node: new_node = Node(data=node.data) new_node.next = head head = new_node node = node.next return head normal_node = linked_list.head reversed_node = reverse_list() while normal_node and reversed_node: if normal_node.data != reversed_node.data: return False normal_node = normal_node.next reversed_node = reversed_node.next return not normal_node and not reversed_node def is_palindrome_slow_fast_runner(linked_list: LinkedList) -> bool: slow, fast = linked_list.head, linked_list.head stack = [] while fast and fast.next: stack.append(slow.data) slow = slow.next fast = fast.next.next if fast: slow = slow.next while slow: if stack and stack.pop() != slow.data: return False slow = slow.next return True @pytest.mark.parametrize("values, expected_result", [ # @formatter:off ([1, 2, 3, 4], False), ([1, 2, 2, 
2], False), ([1, 2, 2, 1], True), ([1, 2, 1], True), ([1], True), ([], True) # @formatter:on ]) @pytest.mark.parametrize("function", [ is_palindrome_simple, is_palindrome_reverse, is_palindrome_slow_fast_runner, ]) def test_algorithm(function, values, expected_result): linked_list = LinkedList(values) assert function(linked_list) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/partition.py ================================================ from typing import Tuple import pytest from linked_list import LinkedList def partition(linked_list: LinkedList, partition_val: int) -> Tuple[LinkedList, LinkedList]: l1, l2 = LinkedList(data=[]), LinkedList(data=[]) node = linked_list.head while node: if node.data < partition_val: l1.append(node.data) else: l2.append(node.data) node = node.next return l1, l2 @pytest.mark.parametrize("values, partition_val, expected_values", [ # @formatter:off ([1, 2, 3, 4, 5], 3, ([1, 2], [3, 4, 5])), ([1, 2, 3, 4, 5], 0, ([], [1, 2, 3, 4, 5])), ([1, 2, 3, 4, 5], 6, ([1, 2, 3, 4, 5], [])), # @formatter:on ]) def test_algorithm(values, partition_val, expected_values): linked_list = LinkedList(values) l1, l2 = partition(linked_list, partition_val) assert (l1.values, l2.values) == expected_values ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/remove_dups.py ================================================ import pytest from linked_list import LinkedList def remove_duplicates_buffer(linked_list: LinkedList) -> LinkedList: unique_data = set() prev, current = None, linked_list.head while current: if current.data in unique_data: prev.next = current.next else: unique_data.add(current.data) prev = current current = current.next return linked_list def remove_duplicates_no_buffer(linked_list: LinkedList) -> LinkedList: current = linked_list.head while current: runner = current while runner.next: if current.data == runner.next.data: runner.next = runner.next.next else: runner = runner.next current = current.next return linked_list @pytest.mark.parametrize("values, expected_result", [ # @formatter:off ([], []), ([1, 1], [1]), ([1, 1, 0], [1, 0]), ([1, 1, 1, 1], [1]), ([0, 1, 0, 1], [0, 1]), ([1, 2, 3, 4], [1, 2, 3, 4]), # @formatter:on ]) @pytest.mark.parametrize("function", [ remove_duplicates_buffer, remove_duplicates_no_buffer ]) def test_algorithm(function, values, expected_result): linked_list = LinkedList(values) assert function(linked_list).values == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/return_kth_to_last.py ================================================ from typing import Optional import pytest from linked_list import ( LinkedList, Node, ) def return_kth_to_last_simple(linked_list: LinkedList, k: int) -> int: node = linked_list.head position, i = len(linked_list.values) - k, 0 if position < 0: return -1 while node and i < position: node = node.next i += 1 return node.data def return_kth_to_last_simplest(linked_list: LinkedList, k: int) -> int: values = linked_list.values size = len(values) return values[size - k] if size - k >= 0 else -1 def return_kth_to_last_recursive(linked_list: LinkedList, k: int) -> int: found_value = None def _return_kth_to_last(node: Optional[Node]) -> int: if not node: return 0 index = _return_kth_to_last(node.next) + 1 if index == k: nonlocal found_value found_value = node.data return index 
_return_kth_to_last(linked_list.head) return found_value if found_value else -1 def return_kth_to_last_iterative(linked_list: LinkedList, k: int) -> int: p1, p2 = linked_list.head, linked_list.head for _ in range(k): if not p1: return -1 p1 = p1.next while p1: p1 = p1.next p2 = p2.next return p2.data @pytest.mark.parametrize("values, k, expected_result", [ # @formatter:off ([1, 2, 3], 1, 3), ([1, 2, 3], 2, 2), ([1, 2, 3], 3, 1), ([1, 2, 3], 4, -1), # @formatter:on ]) @pytest.mark.parametrize("function", [ return_kth_to_last_simple, return_kth_to_last_simplest, return_kth_to_last_recursive, return_kth_to_last_iterative, ]) def test_algorithm(function, values, k, expected_result): linked_list = LinkedList(values) assert function(linked_list, k) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/sum_lists.py ================================================ import pytest from linked_list import ( LinkedList, Node, ) def sum_lists(list_0: LinkedList, list_1: LinkedList) -> LinkedList: result, remainder = [], 0 node_0, node_1 = list_0.head, list_1.head def add_aligned_lists() -> None: nonlocal node_0, node_1, result, remainder while node_0 and node_1: result.append((node_0.data + node_1.data + remainder) % 10) remainder = 1 if (node_0.data + node_1.data + remainder) >= 10 else 0 node_0, node_1 = node_0.next, node_1.next def align_remaining_list(node: Node) -> None: nonlocal result, remainder while node: result.append((node.data + remainder) % 10) remainder = 1 if (node.data + remainder) >= 10 else 0 node = node.next add_aligned_lists() align_remaining_list(node_0) align_remaining_list(node_1) if remainder: result.append(remainder) return LinkedList(result) @pytest.mark.parametrize("list_0, list_1, expected_result", [ # @formatter:off ([7, 1, 6], [5, 9, 2], [2, 1, 9]), ([1, 7, 1], [3], [4, 7, 1]), ([9, 9, 9], [1], [0, 0, 0, 1]), ([7, 1], [3, 1], [0, 3]), ([7, 1], [3], [0, 2]), # @formatter:on ]) def test_algorithm(list_0, list_1, expected_result): list_0, list_1 = LinkedList(list_0), LinkedList(list_1) assert sum_lists(list_0, list_1).values == expected_result ================================================ FILE: books/ddd.md ================================================ [go back](https://github.com/pkardas/learning) # Domain-Driven Design: Tackling Complexity in the Heart of Software Book by Eric Evans - [Chapter 1: Crunching Knowledge](#chapter-1-crunching-knowledge) - [Chapter 2: Communication and the Use of Language](#chapter-2-communication-and-the-use-of-language) - [Chapter 3: Binding Model and Implementation](#chapter-3-binding-model-and-implementation) - [Chapter 4: Isolating the Domain](#chapter-4-isolating-the-domain) - [Chapter 5: A Model Expressed in Software](#chapter-5-a-model-expressed-in-software) - [Chapter 6: The Life Cycle of a Domain Object](#chapter-6-the-life-cycle-of-a-domain-object) - [Chapter 7: Using the Language: An Extended Example](#chapter-7-using-the-language-an-extended-example) - [Chapter 8: Breakthrough](#chapter-8-breakthrough) - [Chapter 9: Making Implicit Concepts Explicit](#chapter-9-making-implicit-concepts-explicit) - [Chapter 10: Supple Design](#chapter-10-supple-design) - [Chapter 11: Applying Analysis Patterns](#chapter-11-applying-analysis-patterns) - [Chapter 12: Relating Design Patterns to the Model](#chapter-12-relating-design-patterns-to-the-model) - [Chapter 13: Refactoring Toward Deeper Insight](#chapter-13-refactoring-toward-deeper-insight) - [Chapter 14: Managing 
Model Integrity](#chapter-14-managing-model-integrity) - [Chapter 15: Distillation](#chapter-15-distillation) - [Chapter 16: Large-Scale Structure](#chapter-16-large-scale-structure) ## Chapter 1: Crunching Knowledge Effective modeling: - Binding model and the implementation - Cultivating a language based on the model - Developing a knowledge-rich model - Distilling the model - drop unneeded concepts - Brainstorming and experimenting Effective domain modellers are knowledge crunchers (they take a torrent of information and probe for the relevant trickle). Knowledge crunching is collaborative work, typically led by developers in cooperation with domain experts. Early versions or prototypes feed experience back into the team and change interpretations. All projects lack knowledge - people leave, team reorganisations happen - in general, knowledge is lost. Highly productive teams grow their knowledge continuously - they improve technical knowledge along with general domain-modelling skills, but also seriously learn about the specific domain they are working on. The accumulated knowledge makes them effective knowledge crunchers. Software is unable to fill in gaps with common sense - that is why knowledge crunching is important. Example with an overbooking strategy: the overbooking check should be extracted from the booking functionality to be more explicit and visible. This is an example of domain modeling, and of securing and sharing knowledge. ## Chapter 2: Communication and the Use of Language Domain experts and developers use different languages. Experts vaguely describe what they want, developers vaguely understand. The cost of translation, plus the risk of misunderstanding, is too high. A project needs a common language. The ubiquitous language includes: names of classes and prominent operations, terms to discuss. Model-based language should be used to describe artefacts, tasks and functionalities. The language may change to fit the discussion better. These changes will lead to refactoring of the code. A change in the language is a change to the model. The domain-model-based terminology makes conversations more concise, you avoid talking about low-level implementation details, instead you use high-level concepts (like in the example: Itinerary, Routing Service, Route Specification instead of cargo id, origin and destination, ...). Play with the model as you talk about the system, find easier ways to say what you need to say, and take those new ideas back down to the diagrams and code. The team should use ONE and only ONE language. Almost every conversation is an opportunity for the developers and domain experts to play with the model, deepen understanding and fine-tune it. The domain model is something between business terms developers don't understand and technical aspects of the design. The vital detail about the design is captured in the code. A well-written implementation should be transparent and reveal the model underlying it. The model is not the diagram, diagrams help to communicate and explain the model. Extreme Programming advocates using no extra design documents at all (usually because they fall out of sync) - the code should speak for itself. This motivates developers to keep code clean and transparent. However, if a document exists, it should not try to do what code already does well - a document should illuminate meaning, give insight into large-scale structures, clarify design intent, complement the code and the talking.
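A minimal sketch of what "speaking the model" can look like in code - the names Itinerary, Routing Service and Route Specification come from the example above; the attributes and method are invented for illustration:

```python
from dataclasses import dataclass
from typing import List


@dataclass(frozen=True)
class RouteSpecification:
    # Stated in the domain language, not as "cargo id, origin, destination" columns.
    origin: str
    destination: str


@dataclass
class Itinerary:
    legs: List[str]


class RoutingService:
    def route_for(self, specification: RouteSpecification) -> Itinerary:
        # A real implementation would consult schedules; the point is that
        # code and conversation share one vocabulary.
        return Itinerary(legs=[specification.origin, specification.destination])
```

A domain expert can follow a sentence like "the routing service finds an itinerary satisfying the route specification" directly in this code.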
## Chapter 3: Binding Model and Implementation Tightly relating the code to an underlying model gives the code meaning and makes the model relevant. The design must map to the domain model; if it doesn't, the correctness of the software is suspect. Model-Driven Design - discards the dichotomy of analysis model and design to search out a single model that serves both purposes (the ubiquitous language). Each object in the design plays a conceptual role described in the model. The code needs to be revised to reflect the model in a very literal way, so the mapping is obvious. The code becomes an expression of the model. Model-Driven Design is hard to accomplish in procedural languages like C or Fortran. This approach is reserved for object-oriented programming languages. The implementation model should not be exposed to the user. People responsible for the implementation should participate in modeling. Strict separation of responsibilities is harmful. Modeling and implementation are coupled in model-driven design. Any technical person contributing to the model must spend some time touching the code. Every developer must be involved in some level of discussion about the model. ## Chapter 4: Isolating the Domain Layered Architecture - the essential principle is that any element of a layer depends only on other elements in the same layer or on elements of the layers beneath it. Each layer specialises in a particular aspect of a computer program. Most commonly used layers: - UI (Presentation) Layer - showing information to the user and interpreting the user's commands. - Application Layer - this layer does not contain business logic, but only coordinates tasks and delegates work to collaborations of domain objects in the next layer down. - Domain (Model) Layer - responsible for representing concepts of business, information about the business situation and business rules. This layer is the heart of business software. - Infrastructure Layer - generic technical capabilities that support the higher layers (message sending, drawing widgets on the UI, ...), may also support the pattern of interactions between the 4 layers through an architectural framework. Partition a complex program into layers, develop a design within each layer that is cohesive and that depends only on the layers below. Concentrate all the code related to the domain model in one layer and isolate it from the user interface, application and infrastructure code. The domain objects, free of the responsibility of displaying themselves, storing themselves, managing application tasks and so forth, can be focused on expressing the domain model. This allows the model to evolve to be rich enough and clear enough to capture essential business knowledge and put it to work. Such separation allows a much cleaner design for each layer, especially because the layers tend to evolve at a different pace. Upper layers can use or manipulate elements of lower ones straightforwardly by calling their public interfaces. Domain-driven design requires only one particular layer (the domain layer) to exist. ## Chapter 5: A Model Expressed in Software ASSOCIATIONS. For every traversable association in the model, there is a mechanism in the software with the same properties. Constraints on associations should be included in the model and implementation (e.g. president of ... for a period of time), they make the model more precise and the implementation easier to maintain. ENTITIES.
Object modeling tends to lead us to focus on the attributes of an object, but the fundamental concept of an entity is an abstract continuity threading through a life cycle and even passing through multiple forms. Sometimes such an object must be matched with another object even though their attributes differ. Transactions in a banking application: two deposits of the same amount to the same account on the same day are still distinct transactions. They have identity and are entities. > When an object is distinguished by its identity, rather than its attributes, make this primary to its definition in > the model. Keep the class definition simple and focused on life cycle continuity and identity. Define a means of > distinguishing each object regardless of its form or history. Identity - this may simply mean a unique identifier. Each entity must have an operational way of establishing its identity with another object - distinguishable even from another object with the same descriptive attributes. Defining identity demands understanding of the domain. VALUE OBJECTS. An object that represents a descriptive aspect of the domain with no conceptual identity. These are objects that describe things. When you care only about the attributes of an element of the model, classify it as a value object.
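A small illustration of the distinction (the Customer and Address classes are invented for this note, not taken from the book):

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class Address:
    # Value object: defined entirely by its attributes, immutable -
    # two equal addresses are interchangeable.
    street: str
    city: str


class Customer:
    # Entity: identity threads through the life cycle, attributes may change.
    def __init__(self, customer_id: str, address: Address) -> None:
        self.customer_id = customer_id
        self.address = address

    def __eq__(self, other: object) -> bool:
        # Same customer only if identities match, even when every
        # descriptive attribute differs.
        return isinstance(other, Customer) and self.customer_id == other.customer_id

    def __hash__(self) -> int:
        return hash(self.customer_id)
```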
SERVICES. Some concepts from the domain aren't natural to model as objects. Forcing the required domain functionality to be the responsibility of an entity or value either distorts the definition of a model-based object or adds meaningless artificial objects. A service is an operation offered as an interface that stands alone in the model, without encapsulating state. The name *service* emphasises the relationship with other objects. Services have to be stateless. MODULES. Many don't consider modules as part of the model. Yet it isn't just code being divided into modules, but concepts. Low coupling between modules minimises the cost of understanding their place in the design. It is possible to analyse the contents of one module with a minimum of reference to others that interact. Choose modules that tell the story of the system and contain a cohesive set of concepts. Give the modules names that become part of the ubiquitous language. Modules and their names should reflect insight into the domain. Modules need to co-evolve with the rest of the model. This means refactoring modules right along with the model and code. But this refactoring often doesn't happen. Use packaging to separate the domain layer from other code. Otherwise, leave as much freedom as possible to the domain developers to package the domain objects in ways that support their model and design choices. ## Chapter 6: The Life Cycle of a Domain Object The challenges: - Maintaining object integrity throughout the life cycle - Preventing the model from getting swamped by the complexity of managing the life cycle These issues can be addressed using 3 patterns. AGGREGATES. It is difficult to guarantee the consistency of changes to objects in a model with complex associations. Invariants need to be maintained that apply to closely related groups of objects, not just discrete objects. Yet cautious locking schemes cause multiple users to interfere pointlessly with each other and make a system unusable. An aggregate is a cluster of associated objects that we treat as a unit for the purpose of data changes. Each aggregate has a root and a boundary. Choose one entity to be the root of each aggregate, and control all access to the objects inside the boundary through the root. Allow external objects to hold references to the root only. FACTORIES. When creation of an object, or an entire aggregate, becomes complicated or reveals too much of the internal structure, factories provide encapsulation (assembly of a car: cars are never assembled and driven at the same time, there is no value in combining both of these functions into the same mechanism). Creation of an object can be a major operation by itself, but complex assembly operations do not fit the responsibility of the created objects. Combining such responsibilities can produce ungainly designs that are hard to understand. Making the client direct construction muddies the design of the client, breaches encapsulation of the assembled object or aggregate, and overly couples the client to the implementation of the created object. Two basic requirements for any good factory: 1. Each creation method is atomic and enforces all invariants of the created object or aggregate. 2. The factory should be abstracted to the type desired, rather than the concrete class created. REPOSITORIES. Associations allow us to find an object based on its relationship to another. But we must have a starting point for a traversal to an entity or value in the middle of its life cycle. For each type of object that needs global access, create an object that can provide the illusion of an in-memory collection of all objects of that type. Set up access through a well-known global interface. Provide methods to add and remove objects, which will encapsulate the actual insertion or removal of data in the data store. Provide methods that select objects based on some criteria and return objects. Provide repositories only for aggregate roots that actually need direct access. Keep the client focused on the model, delegating all object storage and access to the repositories. Repositories provide methods that allow a client to request objects matching some criteria.
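A minimal in-memory sketch of the pattern (Order and OrderRepository are illustrative names, not from the book):

```python
from typing import Dict, List, Optional


class Order:
    def __init__(self, order_id: str, customer_id: str) -> None:
        self.order_id = order_id
        self.customer_id = customer_id


class OrderRepository:
    """Provides the illusion of an in-memory collection of all Orders,
    hiding the actual data store behind a well-known interface."""

    def __init__(self) -> None:
        self._orders: Dict[str, Order] = {}

    def add(self, order: Order) -> None:
        self._orders[order.order_id] = order

    def remove(self, order_id: str) -> None:
        self._orders.pop(order_id, None)

    def get(self, order_id: str) -> Optional[Order]:
        return self._orders.get(order_id)

    def for_customer(self, customer_id: str) -> List[Order]:
        # Select objects based on some criteria and return them.
        return [order for order in self._orders.values() if order.customer_id == customer_id]
```

Swapping the dictionary for a database table changes nothing for the client - that is the point of the pattern.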
## Chapter 7: Using the Language: An Extended Example The model organises domain knowledge and provides a language for the team. Each object in the model has a clear meaning. To prevent domain responsibilities from being mixed with those of other parts of the system, apply a layered architecture. Modeling and design is not a constant forward process. It will grind to a halt unless there is frequent refactoring to take advantage of new insights to improve the model and the design. The real challenge is to actually find an incisive model, one that captures subtle concerns of the domain experts and can drive a practical design. Ultimately, we hope to develop a model that captures a deep understanding of the domain. Refactoring is the redesign of software in ways that do not change its functionality. Rather than making elaborate up-front design decisions, developers take code through a continuous series of small, discrete design changes, each leaving existing functionality unchanged while making the design more flexible or easier to understand. Initial models usually are naive and superficial, based on shallow knowledge. Versatility, simplicity and explanatory power come from a model that is truly in tune with the domain. You will usually depend on creativity and trial and error to find good ways to model the concepts you discover. ## Chapter 8: Breakthrough The returns from refactoring are not linear. Usually there is marginal return for a small effort, and the small improvements add up. Slowly but surely, the team assimilates knowledge and crunches it into a model. Each refinement of code and model gives developers a clearer view. This clarity creates the potential for a breakthrough. Don't become paralysed trying to bring about a breakthrough. The possibility usually comes after many modest refactorings. Most of the time is spent making piecemeal improvements, with model insights emerging gradually during each successive refinement. Don't hold back from modest improvements, which gradually deepen the model, even if confined within the same general conceptual framework. ## Chapter 9: Making Implicit Concepts Explicit A deep model has power because it contains the central concepts and abstractions that can succinctly and flexibly express essential knowledge of the user's activities, their problems and their solutions. The first step is to somehow represent the essential concepts of the domain in the model. Refinement comes later, after successive iterations of knowledge crunching and refactoring. But this process really gets into gear when an important concept is recognised and made explicit in the model and design. Sometimes the transformation of a formerly implicit concept into an explicit one is a breakthrough that leads to a deep model. More often, though, the breakthrough comes later, after a number of important concepts are explicit in the model. Listen to the language the domain experts use. Are there terms that succinctly state something complicated? Are they correcting your word choice? Do the puzzled looks on their faces go away when you use a particular phrase? These are hints of a concept that might benefit the model. Constraints make up a particularly important category of model concepts. They often emerge implicitly, and expressing them explicitly can greatly improve a design. Sometimes constraints find a natural home in an object or a separate method. Specification - a predicate that determines whether an object satisfies some criteria.
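Specifications translate almost directly into code - a tiny sketch (the OverdueSpecification name and Invoice fields are invented for illustration):

```python
from dataclasses import dataclass
from datetime import date


@dataclass
class Invoice:
    due_date: date
    paid: bool


class OverdueSpecification:
    """A predicate as an explicit model concept: answers whether a
    candidate object satisfies the criteria."""

    def __init__(self, today: date) -> None:
        self.today = today

    def is_satisfied_by(self, invoice: Invoice) -> bool:
        return not invoice.paid and invoice.due_date < self.today
```

The rule now has a name from the ubiquitous language and can be tested and combined, instead of being buried in an if statement.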
## Chapter 10: Supple Design The ultimate purpose of software is to serve users. But first, that same software has to serve developers. This is especially true in a process that emphasises refactoring. When software with complex behaviour lacks a good design, it becomes hard to refactor or combine elements. Duplication starts to appear as soon as a developer isn't confident of predicting the full implications of a computation. Duplication is forced when design elements are monolithic, so that the parts cannot be recombined. Supple design is the complement to deep modelling. Once you have dug out implicit concepts and made them explicit, you have the raw material. Through the iterative cycle, you hammer that material into a useful shape. If a developer must consider the implementation of a component in order to use it, the value of encapsulation is lost. Names should conform to the ubiquitous language so that team members can quickly infer their meaning. Write a test of a behaviour before creating it, to force your thinking into client developer mode. Place as much of the logic of the program as possible into functions, operations that return results with no observable side effects. Decompose design elements (operations, interfaces, classes and aggregates) into cohesive units, taking into consideration your intuition of the important divisions in the domain. Align the model with the consistent aspects of the domain that make it a viable area of knowledge in the first place. Low coupling is fundamental to object design. When you can, go all the way: eliminate all other concepts from the picture. Then the class will be completely self-contained and can be studied and understood alone. Every such self-contained class significantly eases the burden of understanding a module. Where it fits, define an operation whose return type is the same as the type of its arguments. ## Chapter 11: Applying Analysis Patterns > Analysis patterns are groups of concepts that represent a common construction in business modelling. It may be > relevant to only one domain, or it may span many domains. An analysis pattern is a template for solving an organizational, social or economic problem in a professional domain. ## Chapter 12: Relating Design Patterns to the Model Not all design patterns can be used as domain patterns. STRATEGY - Domain models contain processes that are not technically motivated but actually meaningful in the problem domain. When alternative processes must be provided, the complexity of choosing the appropriate process combines with the complexity of the multiple processes themselves, and things get out of hand. Factor the varying parts of the process into a separate "strategy" object in the model. Factor apart a rule and the behaviour it governs. Implement the rule or substitutable process following the strategy design pattern. Multiple versions of the strategy object represent different ways the process can be done.
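A sketch of a strategy that is meaningful in the domain, not just technically (the routing policies and the tuple shape are invented for illustration):

```python
from typing import List, Protocol, Tuple

Leg = Tuple[int, int]  # (cost, duration) - a simplification for the sketch


class LegMagnitudePolicy(Protocol):
    # The "strategy": a substitutable domain rule, factored apart from
    # the routing process it governs.
    def magnitude(self, leg: Leg) -> int: ...


class CheapestRoutePolicy:
    def magnitude(self, leg: Leg) -> int:
        cost, _ = leg
        return cost


class FastestRoutePolicy:
    def magnitude(self, leg: Leg) -> int:
        _, duration = leg
        return duration


def best_leg(legs: List[Leg], policy: LegMagnitudePolicy) -> Leg:
    # The process stays fixed; the rule that ranks the legs is swappable.
    return min(legs, key=policy.magnitude)
```

Here `best_leg([(5, 9), (7, 3)], FastestRoutePolicy())` picks `(7, 3)` - choosing a different policy changes the meaning of "best" without touching the process.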
COMPOSITE - When the relatedness of nested containers is not reflected in the model, common behaviour has to be duplicated at each level of the hierarchy, and nesting is rigid. Clients must deal with different levels of the hierarchy through different interfaces, even though there may be no conceptual difference they care about. Recursion through the hierarchy to produce aggregated information is very complicated. Define an abstract type that encompasses all members of the composite. Methods that return information are implemented on containers to return aggregated information about their contents. Leaf nodes implement those methods based on their own values. Clients deal with the abstract type and have no need to distinguish leaves from containers. ## Chapter 13: Refactoring Toward Deeper Insight A multifaceted process. There are 3 things you have to focus on: 1. Live in the domain 2. Keep looking at things a different way 3. Maintain an unbroken dialog with domain experts Seeking insight into the domain creates a broader context for the process of refactoring. Refactoring toward deeper insight is a continuing process. Implicit concepts are recognised and made explicit. Development suddenly comes to the brink of a breakthrough and plunges through to a deep model. ## Chapter 14: Managing Model Integrity Total unification of the domain model for a large system will not be feasible or cost-effective. BOUNDED CONTEXT. Multiple models are in play on any large project. Yet when code based on distinct models is combined, software becomes buggy, unreliable and difficult to understand. Communication among team members becomes confused. It is often unclear in what context a model should not be applied. Therefore, explicitly define the context within which a model applies. Explicitly set boundaries in terms of team organisation, usage within specific parts of the application, and physical manifestations such as code bases and database schemas. Keep the model strictly consistent within these bounds, but don't be distracted or confused by issues outside. CONTINUOUS INTEGRATION. When a number of people are working in the same bounded context, there is a strong tendency for the model to fragment. The bigger the team, the bigger the problem, but as few as three or four people can encounter serious problems. Yet breaking down the system into ever-smaller contexts eventually loses a valuable level of integration and coherency. Therefore, institute a process of merging all code and other implementation artefacts frequently, with automated tests to flag fragmentation quickly. Relentlessly exercise the ubiquitous language to hammer out a shared view of the model as the concepts evolve in different people's heads. CONTEXT MAP. People on other teams will not be very aware of the context bounds and will unknowingly make changes that blur the edges or complicate the interconnections. When connections must be made between different contexts, they tend to bleed into each other. Therefore, identify each model in play on the project and define its bounded context. This includes the implicit models of non-object-oriented subsystems. Name each bounded context, and make the names part of the ubiquitous language. Describe the points of contact between the models, outlining explicit translation for any communication and highlighting any sharing. SHARED KERNEL. Uncoordinated teams working on closely related applications can go racing forward for a while, but what they produce may not fit together. They can end up spending more on translation layers and retrofitting than they would have on continuous integration in the first place, meanwhile duplicating effort and losing the benefits of a common ubiquitous language. Therefore, designate some subset of the domain that the two teams agree to share. Of course this includes, along with this subset of the model, the subset of code or of the database design associated with that part of the model. This explicitly shared stuff has special status, and shouldn't be changed without consultation with the other team. Integrate a functional system frequently, but somewhat less often than the pace of continuous integration within the teams. At these integrations, run the tests of both teams. CUSTOMER / SUPPLIER DEVELOPMENT TEAMS. The freewheeling development of the upstream team can be cramped if the downstream team has veto power over changes, or if procedures for requesting changes are too cumbersome. The upstream team may even be inhibited, worried about breaking the downstream system. Meanwhile, the downstream team can be helpless, at the mercy of upstream priorities. Therefore, establish a clear customer / supplier relationship between the two teams. In planning sessions, make the downstream team play the customer role to the upstream team. Negotiate the budget and tasks for downstream requirements so that everyone understands the commitment and schedule. CONFORMIST. When two development teams have an upstream / downstream relationship in which the upstream has no motivation to provide for the downstream team's needs, the downstream team is helpless. Therefore, eliminate the complexity of translation between bounded contexts by slavishly adhering to the model of the upstream team. ANTI-CORRUPTION LAYER.
When a new system is being built that must have a large interface with another, the difficulty of relating the two models can eventually overwhelm the intent of the new model altogether, causing it to be modified to resemble the other system's model, in an ad hoc fashion. Therefore, create an isolating layer to provide clients with functionality in terms of their own domain model. The layer talks to the other system through its existing interface, requiring little or no modification to the other system.
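A sketch of such a layer (LegacyCrm and its record format are an invented stand-in for "the other system"):

```python
class LegacyCrm:
    # The other system, with its own model - we do not modify it.
    def fetch_client_record(self, number: str) -> dict:
        return {"CLIENT_NO": number, "NM": "ACME Ltd", "ST": "A"}


class Customer:
    # Our own domain model's view of a customer.
    def __init__(self, customer_id: str, name: str, active: bool) -> None:
        self.customer_id = customer_id
        self.name = name
        self.active = active


class CustomerTranslator:
    """The anti-corruption layer: talks to the legacy interface and hands
    clients objects expressed in their own domain model."""

    def __init__(self, crm: LegacyCrm) -> None:
        self.crm = crm

    def customer(self, customer_id: str) -> Customer:
        record = self.crm.fetch_client_record(customer_id)
        return Customer(
            customer_id=record["CLIENT_NO"],
            name=record["NM"],
            active=record["ST"] == "A",
        )
```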
SEPARATE WAYS. Integration is always expensive. Sometimes the benefit is small. Therefore, declare a bounded context to have no connection to the others at all, allowing developers to find simple, specialised solutions within this small scope. OPEN HOST SERVICE. When a subsystem has to be integrated with many others, there is more and more to maintain and more and more to worry about when changes are made. Therefore, define a protocol that gives access to your subsystem as a set of services. PUBLISHED LANGUAGE. Direct translation to and from the existing domain models may not be a good solution. Those models may be overly complex or poorly factored. Therefore, use a well-documented shared language that can express the necessary domain information as a common medium of communication, translating as necessary into and out of that language. ## Chapter 15: Distillation CORE DOMAIN. In designing a large system, there are so many contributing components, all complicated and all absolutely necessary to success, that the essence of the domain model can be obscured and neglected. Therefore, boil the model down. Make the core small. GENERIC SUBDOMAINS. Anything extraneous makes the core domain harder to discern and understand. Therefore, identify cohesive subdomains that are not the motivation for your project. Factor out generic models of these subdomains and place them in separate modules. DOMAIN VISION STATEMENT. In later stages of development, there is a need for explaining the value of the system that does not require an in-depth study of the model. Therefore, write a short description of the core domain. Keep it narrow. Write this statement early and revise it as you gain new insight. HIGHLIGHTED CORE. The mental labor of constantly filtering the model to identify the key parts absorbs concentration better spent on design thinking, and it requires comprehensive knowledge of the model. Therefore, write a brief document that describes the core domain and the primary interactions among core elements. COHESIVE MECHANISMS. Computations sometimes reach a level of complexity that begins to bloat the design. The conceptual *what* is swamped by the mechanistic *how*. Therefore, partition a conceptually cohesive mechanism into a separate lightweight framework. SEGREGATED CORE. Elements in the model may partially serve the core domain and partially play a supporting role. Core elements may be tightly coupled to generic ones. Therefore, refactor the model to separate the core concepts from supporting players and strengthen the cohesion of the core while reducing its coupling to other code. ABSTRACT CORE. When there is a lot of interaction between subdomains in separate modules, either many references will have to be created between modules, which defeats much of the value of the partitioning, or the interaction will have to be made indirect, which makes the model obscure. Therefore, identify the most fundamental concepts in the model and factor them into distinct classes, abstract classes or interfaces. ## Chapter 16: Large-Scale Structure EVOLVING ORDER. Design free-for-alls produce systems no one can make sense of as a whole. Therefore, let this conceptual large-scale structure evolve with the application, possibly changing to a completely different type of structure along the way. Don't over-constrain the detailed design and model decisions that must be made with detailed knowledge. SYSTEM METAPHOR. Software designs tend to be very abstract and hard to grasp. Developers and users alike need tangible ways to understand the system and share a view of the system as a whole. Therefore, organise the design around a metaphor and absorb it into the ubiquitous language. RESPONSIBILITY LAYERS. When each individual object has handcrafted responsibilities, there are no guidelines, no uniformity and no ability to handle large swaths of the domain together. Therefore, look at the conceptual dependencies in your model and the varying rates and sources of change of different parts of your domain. Refactor the model so that the responsibilities of each domain object fit neatly within the responsibility of one layer. KNOWLEDGE LEVEL. In applications in which the roles and relationships between entities vary in different situations, complexity can explode. Objects end up with references to other types to cover a variety of cases, or with attributes that are used in different ways in different situations. Therefore, create a distinct set of objects that can be used to describe and constrain the structure and behaviour of the basic model. PLUGGABLE COMPONENT BEHAVIOUR. When a variety of applications have to interoperate, all based on the same abstractions but designed independently, translations between multiple bounded contexts limit integration. Duplication and fragmentation raise the costs of development and installation. Therefore, distill an abstract core of interfaces and interactions and create a framework that allows diverse implementations of those interfaces to be freely substituted. ================================================ FILE: books/ddia.md ================================================ [go back](https://github.com/pkardas/learning) # Designing Data-Intensive Applications: The Big Ideas Behind Reliable, Scalable, and Maintainable Systems Book by Martin Kleppmann - [Chapter 1: Reliable, Scalable and Maintainable Applications](#chapter-1-reliable-scalable-and-maintainable-applications) - [Chapter 2: Data Models and Query Languages](#chapter-2-data-models-and-query-languages) - [Chapter 3: Storage and Retrieval](#chapter-3-storage-and-retrieval) - [Chapter 4: Encoding and Evolution](#chapter-4-encoding-and-evolution) - [Chapter 5: Replication](#chapter-5-replication) - [Chapter 6: Partitioning](#chapter-6-partitioning) - [Chapter 7: Transactions](#chapter-7-transactions) - [Chapter 8: The Trouble with Distributed Systems](#chapter-8-the-trouble-with-distributed-systems) - [Chapter 9: Consistency and Consensus](#chapter-9-consistency-and-consensus) - [Chapter 10: Batch Processing](#chapter-10-batch-processing) - [Chapter 11: Stream Processing](#chapter-11-stream-processing) - [Chapter 12: The Future of Data Systems](#chapter-12-the-future-of-data-systems) ## Chapter 1: Reliable, Scalable and Maintainable Applications Many applications today are data-intensive: raw CPU power is rarely the limiting factor - the amount of data, its complexity and the speed at which it changes are. They are built from standard building blocks: database, cache, search index, stream processing, batch processing. These building blocks have many variants.
*Reliability* - performs as expected, tolerates users' mistakes, good performance, continues to work even if things go wrong. Hardware faults - on a cluster with 10 000 disks, you can expect, on average, one disk to die per day. Multi-machine redundancy is required only by a small number of applications. Software errors - e.g. many applications hung simultaneously on 30.06.2012 because of a bug in the Linux kernel (triggered by the leap second). These kinds of bugs lie dormant for a long time until they are triggered by an unusual set of circumstances. Human errors - humans are responsible for the majority of errors. There are measures that can be taken in order to prevent errors: - well-defined abstractions, easy to use tools, interfaces that discourage doing the wrong things - provide a fully functional non-production sandbox environment where people can explore and experiment with real data - test thoroughly at all levels (unit tests, integration, ...) - provide tools that can recompute the data in case of errors in the past - set up detailed monitoring *Scalability* - a system's ability to cope with increased load. Load can be described with a few numbers (load parameters), e.g. requests per second, read/write ratio, number of simultaneous connections, hit rate on a cache or something else. *Describing performance* Response times (client waiting time) vary, so don't rely on the average - look at the median (p50). In order to know how bad your outliers are, you need to look at the 95th, 99th and 99.9th percentiles. High percentiles (tail latencies) are important because they directly affect users' experience. Anyhow, optimising the 99.99th percentile might be really expensive. SLO (service level objectives) and SLA (service level agreements) - contracts that define the expected performance and availability of a service. Example SLA: service up and median response time < 200 ms, 99th percentile < 1s. High percentiles are extremely important in backend services that are called multiple times as part of serving a single end-user request.
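A quick illustration of why the median and the high percentiles tell different stories (the response times are made up):

```python
import math

# Made-up response times in milliseconds for ten requests.
response_times_ms = sorted([30, 32, 35, 41, 45, 52, 61, 80, 120, 950])


def percentile(sorted_values, p):
    # Nearest-rank percentile: the smallest value with at least p% of
    # observations at or below it.
    rank = math.ceil(p / 100 * len(sorted_values))
    return sorted_values[rank - 1]


print(percentile(response_times_ms, 50))  # 45  - the typical request
print(percentile(response_times_ms, 95))  # 950 - the tail an SLA cares about
```

The average here (~145 ms) describes almost no real request, which is why percentiles matter.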
*Evolvability*

Requirements change, you learn new facts, new use cases emerge, priorities change, etc. Agile provides a framework for adapting to change. Modify the system and adapt it to changing requirements - pay attention to simplicity and abstractions.

## Chapter 2: Data Models and Query Languages

*Relational Model vs Document Model*. Relational databases turned out to generalise very well. NoSQL (*Not Only SQL*) is the latest attempt to overthrow the relational model's dominance. Driving forces behind NoSQL:

- a need for greater scalability - very large datasets / very high write throughput
- many open source projects
- specialised query operations
- frustration with the restrictiveness of the relational model

A rule of thumb: if you are duplicating values that could be stored in just one place, the schema is not normalised.

Many-to-many relationships are widely used in relational databases; NoSQL reopened the debate on how best to represent such relationships.

If your data has a document-like structure, then it's probably a good idea to use a document model. The relational alternative - shredding (splitting a document-like structure into multiple tables) - can lead to unnecessarily complicated application code. Problems with the document model: you cannot access a nested object directly (you need to use an access path), and it does not perform well with many-to-many relationships.

Database schemas can be compared to languages: relational - a compiled language with static typing; document - dynamic (runtime) type checking - schema-on-read.

Data locality - because document databases store a document as a single continuous string (JSON, XML, ...), access will often be faster because of locality; if data is split across multiple tables -> multiple disks -> more disk seeks -> more time required. However, the document database will need to load the entire document even if you need only a small portion of it.

*Query Languages for Data*

SQL is declarative - you define what you want, and it is up to the computer to determine how to get this data. Most programming languages are imperative - you define how to process the data.

*MapReduce Querying*

MapReduce - a programming model for processing large amounts of data in bulk across many machines. A limited form of MapReduce is supported by some NoSQL datastores. It is something between declarative and imperative programming.

*Graph-Like Data Models*

A very good approach for data with many-to-many relationships. Each vertex has: an ID, a set of outgoing edges, a set of incoming edges, and a collection of properties (key-value pairs). Each edge has: an ID, the tail vertex, the head vertex, a label describing the type of relationship, and a collection of properties (key-value pairs). Graphs give great flexibility in modeling relationships, e.g. France has departments and regions, whereas the US has counties and states.

Cypher is a declarative query language for property graphs, created for the Neo4j DB, e.g. find the names of all people who emigrated from the US to Europe:

```cypher
MATCH
  (person) -[:BORN_IN]-> () -[:WITHIN*0..]-> (us:Location {name: "United States"}),
  (person) -[:LIVES_IN]-> () -[:WITHIN*0..]-> (eu:Location {name: "Europe"})
RETURN person.name
```

This can be expressed in SQL (recursive common table expressions), but with one difficulty: `LIVES_IN` might point to any location (region, country, state, continent), and here we are interested only in the US and Europe. 4 lines in Cypher vs 29 lines in SQL.
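A toy sketch of the same traversal idea over an in-memory adjacency structure - the `within` helper plays the role of the `-[:WITHIN*0..]->` pattern above (all data and names below are made up for illustration):

```python
# Toy property graph: WITHIN edges form the location hierarchy.
WITHIN = {
    "Idaho": "United States",
    "United States": "North America",
    "Bourgogne": "France",
    "France": "Europe",
}

PEOPLE = [
    {"name": "Lucy", "born_in": "Idaho", "lives_in": "Bourgogne"},
    {"name": "Alain", "born_in": "Bourgogne", "lives_in": "Bourgogne"},
]

def within(location, target):
    """Follow WITHIN edges zero or more times - the *0.. in the Cypher query."""
    while location is not None:
        if location == target:
            return True
        location = WITHIN.get(location)
    return False

# Equivalent of: MATCH ... BORN_IN ... United States, LIVES_IN ... Europe
emigrants = [
    p["name"]
    for p in PEOPLE
    if within(p["born_in"], "United States") and within(p["lives_in"], "Europe")
]
print(emigrants)  # ['Lucy']
```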
*Triple-Stores*

Very similar to the graph model - all information is stored in the form of very simple three-part statements: `(subject, predicate, object)`, e.g. `(Jim, likes, bananas)`.

## Chapter 3: Storage and Retrieval

In order to tune a storage engine to perform well on your kind of workload, you need to have a rough idea of what the storage engine is doing under the hood.

*Data Structures That Power Your Database*

Hash indexes - for example, an in-memory hash map of keys to byte offsets in an append-only data file.

SSTable - Sorted String Table.

B-trees - the most widely used indexing structure, the standard index implementation for almost all relational databases and for many non-relational databases. B-trees keep key-value pairs sorted by key, which allows efficient key-value lookups. The number of references to child pages in one page of the B-tree is called the branching factor. A B-tree with *n* keys always has a depth of *O(log n)*. Most databases can fit into a B-tree that is 3-4 levels deep. A 4-level tree of 4 KB pages with a branching factor of 500 can store up to 256 TB of data.

In order to make a DB resilient to crashes, it is common for B-tree implementations to include an additional data structure on disk - a WAL (write-ahead log) - an append-only file to which every B-tree modification must be written before it can be applied to the pages of the tree. When the DB crashes, this log is used to restore the B-tree to a consistent state.

LSM-tree:

- faster for writes
- can be compressed better, thus often produces smaller files on disk - lower storage overheads
- the compaction process can sometimes interfere with the performance of ongoing reads and writes
- if write throughput is high and compaction is not configured carefully, compaction might not keep up with the rate of incoming writes

B-trees are so old and so well optimised that they can deliver good, consistent performance for many workloads.

Key-value indexes are like the primary key index in the relational model. It is also common to have secondary indexes; they don't have to be unique - this can be solved, for example, by appending the row ID.

Clustered index - storing all row data within the index. Concatenated index - a multi-column index; it combines several fields into one key by appending one column to another.

What if you search for misspelled or similar data? Lucene is able to search text for words within a certain edit distance.

The data structures discussed so far are specific to disks. However, as RAM becomes cheaper, many datasets become feasible to keep entirely in memory. This led to the development of in-memory databases. Some in-memory key-value stores (Memcached) are intended for caching - data can be lost on machine restart. Other in-memory databases aim for durability, which can be achieved with special battery-powered RAM, by writing a log of changes to disk, or by replicating memory state to other machines. When restarted, such a database needs to load the data from disk or from a replica - so even though it is an in-memory database, a disk is still used. Other in-memory databases with a relational model: VoltDB, MemSQL, Oracle TimesTen. RAMCloud is a key-value store; Redis and Couchbase provide weak durability by writing to disk asynchronously. In-memory databases achieve better performance.

OLTP - Online Transaction Processing - interactive applications - look up a small number of records, insert or update records based on the user's activity.

OLAP - Online Analytic Processing - a different access pattern - analytic queries.
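Returning to the hash-index idea from the top of this chapter - a minimal sketch of an append-only data file plus an in-memory hash map from key to byte offset. This is a toy (keys and values must not contain commas or newlines), not a real storage engine:

```python
import os

class HashIndexStore:
    """Toy append-only key-value store with an in-memory hash index.

    set() appends "key,value" to the data file and remembers the byte
    offset of that record; get() seeks straight to the latest offset.
    Old records stay in the file - a real engine would compact them.
    """

    def __init__(self, path):
        self.path = path
        self.index = {}  # key -> byte offset of the latest record for that key
        open(path, "a").close()  # make sure the data file exists

    def set(self, key, value):
        with open(self.path, "a") as f:
            self.index[key] = f.tell()  # append position = current file size
            f.write(f"{key},{value}\n")

    def get(self, key):
        with open(self.path) as f:
            f.seek(self.index[key])
            _, value = f.readline().rstrip("\n").split(",", 1)
            return value

store = HashIndexStore("toy.db")
store.set("42", "San Francisco")
store.set("42", "SF")   # the old record is not overwritten in place
print(store.get("42"))  # SF - the index points at the newest record
os.remove("toy.db")
```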
In the 90s, companies stopped using OLTP systems for analytics purposes and shifted to running analytics on a separate database. This separate database is called a data warehouse.

Data warehouse - a separate database that analysts can query without affecting OLTP operations; a read-only copy of the data. Data is extracted from OLTP databases and transformed into an analysis-friendly schema. The process of getting data into the warehouse is known as Extract-Transform-Load (ETL). The biggest advantage of a separate database for analysis is that it can be optimised for large queries.

Many data warehouses use a star schema (dimensional modeling). A variation of this schema is called the snowflake schema - snowflakes are more normalised than stars. Fact tables are often 100 columns wide, however `SELECT *` queries are rarely used.

In most OLTP databases, storage is laid out in a row-oriented fashion. How to execute analytic queries more efficiently? The idea behind column-oriented storage is simple: don't store all the values from one row together, but store all the values from each column together, e.g. one file = one column - much faster than parsing each row. Columns can be compressed using, for example, bitmap encoding - unique values encoded using bits. This is efficient in situations where there are only a few unique values and millions of records. Column compression allows more rows from a column to fit in the L1 cache.

## Chapter 4: Encoding and Evolution

Rolling upgrade / staged rollout - deploying the new version to a few nodes at a time, checking whether the new version is running smoothly. With client-side applications you are at the mercy of the user, who may not install the update for some time. This means that old and new versions of the code might co-exist for some time.

Backward compatibility - newer code can read data that was written by older code (normally not hard). Forward compatibility - older code can read data that was written by newer code (this is trickier).

Programs usually work with data in 2 representations:

- in memory - objects, lists, arrays, trees - data structures optimised for efficient access and manipulation by the CPU
- byte sequences - for example JSON

The translation from the in-memory representation to a byte sequence is called encoding. The reverse is called decoding (also: parsing, deserialisation, unmarshalling).

Many programming languages have built-in support for encoding in-memory data structures. Python has pickle, Java has Serializable, Ruby has Marshal, however:

- the encoding is tied to the programming language
- it is a potential source of security issues
- Java's built-in serialisation is said to have bad performance

In general, it is a bad idea to use a language's built-in encoding for anything other than very transient purposes.

JSON:

- built-in support in browsers
- distinguishes strings and numbers
- good support for Unicode, no support for binary strings

XML:

- too verbose
- no distinction between numbers and strings
- good support for Unicode, no support for binary strings

CSV:

- less powerful than XML and JSON
- no distinction between numbers and strings
- no data schema

Despite the flaws of JSON, XML and CSV, they are good enough for many purposes, and they will remain popular. JSON is less verbose than XML, but still uses a lot of space - this might be an issue when you are dealing with terabytes of data. This led to the development of binary encodings for JSON - BSON, BJSON, UBJSON, BISON. XML also has its binary encodings - WBXML and Fast Infoset. However, none of them are widely adopted.
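To make the space argument concrete, here is a tiny stdlib-only comparison of a JSON record against a naive, made-up fixed-layout binary encoding (length-prefixed strings plus a 4-byte integer) - not any of the formats above, purely an illustration of where JSON's overhead (field names, quotes, braces) goes:

```python
import json
import struct

record = {"userName": "Martin", "favoriteNumber": 1337, "interests": ["daydreaming", "hacking"]}

as_json = json.dumps(record).encode()

def pack_str(s):
    """Length-prefixed UTF-8 string: 1 byte of length, then the bytes."""
    raw = s.encode()
    return struct.pack("B", len(raw)) + raw

# Hand-rolled layout; the schema (field order and types) lives in the code,
# so no field names travel over the wire.
as_binary = (
    pack_str(record["userName"])
    + struct.pack(">i", record["favoriteNumber"])
    + b"".join(pack_str(i) for i in record["interests"])
)

print(len(as_json), len(as_binary))  # prints something like: 88 31
```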
Apache Thrift (Facebook) and Protocol Buffers (Google) are binary encoding libraries based on the same principle: a schema is defined in an interface definition language, and this schema can generate code for encoding and decoding data.

Field numbers in Apache Thrift are used for more compact encoding (there is no need to pass field names over the wire - CompactProtocol). Required / optional makes no difference for the encoding; this is used at runtime. Every field you add after the initial deployment of the schema must be optional or have a default value. Removing a field is like adding one, with the concerns reversed: you can remove only optional fields. Also, with Protocol Buffers / Thrift you can never use the same tag number again.

Avro is another binary encoding format; it has optional code generation for dynamically typed programming languages.

Data can flow through:

- databases
- services - REST and RPC - services are similar to databases: they allow clients to submit and query data. A key design goal of a service-oriented / microservices architecture is to make the application easier to change and maintain by making services independently deployable and evolvable. REST is not a protocol, but rather a design philosophy that builds upon the principles of HTTP. SOAP is an XML-based protocol for making network API requests. RPC - Remote Procedure Call - seems convenient at first, but the approach is fundamentally flawed, because a network request is very different from a local function call:
  - a local function is predictable - it either succeeds or fails depending on the input; a network request is unpredictable - the connection might be lost
  - a local function call either returns a result or throws an exception; a network request may return without a result - a timeout
  - a retry mechanism might cause duplication (if the first request went through), unless you build a deduplication mechanism
  - the duration of a remote call depends on network congestion
  - when you call a local function, you can efficiently pass references
  - if the client and server use different languages, data translation might end up ugly

  Despite these problems, RPC is not going away; modern frameworks are more explicit about the fact that a remote call is different from a local function invocation.
- message passing - something between RPC and databases. Similar to RPC because the client's request is delivered with low latency; similar to databases because the message is not sent via a direct network connection but goes through a message broker. Message brokers have a couple of advantages compared to RPC:
  - they can act as a buffer when the recipient is unavailable
  - they can automatically redeliver messages
  - the sender does not need to know the recipient's IP address and port
  - one message can be sent to multiple recipients
  - logical decoupling between sender and receiver

## Chapter 5: Replication

Shared-nothing architecture - each machine or virtual machine running the database is called a node. Each node uses its own CPU, RAM and disks independently. Any coordination between nodes is done at the software level, using the network.

Replication - keeping a copy of the same data on multiple machines that are connected via a network. Why?

- to reduce latency - a copy close to the users
- to allow the system to continue working even if some parts fail
- to scale out

If the data is not changing, replication is easy. For dealing with changes to replicated data, the following algorithms can be used: single-leader, multi-leader and leaderless replication.

Leaders and Followers - each node (replica) stores a copy of the database.
One of the replicas is designated to be the leader (master): when clients want to write to the database, they must send their requests to the leader. The other replicas - followers (slaves) - take the log from the leader and update their local copy of the data, applying all writes in the same order as they were processed by the leader. Writes are accepted only by the leader; reads can be performed using any follower.

On follower failure: if the connection between leader and follower is temporarily interrupted, the follower can recover easily, because it knows the last processed transaction from the log. It can request all the missing data since that last successful transaction.

Leader failure is trickier. One of the followers can be promoted to be the new leader - for example, the replica with the most recent data (election) - to minimise data loss.

Implementation of Replication Logs:

- Statement-based replication - the leader logs every write request (statement); for a relational database this means that every INSERT / UPDATE / DELETE statement is forwarded to the followers, and each follower executes the received SQL statement (as if it had been received from a client). Problems: what about NOW() and RAND() (nondeterministic), what about auto-incrementing fields, what about triggers? There are some workarounds, like sending the statement together with its result, or requiring deterministic transactions.
- Write-ahead log (WAL) shipping - the log is an append-only sequence of bytes containing all writes; this log can be used to build a replica. This method is used in PostgreSQL and Oracle. The disadvantage of this approach is that the log contains low-level information - like which disk blocks were changed - so replication is closely coupled to the storage engine (or even the storage engine version!).
- Logical log replication - an alternative approach that uses a different log format for replication - decoupling. Usually a sequence of records describing writes to database tables at the granularity of a row. Easier backward compatibility - leaders and followers can run different engine versions.
- Trigger-based replication - triggers have the ability to log changes to a separate table, from which an external process can read. This allows for replicating, for example, only a subset of the data.

Problems with replication lag:

- leader-based replication is great when we need to scale reads, not necessarily writes - common on the web
- synchronous replication - a single node failure can make the entire system unavailable
- asynchronous replication - a follower might fall behind -> inconsistent data (this is a temporary situation; if you stop writing for a while, the followers will catch up and become consistent with the leader - eventual consistency)

Replication lag anomalies:

- if a user writes and then views the data, the new data might not yet have reached the replica. Read-your-writes consistency - a guarantee that if the user reloads the page, they will always see the updates they submitted themselves.
  - Solution: the owner of the profile reads their own data from the leader, other users read from a replica. There are modifications, for example: if the last update is older than 1 minute -> read from a replica.
- when reading from asynchronous followers, a user can see things moving backward in time - this happens when the user makes several reads from different replicas
  - Solution: monotonic reads - a guarantee stronger than eventual consistency; if a user makes several reads in sequence, they will not see time go backward (never older data after newer data)
- consistent prefix reads - if a sequence of writes happens in a certain order, then anyone reading those writes will see them appear in the same order
  - Solution: make sure that any writes that are causally related to each other are written to the same partition, OR use an algorithm that keeps track of causal dependencies

When working with an eventually consistent system, it is worth thinking about how the application behaves if the replication lag increases to several minutes or hours.

Multi-Leader Replication - more than one node accepting writes; each leader simultaneously acts as a follower to the other leaders. It rarely makes sense to use a multi-leader setup within a single datacenter, because the benefits rarely outweigh the added complexity. However, there are some situations in which this configuration makes sense:

- multi-datacenter operation - one leader in each datacenter. A multi-leader setup requires a conflict resolution mechanism, which can be problematic. Multi-leader replication is often considered dangerous territory that should be avoided if possible.
- clients with offline operation - for example, a calendar app has to work even if it is disconnected from the internet; if you make changes while you are offline, they need to be synced with a server and all other devices. This basically means every device has a local database that acts as a leader. For example, CouchDB is designed for this mode of operation.
- collaborative editing - multiple people editing the same document - e.g. Google Docs - a very similar case to the previous one. If you want to guarantee that there will be no editing conflicts, the application must obtain a lock on the document before a user can edit it - this collaboration model is equivalent to single-leader replication with transactions on the leader.

Handling Write Conflicts:

- make the conflict detection synchronous - wait for the write to be replicated to all replicas before telling the user that the write was successful
- avoid conflicts - all writes for a given record go through the same leader; requests from a particular user are always routed to the same datacenter, whose leader is used for reading and writing
- each replica should converge toward a consistent state
- custom conflict resolution - this might depend on the application; the code might be executed on write or on read

Automatic Conflict Resolution - Amazon is a frequently cited example of surprising effects of a conflict resolution handler - customers were seeing previously removed items reappear in their carts. Some ideas for automatic conflict resolution:

- conflict-free replicated datatypes (CRDTs) - a family of data structures that can be concurrently edited by multiple users
- mergeable persistent data structures - similar to Git
- operational transformation - the algorithm behind Google Docs - designed specifically for concurrent edits of an ordered list of items, e.g. a string

Replication topology describes the communication paths along which writes are propagated from one node to another (circular, star, all-to-all).

Leaderless replication - the client sends its writes directly to several replicas, or a coordinator node does this on behalf of the client. When one node is down, some data might be unavailable.
For this reason, when a client reads from the database, it sends its requests to multiple replicas and uses the value with the highest version number. Eventually all the data will be copied to every replica.

2 approaches for dealing with inconsistent data: the client fixes inconsistencies whenever it notices them (read repair), or a background process looks for differences in the data (anti-entropy). For example, in Dynamo-style databases it is possible to set the minimum number of replicas that must have saved the data for a write to be considered valid.

It is important to monitor replication lag, even if your application can tolerate stale reads.

Dynamo-style databases allow several clients to concurrently write to the same key - this means potential conflicts! Events may arrive in a different order at different nodes, due to network delays and partial failures (replicas might store different values). In order to become eventually consistent, the replicas should converge toward the same value. It is up to the developer to resolve conflicts:

- last write wins - discard older values
- detect happens-before relationships (btw. two operations may be considered concurrent when they overlap in time, not necessarily happening at the same instant)
- merge concurrently written values
- use version vectors - a version number per replica and per key; each replica increments its own version number

## Chapter 6: Partitioning

Partitioning - breaking up the data into partitions (each piece of data belongs to exactly one partition). The main reason for partitioning is scalability - different partitions can be placed on different nodes. Partitioning is usually combined with replication: copies of each partition are stored on multiple nodes.

The goal of partitioning is to spread the data and the query load evenly across nodes. If every node takes a fair share, then e.g. 10 nodes should be able to handle 10x as much data. If partitioning is unfair, it is called skewed. Skew makes partitioning less effective.

In order to reduce skew, data needs to be distributed evenly. One way is to assign a continuous range of keys to each partition (PARTITION 1: A-B, PARTITION 2: C-D, ...). The ranges of keys are not necessarily evenly spaced, because the data may not be - for example, the majority of entries might start with the letter A. Partition boundaries need to be carefully selected by an application developer with domain knowledge.

Partitioning by date is problematic too - all writes go to a single partition (today's), whereas the remaining partitions sit idle. You could solve this issue by partitioning first by name (for example, sensor name) and then by time - this will balance the load.

Skew can be reduced by using a hash function that evenly distributes data across partitions. The partition boundaries can be evenly spaced or chosen pseudorandomly (consistent hashing). Consistent hashing - a way of evenly distributing load across an internet-wide system of caches such as a CDN; it uses randomly chosen partition boundaries to avoid the need for central control or distributed consensus.

Using a hash of the key loses the ability to do efficient range queries (the sort order is lost).

Hashing a key can reduce hot spots, but cannot eliminate them entirely. For example, a celebrity on social media can cause a storm of activity - this may lead to many writes to the same key. It is up to the application developer to handle such hot spots - e.g. add a random prefix to the key.

Secondary indexes are slightly more problematic, because they don't identify a record uniquely.
There are 2 main approaches to partitioning a database with secondary indexes:

- document-based (local index) - each partition has its own (local) secondary indexes, which means reading requires extra care: "I am looking for a red car" needs to scatter the query to both partitions - quite expensive. However, it is widely used.
- term-based (global index) - instead of each partition having its own secondary index, we can construct a global index. A global index also needs to be partitioned - for example, for the secondary key `color:red`, cars with names a-d on partition 0, the rest on partition 1. Reads are more efficient, writes are slower.

Data changes in the database - throughput increases, the dataset grows, machines fail. Rebalancing - the process of moving data from one node to another. After rebalancing, data should be shared fairly between nodes; while rebalancing, the database should remain available for writes and reads, and only the minimal amount of data should be moved between nodes.

DO NOT USE hash mod N when assigning keys to partitions. The problem with this approach is that the number of nodes changes, and then most keys map to a different partition - this requires moving far more data than necessary when a new node is added.

A better solution is to create a fixed number of partitions (many more partitions than nodes, e.g. 10 nodes - 1000 partitions). If a new node is added to the cluster, it can steal a few partitions from the others. The only thing that changes is the partition assignment. This is the approach followed by, for example, Elasticsearch (the number of partitions is set at the beginning). Choosing the right number of partitions is difficult.

Dynamic partitioning is suitable for key-range partitioning.

Automatic rebalancing can be unpredictable, because it is an expensive operation - rerouting requests and moving a large amount of data can overload the network. For this reason, it is a good approach to have a human administrator perform the rebalancing.

How to route a request to a particular partition? How can the system know where the data is? This problem is known as service discovery. The system can keep track of the assignment in a separate registry. Another possibility is that the client connects to any node; if that node cannot serve the request, the client is forwarded to another node.

## Chapter 7: Transactions

The overhead of transactions is usually preferable to the lack of transactions and having to code around their absence.

A transaction is a way for an application to group several reads and writes together into a logical unit. Either the entire transaction succeeds (commit) or it fails (abort, rollback). If a transaction fails, the application can retry - this makes error handling much simpler. However, sometimes it might be beneficial to weaken transactional guarantees or abandon them entirely (for higher availability).

The safety guarantees provided by transactions are often described by the ACID acronym. Implementations of ACID vary between DBMSs.

- Atomicity - (atomic refers to something that cannot be broken into smaller parts) if an error happens in the middle of a transaction, its writes have to be reverted. If a transaction was aborted, the application can be sure that it didn't change anything, so it can be safely retried. Perhaps "abortability" would have been a better term than atomicity.
- Consistency - (a terribly overloaded term) in ACID: certain statements about the data must always be true (for example, correct account balances in a banking system). If a transaction starts with a database that is valid, any writes during the transaction preserve the validity.
- Isolation - most databases are accessed by several clients at the same time; if they are accessing the same database records, you can run into concurrency problems. Isolation means that concurrently executing transactions are isolated from each other - they cannot step on each other's toes. The classic database textbooks define isolation as serialisability (however, this is rarely used because it carries a performance penalty).
- Durability - the promise that once a transaction has committed successfully, any data it has written will not be forgotten, even if there is a hardware fault or the database crashes. Anyhow, perfect durability does not exist (for example, all backups destroyed at the same time).

ACID databases are based on this philosophy: if the database is in danger of violating its guarantee of atomicity, isolation or durability, it would rather abandon the transaction entirely than allow it to remain half-finished.

Isolation makes life easier by hiding concurrency issues. In reality, serialisability is not that simple - it has a performance cost - therefore it is common for systems to use weaker levels of isolation, which protect against only some concurrency issues. Common wisdom: "Use ACID databases if you are handling financial data" - however, many popular relational databases use weak isolation even though they are considered ACID.

Read committed - the most basic level of transaction isolation, makes 2 guarantees:

- no dirty reads - you will only see data that has been committed
- no dirty writes - you will only overwrite data that has been committed

Snapshot isolation - read committed does not solve all the issues (for example, non-repeatable reads - when you read data in the middle of another transaction's writes). Data being inconsistent for a few seconds is not a problem; more problematic are long-lasting data inconsistencies. Snapshot isolation is a boon for long-running, read-only queries such as backups and analytics. A transaction should see a consistent snapshot of the database, frozen at a particular point in time (so the data is not changing while it is being processed). The key principle of snapshot isolation is: readers never block writers, and writers never block readers.

FOR UPDATE tells the database to lock all rows returned by the query.

Serialisable isolation is usually regarded as the strongest isolation level. It guarantees that even though transactions may execute in parallel, the end result is the same as if they had executed one at a time, serially, without any concurrency. The database prevents all possible race conditions.

The simplest way of avoiding concurrency problems is to remove the concurrency entirely - one transaction at a time, in serial order, on a single thread.

Stored procedures gained a bad reputation for various reasons: each DB vendor has its own language for stored procedures, code running in a database is difficult to manage (hard to debug, awkward to version and deploy, trickier to test), and a badly written procedure may harm overall DB performance. Modern implementations of stored procedures have abandoned PL/SQL and use existing general-purpose programming languages instead.

Serial execution of transactions makes concurrency control much simpler, but limits the transaction throughput of the database to the speed of a single CPU core on a single machine. A simple solution is to partition the database so that each CPU core has its own partition. However, if a transaction needs to access multiple partitions, the database must coordinate across all the partitions that it touches.
Serial execution is a viable way of achieving serialisable isolation within certain constraints:

- every transaction must be small and fast
- write throughput must be low enough to be handled by a single CPU core
- cross-partition transactions are possible, but there is a hard limit to the extent to which they can be used

2PL - Two-Phase Locking - a widely used algorithm for serialisability in databases. Similar to "no dirty writes": if two transactions concurrently try to write the same object, the lock ensures that the second writer must wait until the first one has finished its transaction before it may continue. More specifically:

- if transaction A has read an object and B wants to write to it, B must wait until A commits or aborts
- if A has written an object and B wants to read it, B must wait until A commits or aborts

In 2PL, writers don't just block other writers - they also block readers, and vice versa. The big downside of 2PL is performance: worse throughput and response times compared to weak isolation (because of the overhead of acquiring and releasing locks). It is also called a "pessimistic concurrency control mechanism" - better to wait until the situation is safe before doing anything.

SSI - Serialisable Snapshot Isolation - full serialisability with only a small performance penalty compared to snapshot isolation. A very young technique - first described in 2008. Called an "optimistic concurrency control technique": instead of blocking potentially dangerous transactions, it allows them to proceed, hoping everything will turn out all right; when a transaction wants to commit, the database checks whether everything is indeed fine. It performs badly under high contention (many transactions accessing the same object) - many transactions need to be aborted. Reads from the database are made based on snapshot isolation.

## Chapter 8: The Trouble with Distributed Systems

Anything that can go wrong, will go wrong. Working with distributed systems - writing software that runs on several computers connected by a network - is fundamentally different from writing software for a single computer.

Partial failure - some parts of the system are broken in an unpredictable way, while others work fine. Partial failures are nondeterministic. Nondeterminism and partial failures are what make distributed systems hard to work with.

High-performance computing - supercomputers with thousands of CPUs, used for computationally intensive scientific tasks, such as weather forecasting or molecular dynamics. Cloud computing - often associated with multi-tenant datacenters, commodity computers connected with an IP network, on-demand resource allocation and metered billing. Traditional enterprise datacenters lie somewhere between these two extremes.

If we want to make distributed systems work, we must accept the possibility of partial failure and build fault-tolerance mechanisms into the software. We need to build reliable systems from unreliable components (like communication over the internet: the network may fail, bits might be lost, yet it somehow works - engineers managed to build something reliable on top of unreliable foundations).
What can go wrong when sending a request:

- the request may be lost
- the request might be waiting in a queue and will be delivered later
- the remote node may have failed or temporarily stopped responding
- the request might have been processed, but the response was lost on the way back, or was delayed and will be delivered later

Network problems can be surprisingly common, even in controlled environments like a datacenter operated by one company (around 12 network faults per month in a medium-sized datacenter, half of which disconnected a single machine, and half an entire rack). EC2 is notorious for having frequent transient network glitches.

Many systems need to automatically detect faulty nodes, for example: a load balancer needs to stop sending requests to a node that is dead. Unfortunately, it is hard to tell whether a node is working or not. A timeout is the only sure way of detecting a fault, and the appropriate duration of a timeout is difficult to estimate.

The telephone network uses a circuit - a fixed, guaranteed amount of bandwidth between 2 callers. TCP, on the other hand, dynamically adapts the rate of data transfer to the available network capacity. TCP is optimised for busy networks; circuits would not work for the internet's use case.

Clocks and time are important - in distributed systems we never know the delay between send and receive.

Time-of-day clocks - return the current date and time according to some calendar. Usually synchronised with NTP (Network Time Protocol). Time-of-day clocks are unsuitable for measuring elapsed time (the clock might be reset during the measurement, because it was desynchronised).

Monotonic clocks - suitable for measuring elapsed time; they are guaranteed to always move forward (a time-of-day clock may jump back in time). NTP may adjust a monotonic clock's frequency if it discovers it is running too slow or too fast.

Software must be designed on the assumption that the network will occasionally be faulty, and the software must handle such faults gracefully.

> Distributed systems are different from programs running on a single computer - there is no shared memory, only message
> passing through an unreliable network with variable delays, and the systems may suffer from partial failures, unreliable
> clocks and processing pauses.

There are system models designed for reasoning about distributed systems problems. Timing assumptions:

- synchronous model - assumes bounded network delay, bounded process pauses and bounded clock error; this means you know the delay, and it will not exceed some fixed value. This model is not realistic.
- partially synchronous model - the system behaves like a synchronous one most of the time, but sometimes exceeds the bounds for network delay, process pauses and clock drift
- asynchronous model - no timing assumptions are allowed

Node failure assumptions:

- crash-stop faults - the algorithm may assume that a node can fail in only one way - by crashing; once crashed, it never comes back
- crash-recovery faults - a node can fail at any moment, but has some nonvolatile disk storage that is preserved across crashes
- byzantine faults - nodes may do absolutely anything, including trying to trick and deceive other nodes

The partially synchronous model with crash-recovery faults is the most common combination.

Safety of a system - nothing bad happens; liveness of a system - something good eventually happens. These two properties are often used for reasoning about the correctness of a distributed algorithm.

## Chapter 9: Consistency and Consensus

Tolerating faults - keeping the service functioning correctly, even if some internal component is faulty.
The best way of building fault-tolerant systems is to find general-purpose abstractions with useful guarantees (e.g. transactions). When working with a database that provides only weak guarantees (e.g. eventual consistency), you need to be constantly aware of its limitations (e.g. when you write and immediately read, there is no guarantee that you will see the value you just wrote).

LINEARIZABILITY (atomic consistency, strong consistency, immediate consistency) - make a system appear as if there were only one copy of the data and all operations on it are atomic. Easily confused with serialisability (both mean something like "can be arranged in a sequential order"):

- Serialisability - an isolation property of transactions; it guarantees that transactions behave the same as if they had executed in some serial order.
- Linearisability - a recency guarantee on reads and writes of a register; it doesn't group operations together into transactions, so it does not prevent problems like write skew.

Use cases for linearisability:

- locking and leader election - a single-leader system needs to ensure there is indeed only one leader, not several (split brain) - it must be linearisable; all nodes must agree which node owns the lock
- constraints and uniqueness guarantees - for example, unique usernames - a hard uniqueness constraint requires linearisability
- cross-channel timing dependencies - multiple components in a system can communicate through different channels, which opens up the possibility of race conditions

The CAP theorem has been historically influential, but nowadays it has little practical value for designing systems. A better way of phrasing CAP would be "either consistent or available when partitioned".

ORDERING GUARANTEES. Causality imposes an ordering on events (what happened before what) - a question comes before the answer, a message is sent before it is received, ... These chains of causally dependent operations define the causal order in the system. If a system obeys the ordering imposed by causality, we say it is causally consistent. Linearisability implies causality. However, it is not the only way of preserving causality - a system can be causally consistent without incurring the performance penalty (causal consistency is the strongest consistency model that does not slow down due to network delays).

Sequence Number Ordering - sequence numbers or timestamps (not a time-of-day clock, but some logical clock) used to order events. If there is not a single leader, it is less clear how to generate sequence numbers for operations:

- each node can generate its own independent sequence numbers + its node ID
- attach a timestamp to each operation
- preallocate blocks of sequence numbers (1-1000 for node A, 1001-2000 for node B, ...)

The methods above allow generating unique sequence numbers efficiently, but they do not correctly capture the ordering of operations across different nodes.

Lamport timestamp - a method for generating sequence numbers that is consistent with causality. Every node keeps track of the maximum counter value it has seen so far, and includes that maximum on every request. Each node appends its node ID to the counter; if 2 counter values are the same, the higher node ID wins.
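A minimal sketch of a Lamport clock, with made-up node IDs - timestamps are (counter, node_id) pairs compared lexicographically:

```python
class LamportClock:
    """Toy Lamport clock - timestamps are (counter, node_id) pairs.

    Comparing the pairs lexicographically gives a total order that is
    consistent with causality: if A happened before B, A's stamp is smaller.
    """

    def __init__(self, node_id):
        self.node_id = node_id
        self.counter = 0

    def tick(self):
        """Local event or send: bump the counter and stamp the event."""
        self.counter += 1
        return (self.counter, self.node_id)

    def receive(self, stamp):
        """On receive: fast-forward past the maximum counter seen so far."""
        self.counter = max(self.counter, stamp[0]) + 1
        return (self.counter, self.node_id)

a, b = LamportClock(node_id=1), LamportClock(node_id=2)
t1 = a.tick()       # (1, 1)
t2 = b.receive(t1)  # (2, 2) - causally after t1, so it compares greater
t3 = a.tick()       # (2, 1) - concurrent with t2; the tie goes to the node ID
assert t1 < t2 and t3 < t2
```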
Getting several nodes to agree on something is not easy. Examples:

- leader election - lack of communication may lead to split brain (multiple nodes believing themselves to be the leader)
- atomic commit - in a system that supports transactions spanning several nodes, a transaction may fail on some nodes but succeed on others (all nodes have to agree on the outcome - abort or commit)

The Impossibility of Consensus (the FLP result) - there is no algorithm that is always able to reach consensus if there is a risk that a node may crash. This result is proven in the asynchronous model; with timeouts or clocks, consensus becomes solvable in practice.

Two-phase commit (2PC - not to be confused with two-phase locking) is an algorithm for achieving atomic transaction commit across multiple nodes (all commit or all abort). The 2 phases:

- the coordinator begins phase 1 - it sends a prepare request to each of the nodes, asking whether they are able to commit
- the coordinator tracks the responses from the participants; if all say yes, the coordinator sends out a commit request; if any participant says no, the coordinator sends an abort request to all nodes

This is very similar to a wedding ceremony in Western cultures. If the decision was to commit, there is no going back, no matter how many retries it takes. The protocol has 2 crucial points of no return. If the coordinator dies, the nodes should communicate and come to some agreement.

2PC has a bad reputation because of operational problems, low performance, and promising more than it can deliver.

## Chapter 10: Batch Processing

> A system cannot be successful if it is too strongly influenced by a single person. Once the initial design is complete
> and fairly robust, the real test begins as people with many viewpoints undertake their own experiments.

3 types of systems:

- services (online systems) - a service waits for a request or instruction from a client to arrive; when received, the service tries to handle it as quickly as possible
- batch processing systems (offline systems) - the system takes a large amount of input data, runs a job to process it and produces some output data. Batch jobs are often scheduled to run periodically. The primary performance measure is throughput.
- stream processing systems (near-real-time systems) - something between online and offline systems. A stream processor consumes inputs and produces outputs (rather than responding to requests).

Simple batch processing can be performed in UNIX via awk, grep and other command-line tools (using a chain of commands). The Unix philosophy - the idea of connecting programs with pipes. This is possible because the programs share a common interface (they operate on file descriptors), are small, and do one thing. The biggest limitation of UNIX tools is that they run only on a single machine - and that is where tools like Hadoop come in.

MapReduce is a bit like Unix tools, but distributed across potentially thousands of machines. MapReduce jobs read and write files on a distributed filesystem; in Hadoop's implementation of MapReduce, the filesystem is called HDFS (Hadoop Distributed File System - a reimplementation of the Google File System).

HDFS is based on the shared-nothing principle. HDFS consists of a daemon process running on each machine, exposing a network service that allows other nodes to access files stored on that machine. In order to tolerate machine and disk failures, file blocks are replicated on multiple machines.
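A single-machine sketch of the MapReduce programming model (the two callbacks are described in detail right after this sketch) - a word count with explicit map, shuffle (group by key) and reduce steps; real MapReduce shards these phases across machines:

```python
from collections import defaultdict

def mapper(line):
    """Called once per input record - emits (key, value) pairs."""
    for word in line.split():
        yield word.lower(), 1

def reducer(word, counts):
    """Called once per key, with all the values collected for that key."""
    return word, sum(counts)

documents = ["the quick brown fox", "the lazy dog", "the fox"]

# The shuffle: group all mapper output by key (the framework's job in
# Hadoop - it sorts mapper output and streams it to the right reducer).
grouped = defaultdict(list)
for line in documents:
    for key, value in mapper(line):
        grouped[key].append(value)

word_counts = dict(reducer(key, values) for key, values in grouped.items())
print(word_counts["the"])  # 3
```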
To create a MapReduce job, you need to implement 2 callback functions:

- mapper - called once for every input record; its job is to extract the key and value from the input record
- reducer - the framework takes the key-value pairs produced by the mappers, collects all the values belonging to the same key and calls the reducer with an iterator over that collection of values

Principle:

> Put the computation near the data - it saves copying the input files over the network, reducing network load and increasing locality.

In order to achieve good throughput in batch processing, the computation must be as local to one machine as possible.

HDFS is somewhat like a distributed version of UNIX, where HDFS is the filesystem and MapReduce is a quirky implementation of a UNIX process.

When MapReduce was published, it was not all new - some concepts were already known, e.g. from massively parallel processing (MPP) databases.

Hadoop vs distributed (MPP) databases:

- databases require you to structure data according to a particular model, whereas files in a distributed filesystem are just byte sequences. Hadoop opened up the possibility of indiscriminately dumping data into HDFS and only later figuring out how to process it further. MPP databases require careful, up-front modeling of the data. Hadoop has often been used for implementing ETL processes: MapReduce jobs are written to clean up the data, transform it into a relational form and import it into an MPP data warehouse for analytic purposes.
- MPP databases are great because they take care of storage, query planning and execution, and moreover they use SQL - a powerful query language. On the other hand, not all kinds of processing can be sensibly expressed as SQL queries (recommendation systems, full-text search or image analysis). MapReduce gave engineers the ability to easily run their own code over large datasets.
- MPP databases and MapReduce take different approaches to handling faults and to the use of memory and disk. Batch processes are less sensitive to faults than online systems, because they do not immediately affect users if they fail, and they can always be run again. If a node fails, most MPP databases abort the entire query; MapReduce can tolerate the failure of an individual map or reduce task. MapReduce writes partial results to disk, so they can be restored after a failure. MPP databases are more willing to keep data in memory for faster access. MapReduce is designed to tolerate frequent unexpected task termination - not because the hardware is unreliable, but because the freedom to arbitrarily terminate processes enables better resource utilisation in a computing cluster (this design was driven by Google's resource-usage patterns).

MapReduce is just one of many possible programming models for distributed systems. MapReduce has a problem with *materialisation* of intermediate state - the process of writing out intermediate state files. Several new execution engines for distributed batch processing (dataflow engines) - Spark, Tez, Flink - were developed to fix this problem with MapReduce.

Dataflow engines provide several options for connecting one operator's output to another's input: repartition and sort by key; take several inputs and partition them, but skip the sorting (for broadcast hash joins); or send the same output from one operator to all partitions of the join operator.

Systems like Dryad and Nephele offer several advantages compared to the MapReduce model:
- expensive work (e.g. sorting) is only performed in places where it is actually required
- no unnecessary map tasks
- intermediate state between operators is kept in memory or written to local disk
- operators can start executing as soon as their input is ready
- existing JVMs can be reused to run new operators

Fully materialising intermediate state to a distributed filesystem makes fault tolerance fairly easy in MapReduce. Spark, Flink and Tez avoid writing intermediate state to HDFS. MapReduce is like writing the output of each command to a temporary file, whereas dataflow engines look much more like UNIX pipes (the final result still might be saved to HDFS).

High-level APIs like Hive, Pig, Cascading and Crunch became popular because programming raw MapReduce jobs is quite laborious.

## Chapter 11: Stream Processing

> Complex systems always evolve from simple systems that work. A complex system designed from scratch never works and
> cannot be made to work.

Batch processing must artificially divide data into chunks of fixed duration (for example: processing a day's worth of data at the end of every day). The problem with daily batch processes is that changes in the input are only reflected in the output a day later, which is too slow for many impatient users. The delay can be reduced by running the processing more frequently.

Stream processing - processing every event as it happens. "Stream" refers to data that is incrementally made available over time.

Event - a small, self-contained, immutable object containing the details of something that happened at some point in time. An event usually contains a timestamp indicating when it happened (according to a time-of-day clock). Related events are usually grouped together into a topic or stream.

Polling the datastore to check for events that have appeared since the last poll becomes expensive if the datastore is not designed for this kind of usage. It is better for consumers to be notified when new events appear. The common approach for notifying consumers about new events is a messaging system: a producer sends a message containing the event, which is then pushed to consumers.

Direct messaging - direct communication between producers and consumers, without intermediary nodes. Brokerless libraries: ZeroMQ and nanomsg - pub/sub messaging over TCP or IP multicast. StatsD and Brubeck use unreliable UDP messaging for collecting metrics from all machines on the network and monitoring them. Webhooks - a pattern in which a callback URL of one service is registered with another service, which makes a request to that URL whenever an event occurs.

Message brokers - a kind of database that is optimised for handling message streams. A broker runs as a server, with producers and consumers connecting to it as clients. Producers write messages; consumers receive them by reading them from the broker. By centralising the data in the broker, these systems can more easily tolerate clients that come and go. A consequence of queueing is also that consumers are generally asynchronous: when a producer sends a message, it normally only waits for the broker to confirm that it has buffered the message - it does not wait for the message to be consumed.

Multiple consumers - when multiple consumers read messages from the same topic, two main patterns of messaging are used:

- load balancing - each message is delivered to one of the consumers, so the consumers can share the work of processing the messages in the topic.
  This pattern is useful when the messages are expensive to process, and you want to be able to add consumers to parallelise the processing.
- fan-out - each message is delivered to all the consumers - the equivalent of having several batch jobs that read the same input file.

Message brokers use acknowledgements: a client must explicitly tell the broker when it has finished processing a message, so that the broker can remove it from the queue. Messages can be delivered out of order when, for example, a network problem causes a lost acknowledgement and a redelivery.

Log-based message brokers - the durable storage approach of databases combined with the low-latency notification facilities of messaging. A log is simply an append-only sequence of records on disk. A producer sends a message by appending it to the end of the log, and a consumer receives messages by reading the log sequentially. In order to scale to higher throughput than a single disk can offer, the log can be partitioned; different partitions can be hosted on different machines. A topic can then be defined as a group of partitions that carry messages of the same type.

Apache Kafka, Amazon Kinesis Streams and Twitter's DistributedLog are log-based message brokers. Google Cloud Pub/Sub is architecturally similar but exposes a JMS-style API rather than a log abstraction. Even though these message brokers write all messages to disk, they are able to achieve a throughput of millions of messages per second by partitioning across multiple machines. The log-based approach trivially supports fan-out messaging.

Change Data Capture (CDC) - the process of observing all data changes written to a database and extracting them in a form in which they can be replicated to other systems. You can capture the changes in a database and continually apply the same changes to, for example, a search index.

Event Sourcing - storing all changes to the application state as a log of change events. Events are designed to reflect things that happened at the application level, rather than low-level state changes. It is a powerful technique for data modeling: from an application point of view, it is more meaningful to record the user's actions as immutable events, rather than recording the effect of those actions on a mutable database: "student cancelled their course enrolment" vs "one entry was deleted from the enrolments table". Event Store is a specialised database built to support applications using event sourcing. Applications that use event sourcing typically have some mechanism for storing snapshots of the current state derived from the log of events, so they don't need to repeatedly reprocess the full log.

CQRS - Command Query Responsibility Segregation - separating the form in which data is written from the forms in which it is read, by allowing several read views.

Streams can be used to produce other, derived streams. Stream processing has long been used for monitoring purposes: fraud detection, trading systems examining price changes, machine monitoring, monitoring in the military.

Complex Event Processing (CEP) - an approach developed in the 1990s for analysing event streams, especially geared toward applications that require searching for certain event patterns. CEP allows you to specify rules to search for certain patterns of events in a stream. CEP systems use a high-level declarative query language like SQL, or a GUI. Stream processing is also used for analytics on streams; the boundary between CEP and stream analytics is blurry.
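A toy sketch of the event-sourcing idea above - current state derived by folding immutable events, using the enrolment example (all class and student names are made up):

```python
from dataclasses import dataclass

# Immutable events capture what happened at the application level.
@dataclass(frozen=True)
class EnrolmentAdded:
    student: str
    course: str

@dataclass(frozen=True)
class EnrolmentCancelled:
    student: str
    course: str

def apply(state, event):
    """Fold one event into the derived read view (a set of enrolments)."""
    if isinstance(event, EnrolmentAdded):
        return state | {(event.student, event.course)}
    if isinstance(event, EnrolmentCancelled):
        return state - {(event.student, event.course)}
    return state

log = [
    EnrolmentAdded("alice", "databases"),
    EnrolmentAdded("alice", "compilers"),
    EnrolmentCancelled("alice", "compilers"),  # the cancellation itself is kept
]

state = set()
for event in log:  # replaying the full log; real systems snapshot periodically
    state = apply(state, event)
print(state)  # {('alice', 'databases')}
```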
Stream-processing frameworks: Apache Storm, Spark Streaming, Flink, Concord, Samza, Kafka Streams, Google Cloud Dataflow, Azure Stream Analytics.

Types of time windows:

- tumbling window - has a fixed length, and every event belongs to exactly one window. For example, with a 1-minute tumbling window, events with timestamps between 10:03:00 and 10:03:59 are grouped into one window.
- hopping window - has a fixed length, but allows windows to overlap in order to provide some smoothing.
- sliding window - contains all the events that occur within some interval of each other. For example, a 5-minute sliding window would cover events at 10:03:39 and 10:08:12, because they are less than 5 minutes apart.
- session window - has no fixed duration; instead, it is defined by grouping together all events for the same user that occur closely together in time, and the window ends when the user has been inactive for some time.

Types of stream joins:

- stream-stream join (window join) - you need to choose a suitable window for the join (seconds, days, weeks between events); also be careful about the ordering of received events.
- stream-table join (stream enrichment) - to perform this join, the stream processor needs to look at one activity event at a time and look up something in the database (local or remote).
- table-table join (materialised view maintenance) - the Twitter example: when a user wants to see their feed, it is too expensive to load all followed profiles' most recent tweets; instead we want a timeline cache, so reading is a simple lookup. To implement cache maintenance (append new tweets to the cache, remove deleted ones, ...) you need streams of events for tweets.

If events on different streams happen around a similar time, in which order are they processed? If the ordering of events across streams is undetermined, the join becomes nondeterministic, which means you cannot rerun the same job on the same input and get the same result. In data warehouses, this issue is known as a slowly changing dimension (SCD). It is often addressed by using a unique identifier for a particular version of the joined record.

Batch processing frameworks can tolerate faults fairly easily. In stream processing, fault tolerance is less straightforward to handle. Possible approaches:

- microbatching and checkpointing - break the stream into small blocks, and treat each block like a miniature batch process (used in Spark Streaming, with batches approx. 1 second long). Apache Flink periodically generates rolling checkpoints of state and writes them to durable storage.
- atomic commit revisited - in order to give the appearance of exactly-once processing in the presence of faults, we need to ensure that all outputs and side effects of processing take effect if and only if the processing is successful - exactly-once message processing in the context of distributed transactions and two-phase commit.
- idempotence - the goal is to discard the partial output of any failed tasks so that they can be safely retried without taking effect twice. Distributed transactions are one way of achieving this, but another way is to rely on idempotence. An idempotent operation is one that you can perform multiple times, and it has the same effect as if you performed it only once (e.g. setting a key in a key-value store is idempotent; incrementing a counter is not). Even if an operation is not naturally idempotent, it can often be made idempotent with a bit of extra metadata - see the sketch below.
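A small sketch of that metadata trick, with made-up message IDs: a counter increment (not idempotent) made idempotent by remembering which messages have already been applied:

```python
class IdempotentCounter:
    """Increment becomes idempotent by tracking processed message IDs."""

    def __init__(self):
        self.value = 0
        self.seen = set()  # the "bit of extra metadata"

    def increment(self, message_id):
        if message_id in self.seen:
            return  # redelivered message - applying it again would double-count
        self.seen.add(message_id)
        self.value += 1

counter = IdempotentCounter()
counter.increment("msg-1")
counter.increment("msg-2")
counter.increment("msg-1")  # broker redelivery after a lost acknowledgement
print(counter.value)  # 2, not 3
```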
## Chapter 12: The Future of Data Systems

The lambda architecture - incoming data should be recorded by appending immutable events to an always-growing dataset, similarly to event sourcing. From these events, read-optimised views are derived. The lambda architecture proposes running two different systems in parallel: the stream processor consumes the events and quickly produces an approximate update to the view, while the batch processor later consumes the same set of events and produces a corrected version of the derived view.

Federated databases - unifying reads - it is possible to provide a unified query interface to a wide variety of underlying storage engines and processing methods - an approach known as a federated database or polystore.

Unbundled databases - unifying writes - making it easier to reliably plug together storage systems is like unbundling a database's index-maintenance features in a way that can synchronise writes across disparate technologies.

Hardware is not quite the perfect abstraction that it may seem. Random bit-flips are very rare on modern hardware, but they can happen. Even software like MySQL or PostgreSQL can have bugs. Large-scale storage systems like HDFS or Amazon S3 do not fully trust disks: they run background processes that continually read back files, compare them to other replicas, and move files from one disk to another, in order to mitigate the risk of silent corruption.

ACID databases have led us toward developing applications on the basis of blindly trusting technology. Since the technology we trusted worked well enough most of the time, auditing mechanisms were deemed not worth the investment. Having continuous end-to-end integrity checks gives you increased confidence about the correctness of your systems, which in turn allows you to move faster (much like automated testing).

It is not sufficient for software engineers to focus exclusively on the technology and ignore its ethical consequences. Users are humans, and human dignity is paramount.

Algorithmic prison - systematically being excluded from jobs, air travel, insurance coverage, property rentals, financial services, ... because an algorithm said NO. In countries that respect human rights, the criminal justice system presumes innocence until proven guilty; automated systems, on the other hand, can systematically exclude a person from participating in society without any proof of guilt and with little chance of appeal.

Decisions made by an algorithm are not necessarily any better or worse than those made by a human. Every person is likely to have biases. In many countries, anti-discrimination laws prohibit treating people differently depending on protected traits (ethnicity, age, gender, sexuality, disability, beliefs). Automated decision-making opens the question of responsibility and accountability: who is responsible if a self-driving car causes an accident?

Besides the problems of predictive analytics, there are ethical problems with data collection itself. Thought experiment: whenever you see the word "data" (e.g. data-driven company), replace it with the word "surveillance" (e.g. surveillance-driven company). Even the most totalitarian and oppressive regimes could only dream of putting a microphone in every room and forcing every person to constantly carry a device capable of tracking their location and movements.
Declining to use a service due to its tracking of users is only an option for the small number of people who are privileged enough to have the time and knowledge to understand its privacy policy, and who can afford to potentially miss out on social participation opportunities. When collecting data, we need to consider not just today's political environments, but all possible future governments.

================================================
FILE: books/docker-deep-dive.md
================================================
[go back](https://github.com/pkardas/learning)

# Docker Deep Dive

Book by Nigel Poulton

================================================
FILE: books/elixir.md
================================================
[go back](https://github.com/pkardas/learning)

# Elixir in Action

Book by Saša Jurić

================================================
FILE: books/fundamentals-of-architecture.md
================================================
[go back](https://github.com/pkardas/learning)

# Fundamentals of Software Architecture: An Engineering Approach

Book by Mark Richards and Neal Ford

- [Preface: Invalidating Axioms](#preface-invalidating-axioms)
- [Chapter 1: Introduction](#chapter-1-introduction)
- [Chapter 2: Architectural thinking](#chapter-2-architectural-thinking)
- [Chapter 3: Modularity](#chapter-3-modularity)
- [Chapter 4: Architecture Characteristics Defined](#chapter-4-architecture-characteristics-defined)
- [Chapter 5: Identifying Architectural Characteristics](#chapter-5-identifying-architectural-characteristics)
- [Chapter 6: Measuring and Governing Architecture Characteristics](#chapter-6-measuring-and-governing-architecture-characteristics)
- [Chapter 7: Scope of Architecture Characteristics](#chapter-7-scope-of-architecture-characteristics)
- [Chapter 8: Component-Based Thinking](#chapter-8-component-based-thinking)
- [Chapter 9: Foundations](#chapter-9-foundations)
- [Chapter 10: Layered Architecture Style](#chapter-10-layered-architecture-style)
- [Chapter 11: Pipeline Architecture Style](#chapter-11-pipeline-architecture-style)
- [Chapter 12: Microkernel Architecture Style](#chapter-12-microkernel-architecture-style)
- [Chapter 13: Service-Based Architecture Style](#chapter-13-service-based-architecture-style)
- [Chapter 14: Event-Driven Architecture Style](#chapter-14-event-driven-architecture-style)
- [Chapter 15: Space-Driven Architecture Style](#chapter-15-space-driven-architecture-style)
- [Chapter 16: Orchestration-Driven Service-Oriented Architecture](#chapter-16-orchestration-driven-service-oriented-architecture)
- [Chapter 17: Microservices Architecture](#chapter-17-microservices-architecture)
- [Chapter 18: Choosing the Appropriate Architecture Style](#chapter-18-choosing-the-appropriate-architecture-style)
- [Chapter 19: Architecture Decisions](#chapter-19-architecture-decisions)
- [Chapter 20: Analyzing Architecture Risk](#chapter-20-analyzing-architecture-risk)
- [Chapter 21: Diagramming and Presenting Architecture](#chapter-21-diagramming-and-presenting-architecture)
- [Chapter 22: Making Teams Effective](#chapter-22-making-teams-effective)
- [Chapter 23: Negotiation and Leadership Skills](#chapter-23-negotiation-and-leadership-skills)
- [Chapter 24: Developing a Career Path](#chapter-24-developing-a-career-path)
- [Self-Assessment Questions](#self-assessment-questions)

## Preface: Invalidating Axioms

> Axiom - A statement or proposition which is regarded as being established, accepted, or self-evidently true.
Software architects (like mathematicians) also build theories atop axioms (but the software world is _softer_ than mathematics). Architects have an important responsibility to question assumptions and axioms left over from previous eras. Each new era requires new practices, tools, measurements, patterns, and a host of other changes.

## Chapter 1: Introduction

The industry does not have a good definition of software architecture.

> Architecture is about the important stuff... whatever that is ~ Ralph Johnson

The responsibilities of a software architect encompass technical abilities, soft skills, operational awareness, and a host of others.

When studying architecture, keep in mind that everything can be understood in context - the reasons certain decisions were made were based on the realities of the environment (for example, building a microservice architecture in 2002 would have been inconceivably expensive).

Knowledge of the architecture structure, architecture characteristics, architecture decisions, and design principles is needed to fully understand the architecture of a system:
- structure/style: microservices, layered, microkernel, ...
- characteristics: availability, reliability, scalability, fault tolerance, security, ...
- decisions: what is and what is not allowed - rules for how a system should be constructed
- design principles: guidelines for constructing systems, e.g. leverage async messaging between services to increase performance

Expectations of an architect:
- make architecture decisions - instead of making technical decisions (use React.js), instruct development teams (use a reactive-based framework)
- continually analyze the architecture - validate decisions made years ago in order to prevent structural decay
- keep current with the latest trends - the decisions an architect makes tend to be long-lasting and difficult to change; understanding and following key trends helps the architect prepare for the future
- ensure compliance with decisions - continually verify that development teams are following the architecture decisions and design principles defined
- diverse exposure and experience - an architect should be at least familiar with a variety of technologies; an effective architect should be aggressive in seeking out opportunities to gain experience in multiple languages, platforms, and technologies
- have business domain knowledge - without business knowledge, an architect cannot communicate with stakeholders and business users and will quickly lose credibility
- possess interpersonal skills - including teamwork, facilitation, and leadership; engineers love to solve technical problems, however G. Weinberg said: "no matter what they tell you, it is always a people problem" - many architects are excellent technologists but ineffective architects because of poor communication skills
- understand and navigate politics - have negotiation skills; almost every decision an architect makes will be challenged

> All architectures become iterative because of _unknown unknowns_. Agile just recognizes this and does it sooner.

An iterative process fits the nature of software architecture. Trying to build a modern system such as microservices using Waterfall will create a great deal of friction. Nothing remains static. What we need is _evolutionary architecture_ - mutate the solution, evolve new solutions iteratively. Adopting Agile engineering practices (continuous integration, automated machine provisioning, ...) makes building resilient architectures easier.
Agile methodologies support change better than planning-heavy processes because of their tight feedback loop.

Laws of Software Architecture:
- Everything in software architecture is a trade-off.
- If an architect thinks they have discovered something that isn't a trade-off, more likely they just haven't identified the trade-off yet.
- Why is more important than how.

## Chapter 2: Architectural thinking

4 main aspects of thinking like an architect:
1. understanding the difference between architecture and design
   - architecture: defining architecture characteristics, selecting architecture patterns, creating components
   - design: class diagrams, user interface, code testing and development
   - architects and development teams have to form a strong bidirectional relationship and be on the same virtual team
   - where does architecture end and design begin? nowhere - architecture and design must be synchronized through tight collaboration
2. a wide breadth of technical knowledge
   - developer - a significant amount of technical depth - specialised in languages, frameworks, and tools
   - architect - a significant amount of technical breadth - a broad understanding of technology and how to use it to solve particular problems
3. understanding, analyzing, and reconciling trade-offs between various solutions and technologies
   - thinking like an architect is all about seeing trade-offs in every solution
   - the ultimate answer to architectural questions: _it depends on ..._ (budget, business environment, company culture, ...)
   - look at the benefits of a given solution, but also analyze the negatives
   - analyze the trade-offs and then ask what is more important; the answer always depends on the environment
4. understanding the importance of business drivers
   - understanding business drivers is required for the success of the system
   - requires domain knowledge and the ability to translate business requirements into architecture characteristics

_Frozen Caveman Anti-Pattern_: describes an architect who always reverts to their pet irrational concern for every architecture. This anti-pattern manifests in architects who have been burned in the past by a poor decision or unexpected occurrence, making them particularly cautious in the future.

How can an architect keep their hands-on coding skills?
- do frequent proofs-of-concept - whenever possible, write production-quality code (even when doing POCs) - POC code often remains in the repository and becomes the reference or guiding example
- tackle technical debt stories or architecture stories, freeing the development team up to work on the critical functional user stories
- work on bug fixes
- create simple command-line tools and analyzers to help the development team with their day-to-day tasks
- do code reviews frequently

## Chapter 3: Modularity

Modularity is an organizing principle. If an architect designs a system without paying attention to how the pieces wire together, they end up creating a system that presents myriad difficulties. Developers typically use modules as a way to group related code together. For discussions about architecture, we use modularity as a general term to denote a related grouping of code: classes, functions, or any other grouping.

_Cohesion_ - refers to what extent the parts of a module should be contained within the same module. It is a measure of how related the parts are to one another.

_Abstractness_ - the ratio of abstract artifacts to concrete artifacts. It represents a measure of abstractness versus implementation.
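A rough illustration of the metric with hypothetical classes; one common formulation divides the abstract artifacts by the total count:

```python
import abc
import inspect

class Repository(abc.ABC):  # abstract artifact
    @abc.abstractmethod
    def get(self, key): ...

class SqlRepository(Repository):  # concrete artifact
    def get(self, key):
        return None

def abstractness(classes):
    # Share of abstract artifacts among all artifacts in the code base.
    abstract = sum(1 for cls in classes if inspect.isabstract(cls))
    return abstract / len(classes)

print(abstractness([Repository, SqlRepository]))  # 0.5
```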
Both extremes are problematic: a code base with no abstractions, and a code base with too many abstractions.

## Chapter 4: Architecture Characteristics Defined

Architects may collaborate on defining the domain or business requirements, but one key responsibility entails defining, discovering, and analyzing all the things the software must do that aren't directly related to the domain functionality - the architectural characteristics.

Operational Architecture Characteristics:
- Availability - how long the system will need to be available
- Continuity - disaster recovery capability
- Performance - stress testing, peak analysis
- Recoverability - how quickly is the system required to be online again?
- Reliability - if it fails, will it cost the company large sums of money?
- Robustness - ability to handle error and boundary conditions while running
- Scalability - ability for the system to perform and operate as the number of users/requests increases

Structural Architecture Characteristics:
- Configurability - ability for the end users to easily change aspects of the software's configuration
- Extensibility - how important it is to plug in new pieces of functionality
- Installability - ease of system installation on all necessary platforms
- Localization - support for multiple languages, currencies, measures
- Maintainability - how easy is it to apply changes and enhance the system?
- Portability - does the system need to run on more than one platform?
- Supportability - what level of technical support is needed by the application?
- Upgradeability - ability to quickly upgrade from a previous version

Cross-cutting Architecture Characteristics:
- Accessibility - access for all users, including those with disabilities
- Archivability - will the data need to be deleted/archived?
- Authentication - security requirements to ensure users are who they say they are
- Authorization - security requirements to ensure users can access only certain functions within the application
- Legal - what legislative constraints is the system operating in?
- Privacy - ability to hide transactions from internal company employees
- Security - does the data need to be encrypted in the database, what type of authentication is needed, ...?
- Supportability - what level of technical support is needed by the application?
- Usability - level of training required for users to achieve their goals with the app

Any list of architecture characteristics will be incomplete. Any given piece of software may require its own important architectural characteristics based on unique factors. Many of the terms are imprecise and ambiguous. No complete list of standards exists.

Applications can support only a few of the architecture characteristics listed above. Firstly, each of the supported characteristics requires design effort. Secondly, each architecture characteristic often has an impact on others. Architects rarely encounter a situation where they are able to design a system that maximizes every single architecture characteristic.

> Never shoot for the best architecture, but rather _the least worst_ architecture.

Too many architecture characteristics lead to generic solutions that try to solve every business problem, and those architectures rarely work because the design becomes unwieldy. Architecture design should be as iterative as possible.
## Chapter 5: Identifying Architectural Characteristics

Identifying the correct architectural characteristics for a given problem requires an architect not only to understand the domain problem, but also to collaborate with the problem domain stakeholders to determine what is truly important from a domain perspective.

_Extracting architecture characteristics from domain concerns_: translate domain concerns to identify the right architectural characteristics. Do not design a generic architecture; focus on a short list of characteristics. Too many characteristics lead to greater and greater complexity. Keep the design simple. Instead of prioritizing characteristics, have the domain stakeholders select the top 3 most important characteristics from the final list.

Translation of domain concerns to architecture characteristics:
- Mergers and acquisitions -> interoperability, scalability, adaptability, extensibility
- Time to market -> agility, testability, deployability
- User satisfaction -> performance, availability, fault tolerance, testability, deployability, agility, security
- Competitive advantage -> agility, testability, deployability, scalability, availability, fault tolerance
- Time and budget -> simplicity, feasibility

_Extracting architecture characteristics from requirements_: some characteristics come from explicit statements in requirements.

Architecture Katas - in order to become a great architect you need practice. A Kata exercise provides architects with a problem stated in domain terms (description, users, requirements) and additional context. Small teams work 45 minutes on a design, then show the results to the other groups, who vote on who came up with the best architecture. Team members ideally get feedback from an experienced architect about missed trade-offs and alternative designs.

Explicit characteristics - appear in a requirements specification, e.g. support for a particular number of users. Implicit characteristics - characteristics that aren't specified in requirements documents, yet they make up an important aspect of the design, e.g. availability - making sure users can access the website; security - no one wants to create insecure software; ...

Architects must remember: there is no best design in architecture, only a least worst collection of trade-offs.

## Chapter 6: Measuring and Governing Architecture Characteristics

Problems with defining and measuring architecture characteristics:
- They aren't physics - many characteristics have vague meanings; the industry has wildly differing perspectives
- Wildly varying definitions - different people may disagree on a definition; without agreeing on a common one, a proper conversation is difficult
- Too composite - many characteristics comprise many others at a smaller scale

Operational measures: obvious direct measurements, like performance - measure response time. High-level teams don't just establish hard performance numbers; they base their definitions on statistical analysis.

Structural measures: addressing critical aspects of code structure, like cyclomatic complexity - the measurement of code complexity, computed by applying graph theory to code (see the sketch below).

> Overly complex code represents a code smell. It hurts almost every one of the desirable characteristics of code bases (modularity, testability, deployability, ...). Yet if teams don't keep an eye on gradually growing complexity, that complexity will dominate the code base.
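A small illustration: for structured code, cyclomatic complexity can be counted as decision points + 1 (the `Order` type is hypothetical):

```python
from dataclasses import dataclass

@dataclass
class Order:
    weight: float
    express: bool

def shipping_cost(order: Order) -> int:
    # 3 decision points (> 20, > 5, express), so cyclomatic complexity = 4.
    if order.weight > 20:
        cost = 50
    elif order.weight > 5:
        cost = 20
    else:
        cost = 10
    if order.express:
        cost *= 2
    return cost

print(shipping_cost(Order(weight=8, express=True)))  # 40
```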
Process measures: some characteristics intersect with software development processes. For example, agility can relate to the software development process; ease of deployment and testability require some emphasis on good modularity and isolation at the architecture level.

Governing architecture characteristics - for example, ensuring software quality within an organization falls under the heading of architectural governance, because it falls within the scope of architecture, and negligence can lead to disastrous quality problems.

_Architecture fitness function_ - **any mechanism** that provides an objective integrity assessment of some architecture characteristic or combination of architecture characteristics. Many tools may be used to implement fitness functions: metrics, monitors, unit tests, chaos engineering, ... Rather than a heavyweight governance mechanism, fitness functions provide a mechanism for architects to express important architectural principles and automatically verify them (see the sketch below). Developers know that they shouldn't release insecure code, but that priority competes with dozens or hundreds of other priorities for busy developers. Tools like Security Monkey, and fitness functions generally, allow architects to codify important governance checks into the substrate of the architecture.
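A minimal sketch of a fitness function written as an ordinary unit test, assuming a hypothetical `src/domain` / `src/adapters` layout: it verifies that the domain layer never imports from the adapters layer:

```python
import ast
from pathlib import Path

def test_domain_does_not_import_adapters():
    # Walk every module in the (hypothetical) domain layer and
    # inspect its imports.
    for module in Path("src/domain").rglob("*.py"):
        tree = ast.parse(module.read_text())
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                names = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                names = [node.module or ""]
            else:
                continue
            assert not any(name.startswith("src.adapters") for name in names), \
                f"{module} depends on the adapters layer"
```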
## Chapter 7: Scope of Architecture Characteristics

When evaluating many operational architecture characteristics, an architect must consider dependent components outside the code base that will impact those characteristics.

_Connascence_ - two components are connascent if a change in one would require the other to be modified in order to maintain the overall correctness of the system. If two services in a microservices architecture share the definition of some class, they are statically connascent. Dynamic connascence: synchronous - the caller needs to wait for the response from the callee; asynchronous calls allow fire-and-forget semantics in event-driven architectures.

Component-level coupling isn't the only thing that binds software together. Many business concepts semantically bind parts of the system together, creating functional cohesion.

_Architecture quantum_ - an independently deployable artifact with high functional cohesion and synchronous connascence.
- independently deployable - includes all necessary components to function independently from other parts of the architecture (e.g. a database - the system will not function without it)
- high functional cohesion - how well the contained code is unified in purpose, meaning an architecture quantum needs to do something purposeful
- synchronous connascence - synchronous calls within an application context or between the distributed services that form this architecture quantum

## Chapter 8: Component-Based Thinking

Architects typically think in terms of components, the physical manifestation of a module. Typically, the architect defines, refines, manages, and governs components within an architecture.

Architecture Partitioning - several styles exist, with different sets of trade-offs (layered architecture, modular monolith).

> Conway's Law: Organizations which design systems ... are constrained to produce designs which are copies of the communication structures of these organizations.

This law suggests that when a group of people designs some technical artifact, the communication structures between the people end up replicated in the design.

Technical partitioning - organizing components by technical capabilities (presentation, business rules, persistence). Domain partitioning - modeling by identifying domains/workflows that are independent and decoupled from one another. Microservices are based on this philosophy.

Developers should never take components designed by architects as the last word. All software design benefits from iteration. The initial design should be viewed as a first draft.

Component identification flow:
- identify initial components
- assign requirements to components
- analyze roles and responsibilities
- analyze architecture characteristics
- restructure components

Finding the proper granularity for components is one of the most difficult tasks. A too fine-grained design leads to too much communication between components; a too coarse-grained design encourages high internal coupling.

Discovering components:
- entity trap - an anti-pattern where an architect bases components directly on database relationships; it indicates a lack of thought about the actual workflows of the application
- actor-actions approach - a popular way to map requirements to components: identify the actors who perform activities with the application and the actions those actors may perform
- event storming - the architect assumes the project will use messages and/or events to communicate between components; the team tries to determine which events occur in the system based on requirements and identified roles, and builds components around those event and message handlers
- workflow approach - identifies the key roles and the kinds of workflows, and builds components around the identified activities

Monolithic vs Distributed Architecture:
- monolithic: a single deployable unit containing all functionality of the system, running in one process and typically connected to a single database
- distributed: multiple services running in their own ecosystems, communicating via the network; each service may have its own release cadence and engineering practices

## Chapter 9: Foundations

Architecture styles (a.k.a. architecture patterns) - describe a named relationship of components covering a variety of architecture characteristics. A style name, similar to design patterns, creates a single name that acts as shorthand between experienced architects.

Big Ball of Mud - the absence of any discernible architecture structure. The lack of structure makes change increasingly difficult: problematic testing, deployment, scalability, performance, ... A mess caused by the lack of governance around code quality and structure.

Client/Server - separation of responsibilities - backend-frontend/two-tier/client-server.

Architecture styles can be classified into 2 main types:
- monolithic - a single deployment unit of code - layered, pipeline, microkernel
- distributed - multiple deployment units connected through a network - service-based, event-driven, space-based, service-oriented, microservices - much more powerful in terms of performance, scalability, and availability, but there are trade-offs

_The Fallacies of Distributed Computing:_
1. The Network is Reliable - fact: networks still remain generally unreliable; this is why things like timeouts and circuit breakers exist between services. The more a system relies on the network, the potentially less reliable it becomes.
2. Latency is Zero - a local call is measured in nanoseconds/microseconds; the same call made through a remote access protocol is measured in milliseconds. Do you know what the average round-trip latency is for a RESTful call in your prod env?
3. Bandwidth is Infinite - communication between remote services significantly utilizes bandwidth, causing networks to slow down. Imagine 2,000 req/s at 500 KB each = 1 GB per second! Ensuring that a minimal amount of data is passed between services in a distributed architecture is the best way to address this fallacy.
4. The Network is Secure - the surface area for threats and attacks increases by magnitudes when moving from a monolithic to a distributed architecture, despite measures like VPNs, trusted networks, and firewalls.
5. The Topology Never Changes - the network topology (routers, hubs, switches, firewalls, networks, appliances) CAN change; architects must be in constant communication with operations and network administrators to know what is changing and when, so they can make adjustments.
6. There is Only One Administrator - this fallacy points to the complexity of distributed architecture and the amount of coordination that must happen to get everything working correctly. Monoliths do not require this level of communication and collaboration, thanks to the single deployment unit.
7. Transport Cost is Zero - transport cost does not refer to latency, but rather to the actual cost in money associated with making a simple RESTful call. Distributed architectures cost significantly more than monolithic architectures, primarily due to the increased need for additional hardware, servers, gateways, firewalls, subnets, proxies, ...
8. The Network is Homogenous - the network is not made up of hardware from a single vendor, and not all heterogeneous hardware vendors play well together.

Other distributed considerations:
- distributed logging - debugging in a distributed architecture is very difficult and time-consuming; logging consolidation tools may help
- distributed transactions - in a monolith it is super easy to perform `commit`/`rollback`; it is much more difficult to do the same in a distributed system. Distributed systems rely on eventual consistency - this is one of the trade-offs. Transactional SAGAs are one way to manage distributed transactions.
- contract maintenance and versioning - a contract is behaviour and data agreed upon by both the client and the service; maintenance is hard due to decoupled services owned by different teams and departments

## Chapter 10: Layered Architecture Style

The Layered Architecture (n-tiered) - the standard for most applications because of its simplicity, familiarity, and low cost. The style also falls into several architectural anti-patterns (architecture by implication, accidental architecture). Most layered architectures consist of 4 standard layers: presentation, business, persistence, and database.

The layered architecture is a technically partitioned architecture (as opposed to a domain-partitioned architecture). Groups of components, rather than being grouped by domain, are grouped by their technical role in the architecture. As a result, any particular business domain is spread throughout all of the layers of the architecture. Domain-driven design does not work well with the layered architecture style.

Each layer can be either closed or open:
- closed - a request moves top-down from layer to layer; the request cannot skip any layers
- open - the request can bypass layers (fast-lane reader pattern)

The layers of isolation - changes made in one layer of the architecture generally don't impact components in other layers.
Each layer is independent of the other layers, thereby having little or no knowledge of the inner workings of other layers in the architecture. Violation of this concept produces a very tightly coupled application with layer interdependencies between components. This type of architecture becomes very brittle, difficult, and expensive to change.

This architecture makes a good starting point for most applications when it is not yet known exactly which architecture will ultimately be used. Be sure to keep reuse at a minimum and keep object hierarchies shallow. A good level of modularity will help facilitate the move to another architecture style later on.

Watch out for the architecture sinkhole anti-pattern - it occurs when requests move from one layer to another as simple pass-through processing, with no business logic performed within each layer. For example, the presentation layer responds to a simple request from the user to retrieve basic customer data.

## Chapter 11: Pipeline Architecture Style

Pipeline (a.k.a. pipes and filters) architecture: _Filter -(Pipe)-> Filter -(Pipe)-> Filter -(Pipe)-> Filter_
- pipes - the communication channels between filters; each pipe is usually unidirectional and point-to-point
- filters - self-contained, independent from other filters, stateless; each should perform one task only

4 types of filters exist within this architecture style:
- producer - the starting point of a process, sometimes called the source
- transformer - accepts input, optionally performs a transformation on the data, then forwards it to the outbound pipe; also known as "map"
- tester - accepts input, tests criteria, then optionally produces output; also known as "reduce"
- consumer - the termination point for the pipeline flow; persists or displays the final result

ETL tools leverage the pipeline architecture for the flow and modification of data from one database to another.

## Chapter 12: Microkernel Architecture Style

The microkernel architecture style (a.k.a. plug-in) - a relatively simple monolithic architecture consisting of two components: a core system and plug-in components.

Core system - the minimal functionality required to run the system. Depending on the size and complexity, the core system can be implemented as a layered architecture or a modular monolith.

Plug-in components - standalone, independent components that contain specialized processing, additional features, and custom code meant to enhance or extend the core system. Additionally, they can be used to isolate highly volatile code, creating better maintainability and testability within the application. Plug-in components should have no dependencies between them.

Plug-ins do not always have to communicate point-to-point with the core system (REST or messaging can be used instead). Each plug-in can be a standalone service (or even a microservice) - this topology is still only a single architecture quantum due to the monolithic core system.

Plug-in Registry - the core system needs to know which plug-in modules are available and how to get them. The registry contains information about each plug-in (name, data contract, remote access protocol). The registry can be as simple as an internal map structure owned by the core system (see the sketch below), or as complex as a registry and discovery tool (like ZooKeeper or Consul).

Examples of usages: Eclipse IDE, JIRA, Jenkins, Internet web browsers, ... Problems that require different configurations for each location or client match extremely well with this architecture style. Another example is a product that places a strong emphasis on user customization and feature extensibility.
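A minimal sketch of the internal-map variant of a plug-in registry; plug-in names and the callable contract are illustrative:

```python
# The core system owns a simple map from plug-in name to implementation.
registry = {}

def register(name, plugin):
    registry[name] = plugin

def core_handle(request):
    # The core provides only minimal functionality and delegates
    # specialised processing to whichever plug-ins are installed.
    plugin = registry.get(request["type"])
    if plugin is None:
        return "no plug-in installed for this request type"
    return plugin(request)

# A standalone plug-in with no dependencies on other plug-ins:
register("discount", lambda request: request["amount"] * 0.25)

print(core_handle({"type": "discount", "amount": 100}))  # 25.0
```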
## Chapter 13: Service-Based Architecture Style

A hybrid of the microservices architecture and one of the most pragmatic architecture styles (flexible, simpler, and cheaper than microservices/event-driven services).

Topology: a distributed macro layered structure consisting of a separately deployed user interface, separately deployed coarse-grained services (domain services), and a monolithic database. Because the services typically share a single monolithic database, the number of services within an application context ranges between 4 and 12. Depending on scalability, fault tolerance, and throughput needs, multiple instances of a domain service can exist; multiple instances require some load balancing.

Many variants exist within the service-based architecture:
- single monolithic user interface
- domain-based user interface
- service-based user interface

Similarly, you can break apart the single monolithic database, going as far as domain-scoped databases.

Service-based architecture uses a centrally shared database. Because of the small number of services, database connections are not usually an issue. Database changes, however, can be an issue: if not done properly, a table schema change can impact every service, making database changes a very costly task in terms of effort and coordination. One way to mitigate the impact and risk of database changes is to logically partition the database and manifest the logical partitioning through federated shared libraries. Changes to a table within a particular logical domain then impact only those services using that shared library. When making changes to shared tables, lock the common entity objects and restrict change access to the database team only. This helps control change and emphasizes the significance of changes to the common tables used by all services.

Service-based architecture - one of the most pragmatic architecture styles, a natural fit when doing DDD; it preserves ACID better than any other distributed architecture and offers a good level of architectural modularity.

## Chapter 14: Event-Driven Architecture Style

A popular distributed asynchronous architecture style used to produce highly scalable and high-performance apps. It can be used for small applications as well as large, complex ones. Made up of decoupled event-processing components that asynchronously receive and process events. It can be used as a standalone style or embedded within other architecture styles (e.g. an event-driven microservices architecture).
2 primary topologies:
- the mediator topology - used when you require control over the workflow of an event process
  - an event mediator - manages and controls the workflow for initiating events that require the coordination of multiple event processors; usually there are multiple mediators (each associated with a particular domain)
  - if an error occurs (no acknowledgement from one of the event processors), the mediator can take corrective action to fix the problem
  - the mediator controls the workflow; it can maintain the event state and manage errors
  - operates on commands (send-email, fulfill-order) rather than on events (email-sent, order-fulfilled)
  - cons: not as highly decoupled as the broker topology, lower scalability, hard to model complex workflows
- the broker topology - used when you require a high level of responsiveness
  - no central event mediator - message flow is distributed across the event processor components in a chain-like broadcasting fashion
  - a good practice: each event processor advertises what it did to the rest of the system, regardless of whether any other event processor cares about that action
  - operates on events (email-sent, order-fulfilled) rather than on commands (send-email, fulfill-order)
  - cons: challenging error handling - no central monitoring/controlling; not possible to restart a business transaction (because actions are taken asynchronously)

Error handling: the workflow event pattern - leverages delegation, containment, and repair through the use of a workflow delegate. On error, the event consumer immediately delegates the error to the workflow processor and moves on. The workflow processor tries to figure out what is wrong with the message (rules, machine learning, ...); once the message is repaired, it can be sent back to the event processor. In case of a very problematic error, a human agent can determine what is wrong with the message and then re-submit it.

Data loss (lost messages) - a primary concern when dealing with asynchronous communication. Typical data-loss scenarios:
- the message never makes it to the queue, or the broker goes down before the event processor can retrieve the message - solution: leverage persistent message queues (guaranteed delivery), with messages persisted in the broker's database (not only in memory)
- the event processor de-queues the message and crashes before it can process it - solution: _client acknowledge mode_ - the message is not deleted from the broker immediately, but waits for an acknowledgement
- the event processor is unable to persist the message in the database - solution: leverage ACID transactions

Broadcast - the capability to broadcast events without knowledge of who is receiving the message and what they do with it. Broadcasting is perhaps the highest level of decoupling between event processors.

In event-driven architecture, synchronous communication is accomplished through **request-reply** messaging. Each event channel within request-reply messaging has 2 queues (a request queue and a reply queue). 2 primary techniques for implementing request-reply messaging:
1. [PREFERRED] Correlation ID - a field in the reply message, usually set to the request message ID (see the sketch below).
2. Temporary queue - dedicated to the specific request, created when the request is made and deleted when the request ends. Does not require a Correlation ID, but large message volumes can significantly slow down the message broker and impact performance and responsiveness.
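A minimal sketch of the Correlation ID technique, using in-process queues as stand-ins for the request and reply channels:

```python
import queue
import uuid

request_queue = queue.Queue()  # stand-in for the request channel
reply_queue = queue.Queue()    # stand-in for the reply channel

def service():
    # The replier copies the request's ID into the reply's correlation ID.
    message = request_queue.get()
    reply_queue.put({"correlation_id": message["id"],
                     "body": message["body"].upper()})

request_id = str(uuid.uuid4())
request_queue.put({"id": request_id, "body": "hello"})
service()

reply = reply_queue.get()
assert reply["correlation_id"] == request_id  # match reply to its request
print(reply["body"])  # HELLO
```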
Choosing between the two models:
- Request-Based - for well-structured, data-driven requests (e.g. retrieving customer profile data).
- Event-Based - for flexible, action-based events that require a high level of responsiveness and scale, with complex and dynamic processing.

## Chapter 15: Space-Driven Architecture Style

In any high-volume application with a large concurrent load, the database will become a bottleneck, regardless of the caching technologies used. The space-based architecture style is specifically designed to address problems involving high scalability, elasticity, and high concurrency.

_Tuple space_ - the technique of using multiple parallel processors communicating through shared memory. High scalability, elasticity, and performance are achieved by removing the central database and leveraging replicated in-memory data grids. Application data is kept in memory and replicated among all active processing units.

Several architecture components make up a space-based architecture:
- processing unit: contains the application code
  - single or multiple processing units
  - contains an in-memory data grid and replication engine, usually implemented using Hazelcast, Apache Ignite, or Oracle Coherence
- virtualized middleware: used to manage and coordinate the processing units
  - handles the infrastructure concerns (data sync, request handling)
  - made of:
    - messaging grid - manages input requests and session state, determines which active processing components are available to receive a request, and forwards it to one of those processing units (usually implemented using HAProxy and Nginx)
    - data grid - implemented within the processing units as a replicated cache
    - processing grid - (optional component) manages orchestrated request processing when there are multiple processing units involved in a single business request
    - deployment manager - monitors response times and user loads, starts up new processing units when load increases, and shuts them down when the load decreases
- data pumps: used to asynchronously send updated data to the database
  - a way of sending data to another processor, which then updates the database
  - always asynchronous, providing eventual consistency
  - when a processing unit receives a request and updates its cache, that processing unit becomes the owner of the update and is responsible for sending it through the data pump so that the database can be updated eventually
  - implemented using messaging; messages usually contain the new data values (a diff)
- data writers: used to perform the updates from the data pumps
  - accept messages from a data pump and update the database with the information contained in each message
- data readers: used to read database data and deliver it to processing units upon startup
  - responsible for reading data from the database and sending it to the processing units via a reverse data pump
  - invoked in one of 3 situations:
    - a crash of all processing unit instances of the same named cache
    - a redeployment of all processing units within the same named cache
    - retrieving archive data not contained in the replicated cache

Data collision - occurs when data is updated in one cache instance A and, during replication to another cache instance B, the same data is updated in cache B. The local update in B will be overridden by the old data from cache A, and cache A will be overridden by cache B. Data collision rate factors: latency, number of instances, cache size.

_Distributed cache_ - better data consistency. _Replicated cache_ - better performance and fault tolerance.
Example usages of space-based architecture: well suited for applications that experience high spikes in user or request volume and apps with throughput in excess of 10,000 concurrent users - online concert ticketing systems, online auction systems.

## Chapter 16: Orchestration-Driven Service-Oriented Architecture

This style appeared in the late 1990s, when companies were becoming enterprises and architects were forced to reuse as much as possible because of expensive software licenses (no open-source alternatives). Reuse is the dominant philosophy in this architecture.

- Business Services - sit at the top of this architecture and provide the entry point. No code, just input, output, and schema information.
- Enterprise Services - fine-grained, shared implementations
  - atomic behaviours around a particular business domain - CreateCustomer, CalculateQuote, ...
  - meant to be a collection of reusable assets - unfortunately, the dynamic nature of reality defies these attempts
- Application Services - not all services in the architecture require the same level of granularity; these are one-off, single-implementation services, for example functionality a company doesn't want to take the time to make reusable.
- Infrastructure Services - supply the operational concerns - monitoring, logging, auth.
- Orchestration Engine - the heart of this architecture; defines the relationships between the business and enterprise services, how they map together, and where transaction boundaries lie. It also acts as an integration hub, allowing architects to integrate custom code with packaged and legacy software systems.

In practice, this architecture was mostly a disaster. When a team builds a system primarily around reuse, they also incur a huge amount of coupling between components. Each change had a potentially huge ripple effect, which in turn led to the need for coordinated deployments, holistic testing, and other drags on engineering efficiency. This architecture manages to find the disadvantages of both monolithic and distributed architectures!

## Chapter 17: Microservices Architecture

There is no secret group of architects who decide what the next big movement will be; rather, it turns out that many architects end up making common decisions. Microservices differ in this regard - the style was popularized by a famous blog entry by Martin Fowler and James Lewis.

Microservices architecture is heavily inspired by the ideas in DDD - the bounded context, in particular, decidedly inspired microservices. Within a bounded context, the internal parts (code, data schemas) are coupled together to produce work, but they are never coupled to anything outside the bounded context. Each service is expected to include all necessary parts to operate independently.

Performance is often the negative side effect of the distributed nature of microservices: network calls take much longer than method calls. It is advised to avoid transactions across service boundaries, making determining the granularity the key to success in this architecture. It is hard to define the right granularity for services: if there are too many, a lot of communication will be required to perform work. The purpose of service boundaries is to capture a domain or workflow.
Guidelines to find the appropriate boundaries:
- purpose - a domain, one significant behaviour on behalf of the overall application
- transactions - often the entities that need to cooperate in a transaction show a good service boundary
- choreography - if excellent domain isolation requires extensive communication, consider merging services back into a larger service to avoid the communication overhead

Microservices architecture tries to avoid all kinds of coupling - including shared schemas and databases used as integration points.

Once a team has built several microservices, they realize that each has common elements that benefit from similarity; these can be moved into a sidecar. The shared sidecar can be either owned by individual teams or by a shared infrastructure team. Once teams know that each service includes a common sidecar, they can build a _service mesh_ - allowing unified control across infrastructure concerns like logging and monitoring.

2 styles of user interfaces:
- monolithic user interface - a single UI that calls through the API layer to satisfy user requests
- micro-frontends - each service emits the UI for that service, and the frontend coordinates the emitted UI components

Microservices architectures typically utilize _protocol-aware heterogeneous interoperability_:
- protocol-aware - each service should know how to call other services
- heterogeneous - each service may be written in a different technology stack; microservices fully support polyglot environments
- interoperability - describes services calling one another; while architects in microservices try to discourage transactional calls, services commonly call other services via the network to collaborate

For asynchronous communication, architects often use events and messages (internally utilizing an event-driven architecture). The broker and mediator patterns manifest as choreography and orchestration:
- choreography - no central coordinator exists in this architecture
- orchestration - coordinating calls across several services

Building transactions across service boundaries violates the core decoupling principle of the microservices architecture. DON'T.

> Don't do transactions in microservices - fix granularity instead.

Exceptions always exist (e.g. 2 different parts need vastly different architecture characteristics -> different boundaries); in such situations, patterns exist to handle transaction orchestration (with serious trade-offs). SAGA - the mediator calls each part of the transaction, records success/failure, and coordinates results. In case of an error, the mediator must ensure that no part of the transaction succeeds if one part fails (e.g. by sending requests to undo the completed parts - usually very complex). Typically implemented by keeping each request in a `pending` state until the mediator indicates success, as sketched below.

> A few transactions across services is sometimes necessary; if it is the dominant feature of the architecture, mistakes were made!
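A minimal sketch of an orchestrated SAGA: the mediator runs each step, records what has completed, and issues compensating "undo" calls in reverse order when a later step fails (the step functions are illustrative):

```python
def run_saga(steps):
    # The mediator executes each (action, compensation) pair in order,
    # recording the compensations of completed steps.
    completed = []
    for action, compensation in steps:
        try:
            action()
            completed.append(compensation)
        except Exception:
            # A step failed: undo everything completed so far, in
            # reverse order, so no part of the transaction survives.
            for compensate in reversed(completed):
                compensate()
            return "rolled back"
    return "committed"

def charge_payment():
    raise RuntimeError("payment failed")

print(run_saga([
    (lambda: print("reserve stock (pending)"), lambda: print("release stock")),
    (charge_payment, lambda: print("refund payment")),
]))  # prints: reserve stock (pending), release stock, rolled back
```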
Performance is often an issue in microservices - many network calls, each with high overhead. Many patterns exist to increase performance (data caching and replication). Nevertheless, some of the most scalable systems yet have utilized this style to great success, thanks to its scalability, elasticity, and evolutionary nature.

Additional references on microservices:
- Building Microservices
- Microservices vs. Service-Oriented Architecture
- Microservices AntiPatterns

## Chapter 18: Choosing the Appropriate Architecture Style

Choosing an architecture style represents the culmination of analysis and thought about trade-offs for architecture characteristics, domain considerations, strategic goals, and a host of other things.

Preferred architecture styles shift over time, driven by:
- observations from the past - rely on experience and observations
- changes in the ecosystem - constant change is a reliable feature of software development
- new capabilities - architects must keep an eye open not only for new tools but for new paradigms
- acceleration - new tools create new engineering practices, which lead to new designs and capabilities
- domain changes - the business continues to evolve
- technology changes - as technology evolves, organizations have to keep up with at least some of these changes
- external factors - external factors may force a migration to another option

When choosing an architectural style, an architect must take into account all the various factors that contribute to the structure for the domain design. Architects should go into the decision comfortable with the following things:
- the domain - a good general understanding of the major aspects of the domain
- architecture characteristics that impact structure - the architect must discover and elucidate the architecture characteristics
- data architecture - architects and DBAs must collaborate on database, schema, and other DB-related concerns
- organizational factors - external factors may influence the design - cost, the company's plans, ...
- knowledge of process, teams, and operational concerns - the software development process and the interaction with operations and QA influence the design
- domain/architecture isomorphism - some problem domains match the topology of an architecture

Several determinations:
- monolith vs distributed
- where should data live
- synchronous or asynchronous communication between services

General tip:

> Use synchronous by default, asynchronous when necessary

## Chapter 19: Architecture Decisions

Making architecture decisions involves gathering enough relevant information, justifying the decision, documenting the decision, and effectively communicating the decision to the right stakeholders.

Decision anti-patterns:
- covering your assets - occurs when an architect avoids/defers making architecture decisions out of fear of making the wrong choice; 2 ways to overcome:
  - wait until you have enough information to justify and validate your decision (but waiting too long holds up development teams)
  - continually collaborate with development teams to ensure that the decision can be implemented as expected, and respond quickly to change
- groundhog day - when people don't know why a decision was made, so it keeps getting discussed over and over; the architect failed to provide a justification (technical and business) for the decision
- email-driven architecture - where people lose, forget, or don't even know an architecture decision has been made and therefore cannot implement it; notify impacted people directly in order to avoid this anti-pattern

Architecturally significant decisions are those that affect any of:
- the structure - impacts the patterns/styles of architecture being used
- nonfunctional characteristics - architecture characteristics (performance, scalability, ...)
- dependencies - coupling points between components/services within the system
- interfaces - how services and components are accessed and orchestrated
- construction techniques - platforms, frameworks, tools, processes

Architecture Decision Records (ADRs) - a short text file describing a specific architecture decision. 5 main sections:
- title - numbered sequentially, containing a short phrase describing the architecture decision
- status - one of: proposed (must be approved by a higher-level decision maker), accepted (approved and ready for implementation), superseded (the decision was changed and superseded by another ADR)
- context - what situation forces me to make this decision; this section also provides a way to document the architecture (clear and concise)
- decision - the architecture decision, along with a full justification; it is advised to use an affirmative voice: "we will do", "we will use", ... This section allows an architect to place more emphasis on _why_ rather than _how_. Understanding why a decision was made is far more important than understanding how something works.
- consequences - the overall impact of an architecture decision; this section forces the architect to think about whether those impacts outweigh the benefits of the decision. Another good use is to document the trade-off analysis.
- [additional] compliance - how the architecture decision will be measured and governed from a compliance perspective
- [additional] notes - various metadata - author, approval date, approved by, superseded date, last modified date, ...

Authors' recommendation: store ADRs in a wiki, rather than in Git. ADRs can be used as an effective means to document a software architecture (a skeleton is sketched below).
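A hypothetical ADR skeleton following the five main sections above; the number, title, and decision are invented for illustration:

```markdown
# 42. Use asynchronous messaging between order and payment services

## Status
Accepted

## Context
What situation forces this decision? (Also serves to document the
architecture, clearly and concisely.)

## Decision
We will use asynchronous messaging between the order and payment
services, because... (emphasis on *why* rather than *how*)

## Consequences
The overall impact of the decision; do the benefits outweigh the
impacts? The trade-off analysis can be documented here.
```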
Irrational Artifact Attachment - the proportional relationship between a person's attachment to some artifact and how long it took to produce. If you spend a lot of time on something, you may have an irrational attachment to that artifact (proportional to the time invested). Use an Agile approach in order to avoid this anti-pattern - create just-in-time artifacts, use simple tools to create diagrams.

Baseline features of a diagramming tool:

- layers - used to link a group of items together logically to enable hiding/showing individual layers. An architect can build a diagram where they can hide overwhelming details or incrementally build pictures for presentations
- stencils/templates - allow you to build up a library of common visual components (basic shapes with a special meaning, e.g. a microservice stencil)
- magnets - assistance in drawing lines

Diagram Guidelines:

- Titles - all elements of the diagram should have a title, unless they are well known to the audience
- Lines - should be thick enough to be seen; if lines indicate information flow, use arrows
  - solid lines = synchronous communication
  - dotted lines = asynchronous communication
- Shapes - each architect tends to make their own standard set of shapes; hint: use 3D boxes to indicate deployable artifacts and rectangles to indicate containership
- Labels - label each item in a diagram, especially if there is a chance of ambiguity for the readers
- Color - use colors when it helps to distinguish one artifact from the other
- Keys - if shapes are ambiguous, include a key on the diagram clearly indicating what each shape represents

Book recommendation: Presentation Patterns

When preparing a presentation - use a different type of transition when changing topics, use the same transition within a topic.

When presenting, the presenter has 2 presentation channels: verbal and visual. By placing too much text on the slides and then saying the same words, the presenter is overloading one information channel and starving the other.

Using animations and transitions in conjunction with incremental builds (reveal information gradually) allows the presenter to make more compelling, entertaining presentations.

Info-decks - slide decks that are not meant to be projected but rather summarize information graphically, essentially using a presentation tool as a desktop publishing machine. They contain all the information, are meant to be standalone, with no need for a presenter.

Invisibility - a pattern where the presenter inserts a blank slide within a presentation to refocus attention solely on the speaker (turns off the visual channel).

## Chapter 22: Making Teams Effective

A software architect is also responsible for guiding the development team through the implementation of the architecture. A software architect should create and communicate constraints, or the box, in which developers can implement the architecture. Tight boundaries = frustration, loose boundaries = confusion, appropriate boundaries = effective teams.
3 basic types of architect personalities:

- a control freak:
  - controls every detailed aspect of the software development process
  - makes too fine-grained and too low-level decisions
  - may restrict the development team to a specific technology, library, naming convention, or class design
  - steals the art of programming away from the developers
- an armchair architect:
  - hasn't coded in a very long time and does not take the implementation details into account
  - creates loose boundaries; in this scenario, development teams end up taking on the role of architect, doing the work an architect is supposed to be doing
  - in order to avoid such behaviour, an architect should be involved in the technology being used on the project
- an effective architect:
  - produces the appropriate constraints and boundaries, ensures that the team members are working well together and have the right level of guidance on the team
  - requires working closely and collaborating with the team, and gaining the respect of the team as well

Elastic Leadership - https://www.elasticleadership.com -- knowing how much control to exert on a given development team, factors to determine how many teams a software architect can manage at once:

- team familiarity - the better team members know each other, the less control is needed because team members start to become self-organizing; the newer the team members, the more control is needed to help facilitate collaboration among team members and reduce cliques within the team
- team size - the larger the team, the more control is needed; the smaller the team, the less control is needed
- overall experience - teams with more junior developers require more control and mentoring, whereas teams with more senior developers require less control
- project complexity - highly complex projects require the architect to be more available to the team and to assist with issues that arise, hence more control is needed on the team
- project duration - the shorter the duration, the less control is needed; the longer the project, the more control is needed

3 factors when considering the most effective team size:

- process loss - (Brooks' law) the more people you add to a project, the more time the project will take, example: unable to parallelize work, merge conflicts
- pluralistic ignorance - when everyone agrees to a norm because they think they are missing something obvious; rather than speaking up, a person chooses to follow the group (similar to "The Emperor's New Clothes" -- the king is naked), an architect should observe the body language of all team members and ask each person what they think about the proposed solution
- diffusion of responsibility - as team size increases, it has a negative impact on communication

An effective architect not only helps guide the development team through the implementation of the architecture, but also ensures that the team is healthy, happy, and working together to achieve a common goal.

Checklists work and provide an excellent vehicle for making sure everything is covered and addressed. The key to making teams effective is knowing when to leverage checklists and when not to.
Most effective checklists:

- code completion checklist - if everything in the checklist is completed, then the developer can claim they are actually done with the code
- unit and functional testing checklist - contains some of the more unusual and edge-case tests that software developers tend to forget to test
- software release checklist - releasing software is perhaps one of the most error-prone aspects of the software development life cycle; it helps avoid failed builds and deployments, and it significantly reduces the amount of risk associated with releasing software

Many items from the checklists can be automated.

> Don't worry about stating the obvious in a checklist. It's the obvious stuff that's usually skipped or missed.

## Chapter 23: Negotiation and Leadership Skills

Negotiation is one of the most important skills a software architect can have. Effective software architects understand the politics of the organization, have strong negotiation and facilitation skills, and can overcome disagreements when they occur to create solutions that all stakeholders agree on.

"We must have zero downtime", "I need these features yesterday", ...:

> Leverage the use of grammar and buzzwords to better understand the situation

Enter the negotiation with as many arguments as possible:

> Gather as much information as possible _before_ entering into a negotiation

Save this negotiation tactic for last:

> When all else fails, state things in terms of cost and time

Does the entire system require 99.999% availability or just some parts?:

> Leverage the "divide and conquer" rule to qualify demands or requirements

Demonstrate your point with a real-life example:

> Always remember that demonstration defeats discussion

> Avoid being too argumentative or letting things get too personal in a negotiation -- calm leadership combined with
> clear and concise reasoning will always win a negotiation

Ivory Tower architecture anti-pattern - ivory tower architects are ones who simply dictate from on high, telling development teams what to do without regard to their opinions or concerns. This usually leads to a loss of respect for the architect and an eventual breakdown of the team dynamics.

> When convincing developers to adopt an architecture decision or to do a specific task, provide a justification rather
> than "dictating from on high"

By providing a reason why something needs to be done, developers are more likely to agree with the request. Most of the time, once a person hears something they disagree with, they stop listening. By stating the reason first, the architect makes sure that the justification will be heard.

> If a developer disagrees with a decision, have them arrive at the solution on their own

Win-win situation: the developer either fails trying, and the architect automatically gets buy-in for the architect's decision, or the developer finds a better way to address the concerns.

Accidental complexity - we have made a problem hard; architects sometimes do this to prove their worth when things seem too simple, or to guarantee that they are always kept in the loop on discussions and decisions. Introducing accidental complexity into something that is not complex is one of the best ways to become an ineffective leader as an architect. An effective way of avoiding accidental complexity is what we call the 4 C's of architecture:

- communication
- collaboration
- clarity
- conciseness

Be pragmatic, yet visionary.

Visionary - Thinking about or planning the future with imagination or wisdom.
Pragmatic - Dealing with things sensibly and realistically in a way that is based on practical rather than theoretical considerations.

Bad software architects leverage their title to get people to do what they want them to do. Effective software architects get people to do things not by leveraging their title as architect, but by leading through example. Lead by example, not by title.

To lead a team and become an effective leader, a software architect should try to become the go-to person on the team - the person developers go to with their questions and problems. Another technique to start gaining respect as a leader and become the go-to person on the team is to host periodic brown-bag lunches to talk about a specific technique or technology.

Too many meetings? Ask for the meeting agenda ahead of time to help determine whether you are really needed at the meeting or not. Meetings should be either first thing in the morning, right after lunch, or toward the end of the day, but not in the middle of the day when most developers experience flow state.

> The most important single ingredient in the formula of success is knowing how to get along with people ~ Theodore
> Roosevelt

## Chapter 24: Developing a Career Path

An architect must continue to learn throughout their career. Technology breadth is more important to architects than depth.

The 20-Minute Rule - devote at least 20 minutes a day to your career as an architect by learning something new or diving deeper into a specific topic. Spend a minimum of 20 minutes Googling some unfamiliar buzzwords.

Technology Radar: https://www.thoughtworks.com/radar

You can create your own personal technology radar. It helps to formalize thinking about technology and balance opposing decision criteria. Architects should choose some technologies and/or skills that are widely in demand and track that demand. But they might also want to try some technology gambits, like open source or mobile development.

Architects can utilize social media to enhance their technical breadth. Using media like Twitter professionally, architects should find technologists whose advice they respect. This allows them to build a network of new, interesting technologies to assess and to keep up with the rapid changes in the technology world.

## Self-Assessment Questions

[Chapter 1: Introduction](#chapter-1-introduction)

1. What are the 4 dimensions that define software architecture?
   Knowledge of the architecture structure, architecture characteristics, architecture decisions, and design principles.
2. What is the difference between an architecture decision and a design principle?
   Decisions: what is and what is not allowed, rules for how a system should be constructed. Design principles: guidelines for constructing systems.
3. List the eight core expectations of a software architect.
   Make architecture decisions. Continually analyze the architecture. Keep current with the latest trends. Ensure compliance with decisions. Diverse exposure and experience. Have business domain knowledge. Possess interpersonal skills. Understand and navigate politics.
4. What is the First Law of Software Architecture?
   Everything in software architecture is a trade-off.

[Chapter 2: Architectural thinking](#chapter-2-architectural-thinking)

1. Describe the traditional approach of architecture versus development and explain why that approach no longer works.
   In a traditional model the architect is disconnected from the development teams, and as such the architecture rarely provides what it originally set out to do.
   The architect defines architecture characteristics, selects architecture patterns and styles, then these artifacts are handed off to the development teams. Boundaries between architects and developers must be broken down. Unlike the old-school waterfall approaches to static and rigid software architecture, the architecture of today's systems changes and evolves every iteration. Tight collaboration is essential for success.
2. List the three levels of knowledge in the knowledge triangle and provide an example of each.
   Stuff you know: Python. Stuff you know you don't know: Deep Learning. Stuff you don't know you don't know: 🤷‍
3. Why is it more important for an architect to focus on technical breadth rather than technical depth?
   Architects must make decisions that match capabilities to technical constraints; a broad understanding of a wide variety of solutions is valuable.
4. What are some of the ways of maintaining your technical depth and remaining hands-on as an architect?
   - do frequent proof-of-concepts
   - whenever possible, write the best production-quality code (even when doing POCs) -- POC code often remains in the repository and becomes the reference or guiding example
   - tackle technical debt stories or architecture stories, freeing the development team up to work on the critical functional user stories
   - work on bug fixes
   - create simple command-line tools and analyzers to help the development team with their day-to-day tasks
   - do code reviews frequently

[Chapter 3: Modularity](#chapter-3-modularity)

1. What is meant by the term _connascence_?
   Two components are connascent if a change in one would require the other to be modified in order to maintain the overall correctness of the system. Connascence allows us to go beyond the binary of "coupled" and "not coupled", serving as a tool to measure coupling and describe how bad it is under different levels and kinds.
2. What is the difference between static and dynamic connascence?
   Static connascence refers to source-code-level coupling - name (multiple entities must agree on the name), type (multiple entities must agree on the type), meaning (multiple entities must agree on the meaning of particular values), position (multiple entities must agree on the order of the values), algorithm (multiple entities must agree on a particular algorithm). Dynamic connascence analyzes calls at runtime - execution (order of execution), timing (timing of the execution of multiple components), values (several values relate to one another and must change together), identity (multiple components must reference the same entity).
3. What does the connascence of type mean? Is it static or dynamic connascence?
   [STATIC] Multiple components must agree on the type of an entity.
4. What is the strongest form of connascence?
   Identity. Multiple components must reference the same entity. For example, when 2 independent components must share and update a common data source.
5. What is the weakest form of connascence?
   Name. Multiple components must agree on the name.
6. Which is preferred within a code base -- static or dynamic connascence?
   Static. Architects have a harder time determining dynamic connascence because we lack tools to analyze runtime calls as effectively as we can analyze the call graph.

[Chapter 4: Architecture Characteristics Defined](#chapter-4-architecture-characteristics-defined)

1. What three criteria must an attribute meet to be considered an architecture characteristic?
   - specifies a non-domain design consideration
   - influences some structural aspect of the domain
   - is critical or important to application success
2. What is the difference between an implicit characteristic and an explicit one? Provide an example of each.
   Implicit - rarely appears in requirements, yet is necessary for project success; domain knowledge is required to uncover such characteristics. Explicit - a characteristic listed in the requirements.
3. Provide an example of an operational characteristic.
   Availability, Continuity, Performance, Reliability, Recoverability, Scalability, ...
4. Provide an example of a structural characteristic.
   Configurability, Extensibility, Maintainability, ...
5. Provide an example of a cross-cutting characteristic.
   Accessibility, Authentication, Authorization, Legal, Security, Privacy, ...
6. Which architecture characteristic is more important to strive for -- availability or performance?
   The ultimate answer to architectural questions: _it depends..._

[Chapter 5: Identifying Architectural Characteristics](#chapter-5-identifying-architectural-characteristics)

1. Give a reason why it is a good practice to limit the number of characteristics an architecture should support.
   Over-specifying architecture characteristics may kill the project. Example: the Vasa - a Swedish warship that was supposed to be magnificent, but turned out to be too heavy and too complicated. Keep the design simple.
2. True or false: most architecture characteristics come from business requirements and user stories.
   True.
3. If a business stakeholder states that time-to-market is the most important business concern, which architecture characteristics would the architecture need to support?
   Agility, testability, deployability.
4. What is the difference between scalability and elasticity?
   Scalability - the ability to handle a large number of concurrent users without serious performance degradation. Elasticity - the ability to handle bursts of requests.
5. You find out that your company is about to undergo several major acquisitions to significantly increase its customer base. Which architectural characteristics should you be worried about?
   Interoperability, scalability, adaptability, extensibility.

[Chapter 6: Measuring and Governing Architecture Characteristics](#chapter-6-measuring-and-governing-architecture-characteristics)

1. Why is cyclomatic complexity such an important metric to analyze for architecture?
   Overly complex code represents a code smell - it harms virtually every one of the desirable characteristics.
2. What is an architecture fitness function? How can they be used to analyze an architecture?
   Any mechanism that provides an objective integrity assessment of some architecture characteristic or combination of architecture characteristics. Many tools may be used to implement fitness functions: metrics, monitors, unit tests, chaos engineering, ...
3. Provide an example of an architecture fitness function to measure the scalability of an architecture.
   Write automated scalability tests and compare the results.
4. What is the most important criterion for an architecture characteristic to allow architects and developers to create fitness functions?
   Architects must ensure that developers understand the purpose of the fitness function before imposing it on them.

[Chapter 7: Scope of Architecture Characteristics](#chapter-7-scope-of-architecture-characteristics)

1. What is an architectural quantum, and why is it important to architecture?
   The architectural quantum is the smallest possible item that needs to be deployed in order to run an application.
2. Assume a system consisting of a single user interface with four independently deployed services, each containing its own separate database. Would this system have a single quantum or four quanta? Why?
   4, because each service can be deployed separately.
3. Assume a system with an administration portion managing static reference data (such as the product catalog, and warehouse information) and a customer-facing portion managing the placement of orders. How many quanta should this system be and why? If you envision multiple quanta, could the admin quantum and customer-facing quantum share a database? If so, in which quantum would the database need to reside?
   2 quanta - ordering and warehouse management, with separate databases.

[Chapter 8: Component-Based Thinking](#chapter-8-component-based-thinking)

1. We define the term component as a building block of an application - something the application does. A component usually consists of a group of classes or source files. How are components typically manifested within an application or service?
   Components - the physical manifestation of a module. Components offer a language-specific mechanism to group artifacts together, often nesting them to create stratification. Components also appear as subsystems or layers in architecture, and as the deployable unit of work for many event processors.
2. What is the difference between technical partitioning and domain partitioning? Provide an example of each.
   Technical partitioning - organizing architecture based on technical capabilities (presentation, business, service, persistence). Domain partitioning - a modeling technique for decomposing complex systems; in DDD the architect identifies domains that are independent and decoupled from each other. The microservices architecture is based on this philosophy.
3. What is the advantage of domain partitioning?
   Better reflects the kinds of changes that most often occur on projects.
4. Under what circumstances would technical partitioning be a better choice over domain partitioning?
   Separation based on technical partitioning enables developers to find certain categories of the code base quickly, as it is organized by capabilities.
5. What is the entity trap? Why is it not a good approach for component identification?
   Arises when the architect incorrectly identifies the database relationships as workflows in the application, a correspondence that rarely manifests in the real world. This anti-pattern indicates a lack of thought about the actual workflows of the application. Components created with the entity trap tend to be too coarse-grained.

[Chapter 9: Foundations](#chapter-9-foundations)

1. List the eight fallacies of distributed computing.
   Latency is Zero, Bandwidth is Infinite, The Network is Reliable, The Network is Secure, The Topology Never Changes, There is Only One Administrator, Transport Cost is Zero, The Network is Homogeneous.
2. Name three challenges that distributed architectures have that monolithic architectures don't.
   Debugging a distributed architecture, distributed transactions, contract maintenance and versioning.
3. What is stamp coupling?
   Requesting/receiving too much data when only a small subset of it is needed -- 2000 req x 10kB vs 2000 req x 100kB.
4. What are some ways of addressing stamp coupling?
- create private RESTful API endpoints - use field selectors in the contract - use GraphQL - use internal messaging endpoints ================================================ FILE: books/go/ch01/Makefile ================================================ # Set default target, when 'make' executed, runs 'build' by default: .DEFAULT_GOAL := build fmt: go fmt ./... # Keep 'make' from getting confused with directories, in this case with directory 'fmt' (if it is ever created): .PHONY: fmt # Before running 'lint', run 'fmt' lint: fmt golint ./... .PHONY: lint vet: fmt go vet ./... .PHONY: vet build: vet go build hello.go .PHONY: build ================================================ FILE: books/go/ch01/hello.go ================================================ package main import "fmt" func main() { fmt.Println("Hello, world!") } ================================================ FILE: books/go/ch02/const.go ================================================ package main import "fmt" const x int64 = 10 const ( idKey = "id" nameKey = "name" ) const z = 20 * 20 func main() { const y = "hello" fmt.Println(x) fmt.Println(y) //x = x + 1 // Error //y = "bye" // Error fmt.Println(x) fmt.Println(y) } ================================================ FILE: books/go/ch02/unicode.go ================================================ package main import "fmt" func main() { ęąćśż := "hello" fmt.Println(ęąćśż) } ================================================ FILE: books/go/ch03/types.go ================================================ package main import "fmt" func main() { var x [3]int fmt.Println(x) var y = [12]int{1, 5: 4} fmt.Println(y) var z = [...]int{12, 20, 30} fmt.Println(z) var p = []int{12, 20, 30} fmt.Println(p) var v []int fmt.Println(v == nil) fmt.Println(len(v)) v = append(v, 10, 20) fmt.Println(v) v = append(v, p...) 
fmt.Println(v) fmt.Println(cap(v)) r := make([]int, 5) fmt.Println(r) r = make([]int, 0, 20) r = append(r, 10, 20) fmt.Println(r) s := "Hello 😇" fmt.Println(s[6:7]) fmt.Println(s[6:10]) // 4 bytes for emoji teams := map[string][]string{ "Orcas": {"Fred", "Ralph"}, "Lions": {"Sarah", "Peter"}, } fmt.Println(teams) team, ok := teams["Kittens"] fmt.Println(team, ok) set := map[int]bool{} vals := []int{1, 2, 3, 4, 5, 6, 7, 4, 3, 2, 3, 4, 3} for _, v := range vals { set[v] = true } fmt.Println(len(set), len(vals)) if set[1] { fmt.Println("1 is in the set") } type person struct { name string age int pet string } julia := person{ "Julia", 30, "cat", } beth := person{ name: "Beth", } fmt.Println(julia, beth) var bob struct { name string age int pet string } bob.name = "Bob" fmt.Println(bob) } ================================================ FILE: books/go/ch04/case.go ================================================ package main import "fmt" func main() { words := []string{"a", "cow", "smile", "gopher"} for _, word := range words { switch size := len(word); size { case 1, 2, 3, 4: fmt.Println(word, "is a short word!") case 5: wordLen := len(word) fmt.Println(word, "is the exactly the right length:", wordLen) case 6, 7, 8, 9: default: fmt.Println(word, "is a long word!") } } } ================================================ FILE: books/go/ch04/for.go ================================================ package main import ( "fmt" ) func main() { completeFor() conditionOnlyFor() infiniteFor() forRange() labelingStatements() } func completeFor() { for i := 0; i < 10; i++ { fmt.Println(i) } } func conditionOnlyFor() { i := 1 for i < 100 { fmt.Println(i) i = i * 2 } } func infiniteFor() { for { fmt.Println("Hello") break } } func forRange() { evenVals := []int{2, 4, 6, 8, 10, 12} for i, v := range evenVals { fmt.Println(i, v) } for _, v := range evenVals { fmt.Println(v) } for _, v := range evenVals { fmt.Println(v) } uniqueNames := map[string]bool{"Fred": true, "Paul": true, "Wilma": true} for k := range uniqueNames { fmt.Println(k) } } func labelingStatements() { samples := []string{"hello", "apple_π!"} outer: for _, sample := range samples { for i, r := range sample { fmt.Println(i, r, string(r)) if r == 'l' { continue outer } } fmt.Println() } } ================================================ FILE: books/go/ch04/if.go ================================================ package main import ( "fmt" "math/rand" ) func main() { if n := rand.Intn(10); n == 10 { fmt.Println("That's too low") } else if n > 5 { fmt.Println("That's too big:", n) } else { fmt.Println("That's a good number:", n) } } ================================================ FILE: books/go/ch05/anonymous.go ================================================ package main import "fmt" func main() { for i := 0; i < 5; i++ { func(j int) { fmt.Println("printing", j, "from inside of an anonymous function") }(i) } } ================================================ FILE: books/go/ch05/deferExample.go ================================================ package main import ( "io" "log" "os" ) func getFile(name string) (*os.File, func(), error) { f, err := os.Open(name) if err != nil { return nil, nil, err } return f, func() { f.Close() }, nil } func main() { if len(os.Args) < 2 { log.Fatal("no file specified") } f, closer, err := getFile(os.Args[1]) if err != nil { log.Fatal(err) } defer closer() data := make([]byte, 2048) for { count, err := f.Read(data) os.Stdout.Write(data[:count]) if err != nil { if err != io.EOF { log.Fatal(err) } break } } } 
================================================ FILE: books/go/ch05/functionAsParam.go ================================================ package main import ( "fmt" "sort" ) type Person struct { FirstName string LastName string Age int } func main() { people := []Person{ {"Pat", "Patterson", 34}, {"Tracy", "Bobbert", 23}, {"Fred", "Fredson", 18}, } sort.Slice(people, func(i int, j int) bool { return people[i].LastName < people[j].LastName }) fmt.Println(people) } ================================================ FILE: books/go/ch05/functions.go ================================================ package main import ( "errors" "fmt" ) func main() { result := Div(5, 2) fmt.Println(result) MyFunc(MyFuncOpts{ LastName: "Smith", Age: 10, }) fmt.Println(addTo(10, 1, 2, 3, 4, 5)) fmt.Println(addTo(10, []int{1, 2, 3, 4, 5}...)) result, remainder, err := divAndRemainder(5, 2) if err != nil { fmt.Println(err) } fmt.Println(result, remainder) } func Div(numerator int, denominator int) int { if denominator == 0 { return 0 } return numerator / denominator } type MyFuncOpts struct { FirstName string LastName string Age int } func MyFunc(opts MyFuncOpts) int { return opts.Age } func addTo(base int, vals ...int) []int { out := make([]int, 0, len(vals)) for _, v := range vals { out = append(out, base+v) } return out } func divAndRemainder(numerator int, denominator int) (result int, remainder int, err error) { if denominator == 0 { err = errors.New("cannot divide by zero") return result, remainder, err } result, remainder, err = numerator/denominator, numerator%denominator, nil return result, remainder, err } ================================================ FILE: books/go/ch05/functionsAreValues.go ================================================ package main import "fmt" func main() { var opMap = map[string]func(int, int) int{ "+": add, "-": sub, "*": mul, "/": div, } fmt.Println(opMap["+"](10, 20)) } func add(i int, j int) int { return i + j } func sub(i int, j int) int { return i - j } func mul(i int, j int) int { return i * j } func div(i int, j int) int { return i / j } ================================================ FILE: books/go/ch05/returnFunction.go ================================================ package main import "fmt" func makeMult(base int) func(int) int { return func(factor int) int { return base * factor } } func main() { twoBase := makeMult(2) threeBase := makeMult(3) for i := 0; i < 3; i++ { fmt.Println(twoBase(i), threeBase(i)) } } ================================================ FILE: books/go/ch06/pointers.go ================================================ package main import "fmt" func failedUpdate(px *int) { x2 := 20 px = &x2 } func update(px *int) { *px = 20 } func main() { y := "hello" fmt.Println(y, &y, *&y) x := 10 failedUpdate(&x) fmt.Println(x) update(&x) fmt.Println(x) } ================================================ FILE: books/go/ch07/counter.go ================================================ package main import ( "fmt" "time" ) type Counter struct { total int lastUpdated time.Time } func (c *Counter) Increment() { c.total++ c.lastUpdated = time.Now() } func (c Counter) String() string { return fmt.Sprintf("total: %d, last updated %v", c.total, c.lastUpdated) } func updateWrong(c Counter) { c.Increment() fmt.Println("in updateWrong:", c.String()) } func updateRight(c *Counter) { c.Increment() fmt.Println("in updateRight:", c.String()) } func main() { var c Counter fmt.Println(c.String()) c.Increment() fmt.Println(c.String()) updateWrong(c) fmt.Println("in main:", 
c.String()) updateRight(&c) fmt.Println("in main:", c.String()) } ================================================ FILE: books/go/ch07/dependencyInjection.go ================================================ package main import ( "errors" "fmt" "net/http" ) func LogOutput(message string) { fmt.Println(message) } type SimpleDataStore struct { userData map[string]string } func (sds SimpleDataStore) UserNameForId(userID string) (string, bool) { name, ok := sds.userData[userID] return name, ok } func NewSimpleDataStore() SimpleDataStore { return SimpleDataStore{ userData: map[string]string{ "1": "Fred", "2": "Mary", "3": "Pat", }, } } type DataStore interface { UserNameForId(userID string) (string, bool) } type Logger interface { Log(message string) } type LoggerAdapter func(message string) func (lg LoggerAdapter) Log(message string) { lg(message) } type SimpleLogic struct { l Logger ds DataStore } func (sl SimpleLogic) SayHello(userID string) (string, error) { sl.l.Log("in SayHello for " + userID) name, ok := sl.ds.UserNameForId(userID) if !ok { return "", errors.New("unknown user") } return "Hello, " + name, nil } func (sl SimpleLogic) SayGoodbye(userID string) (string, error) { sl.l.Log("in SayGoodbye for " + userID) name, ok := sl.ds.UserNameForId(userID) if !ok { return "", errors.New("unknown user") } return "Goodbye, " + name, nil } func NewSimpleLogic(l Logger, ds DataStore) SimpleLogic { return SimpleLogic{ l: l, ds: ds, } } type MyLogic interface { SayHello(userID string) (string, error) } type Controller struct { l Logger logic MyLogic } func (c Controller) SayHello(w http.ResponseWriter, r *http.Request) { c.l.Log("In SayHello") userID := r.URL.Query().Get("user_id") message, err := c.logic.SayHello(userID) if err != nil { w.WriteHeader(http.StatusBadRequest) w.Write([]byte(err.Error())) return } w.Write([]byte(message)) } func NewController(l Logger, logic MyLogic) Controller { return Controller{ l: l, logic: logic, } } func main() { l := LoggerAdapter(LogOutput) ds := NewSimpleDataStore() logic := NewSimpleLogic(l, ds) c := NewController(l, logic) http.HandleFunc("/hello", c.SayHello) http.ListenAndServe(":8080", nil) } ================================================ FILE: books/go/ch07/embedding.go ================================================ package main import "fmt" type Employee struct { Name string ID string } func (e Employee) Description() string { return fmt.Sprintf("%s (%s)", e.Name, e.ID) } type Manager struct { Employee Reports []Employee } func main() { m := Manager{ Employee: Employee{ Name: "Bob Bobson", ID: "12345", }, Reports: []Employee{}, } fmt.Println(m.ID) fmt.Println(m.Description()) } ================================================ FILE: books/go/ch07/intTree.go ================================================ package main import "log" type IntTree struct { val int left, right *IntTree } func (it *IntTree) Insert(val int) *IntTree { if it == nil { return &IntTree{val: val} } if val < it.val { it.left = it.left.Insert(val) } else if val > it.val { it.right = it.right.Insert(val) } return it } func (it *IntTree) Contains(val int) bool { switch { case it == nil: return false case val < it.val: return it.left.Contains(val) case val > it.val: return it.right.Contains(val) default: return true } } func main() { var it *IntTree it = it.Insert(5) // calling methods on a nil receiver it = it.Insert(3) it = it.Insert(10) it = it.Insert(2) log.Println(it.Contains(2)) log.Println(it.Contains(12)) } ================================================ FILE: 
books/go/ch07/interfaces.go ================================================ package main import "fmt" type LogicProvider struct{} func (lp LogicProvider) Process(data string) string { return data } type Logic interface { Process(data string) string } type Client struct { L Logic } func (c Client) Program() { data := "whatever" c.L.Process(data) } func main() { c := Client{L: LogicProvider{}} c.Program() var i interface{} i = 1 i = "a" fmt.Println(i) } ================================================ FILE: books/go/ch07/iota.go ================================================ package main type MailCategory int const ( Uncategorized MailCategory = iota Personal Spam Social Ads ) ================================================ FILE: books/go/ch07/types.go ================================================ package main import "fmt" type Person struct { FirstName string LastName string Age int } type King Person // this is not an inheritance func (p Person) String() string { return fmt.Sprintf("%s %s, age %d", p.FirstName, p.LastName, p.Age) } type Score int type Converter func(string) Score type TeamScore map[string]Score func main() { p := Person{ FirstName: "Fred", LastName: "Fredson", Age: 52, } fmt.Println(p.String()) } ================================================ FILE: books/go/ch08/customErrors.go ================================================ package main type Status int const ( InvalidLogin Status = iota + 1 NotFound ) type StatusErr struct { Status Status Message string err error } func (se StatusErr) Error() string { return se.Message } func (se StatusErr) Unwrap() error { return se.err } ================================================ FILE: books/go/ch08/errors.go ================================================ package main import ( "errors" "fmt" "os" ) func calcRemainderAndMod(numerator, denominator int) (int, int, error) { if denominator == 0 { return 0, 0, errors.New("denominator is 0") } return numerator / denominator, numerator % denominator, nil } func main() { numerator := 20 denominator := 3 remainder, mod, err := calcRemainderAndMod(numerator, denominator) if err != nil { fmt.Println(err) os.Exit(1) } fmt.Println(remainder, mod) } ================================================ FILE: books/go/ch08/panic.go ================================================ package main func doPanic(msg string) { panic(msg) } func main() { doPanic("ERR") } ================================================ FILE: books/go/ch08/recover.go ================================================ package main import "fmt" func div60(i int) { defer func() { if v := recover(); v != nil { fmt.Println(v) } }() fmt.Println(60 / i) } func main() { for _, val := range []int{1, 2, 0, 6} { div60(val) } } ================================================ FILE: books/go/ch08/sentinel.go ================================================ package main import ( "archive/zip" "bytes" "fmt" ) type Sentinel string func (s Sentinel) Error() string { return string(s) } const ( ErrFoo = Sentinel("foo err") ErrBar = Sentinel("bar err") ) func main() { data := []byte("This is not a zip file") notZipFile := bytes.NewReader(data) _, err := zip.NewReader(notZipFile, int64(len(data))) if err == zip.ErrFormat { fmt.Println("Told you so") } } ================================================ FILE: books/go/ch08/wrappingErrors.go ================================================ package main import ( "errors" "fmt" "os" ) func fileChecker(name string) error { f, err := os.Open(name) if err != nil { return fmt.Errorf("in 
fileChecker: %w", err) // %w wraps the error //return fmt.Errorf("in fileChecker: %v", err) // %v does not wrap the error } f.Close() return nil } func main() { err := fileChecker("not_here.txt") if err != nil { fmt.Println(err) if wrappedErr := errors.Unwrap(err); wrappedErr != nil { fmt.Println(wrappedErr) } } } ================================================ FILE: books/go/ch09/formatter/formatter.go ================================================ package print import "fmt" func Format(num int) string { return fmt.Sprintf("The number is %d", num) } ================================================ FILE: books/go/ch09/main.go ================================================ package main import ( "./formatter" "./math" "fmt" ) func main() { num := math.Double(2) output := print.Format(num) fmt.Println(output) } ================================================ FILE: books/go/ch09/math/math.go ================================================ package math func Double(a int) int { return a * 2 } ================================================ FILE: books/go/ch10/deadlock.go ================================================ package main import "fmt" func main() { ch1 := make(chan int) ch2 := make(chan int) go func() { v := 1 ch1 <- v v2 := <-ch2 fmt.Println(v2) }() v := 2 ch2 <- v v2 := <-ch1 fmt.Println(v, v2) } ================================================ FILE: books/go/ch10/deadlockSolution.go ================================================ package main import "fmt" func main() { ch1 := make(chan int) ch2 := make(chan int) go func() { v := 1 ch1 <- v v2 := <-ch2 fmt.Println(v2) }() v := 2 var v2 int select { case ch2 <- v: case v2 = <-ch1: } fmt.Println(v, v2) } ================================================ FILE: books/go/ch10/goroutinesExample.go ================================================ package main func process(val int) int { return val * 2 } func runThingConcurrently(in <-chan int, out chan<- int) { go func() { for val := range in { result := process(val) out <- result } }() } ================================================ FILE: books/go/notes.md ================================================ [go back](https://github.com/pkardas/learning) # Learning Go: An Idiomatic Approach to Real-World Go Programming Book by Jon Bodner Code here: [click](.) 
- [Chapter 1: Setting Up Your Go Environment](#chapter-1-setting-up-your-go-environment)
- [Chapter 2: Primitive Types and Declarations](#chapter-2-primitive-types-and-declarations)
- [Chapter 3: Composite Types](#chapter-3-composite-types)
- [Chapter 4: Blocks, Shadows, and Control Structures](#chapter-4-blocks-shadows-and-control-structures)
- [Chapter 5: Functions](#chapter-5-functions)
- [Chapter 6: Pointers](#chapter-6-pointers)
- [Chapter 7: Types, Methods, and Interfaces](#chapter-7-types-methods-and-interfaces)
- [Chapter 8: Errors](#chapter-8-errors)
- [Chapter 9: Modules, Packages, and Imports](#chapter-9-modules-packages-and-imports)
- [Chapter 10: Concurrency in Go](#chapter-10-concurrency-in-go)
- [Chapter 11: The Standard Library](#chapter-11-the-standard-library)
- [Chapter 12: The Context](#chapter-12-the-context)
- [Chapter 13: Writing Tests](#chapter-13-writing-tests)
- [Chapter 14: Here There Be Dragons: Reflect, Unsafe, and Cgo](#chapter-14-here-there-be-dragons-reflect-unsafe-and-cgo)
- [Chapter 15: A Look at the Future: Generics in Go](#chapter-15-a-look-at-the-future-generics-in-go)

## Chapter 1: Setting Up Your Go Environment

Go is intended for building programs that last, programs that are modified by dozens of developers over dozens of years. Using Go correctly requires an understanding of how its features are intended to fit together. You can write code that looks like Java or Python, but you are going to be unhappy with the result.

> $ brew install go

Validate that your env is set up correctly: `go version`

There have been several changes in how Go developers organize their code and their dependencies. For modern Go development, the rule is simple: **you are free to organize your projects as you see fit**. However, Go still expects there to be a single workspace (default `$HOME/go`) for third-party Go tools installed via `go install`. You can use this default or set the `$GOPATH` env variable. Add the following lines to `.zshrc`:

```
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
```

Use `go run` when you want to treat a Go program like a script and run the source code immediately. `go run` builds the binary in a temporary directory, and then deletes the binary after your program finishes. Useful for testing out small programs or using Go like a scripting language.

Use `go build` to create a binary that is distributed for other people to use. Most of the time, this is what you want to do. Use the `-o` flag to give the binary a different name or location.

Go programs can also be built from source and installed into your Go workspace via `go install link@version`. Go developers don't rely on a centrally hosted service (Maven, PyPI, NPM, ...). Instead they share projects via their source code repositories. If you already installed a tool and want to update it to a newer version, rerun `go install` with the newer version specified after `@`.

Developers have historically wasted extraordinary amounts of time on format wars. Because Go defines a standard way of formatting code, Go developers avoid arguments over code styling. Go developers expect code to look a certain way and follow certain rules, and if your code does not, it sticks out. `go fmt` automatically reformats code to match the standard format.

Go requires a semicolon at the end of every statement. However, Go developers never put in the semicolons themselves; the Go compiler does it for them.
`go vet` detects things like: passing the wrong number of parameters to formatting methods, or assigning values to variables that are never used. Make `golint` and `go vet` part of your development process to avoid common bugs and non-idiomatic code.

An IDE is nice to use, but it is hard to automate. Modern software development relies on repeatable, automatable builds that can be run by anyone, anywhere, at any time. Go developers have adopted `make` as their solution.

You can use different Go versions:

```
go get golang.org/dl/go1.15.6
go1.15.6 download
go1.15.6 build
```

In order to update the Go version globally on your computer, use regular `brew` commands.

## Chapter 2: Primitive Types and Declarations

When trying to figure out what "best" means, there is one overriding principle: write your programs in a way that makes your intention clear.

LITERAL - in Go refers to writing out a number, character, or string.

- integer literals - sequences of numbers, normally base 10, but different prefixes are used to indicate other bases (`0b` binary, `0o` octal, `0x` hexadecimal). Put underscores in the middle of your literal, use them to improve readability, e.g. `120_000_000`
- floating point literals - they can also have an exponent specified with the letter `e` and a positive or negative number, e.g. `6.03e23`
- rune literals - characters surrounded by single quotes; in Go, `"` and `'` are _not_ interchangeable.
- string literals - two different ways to create:
  - interpreted string literal (") - zero or more rune literals
  - raw string literal (`) - can contain any literal character except a backquote
- strings in Go are immutable

Literals in Go are untyped - they can interact with any variable that is compatible with the literal.

BOOLEAN - `true` or `false`, variable definition defaults to `false`. Go doesn't allow truthiness - e.g. a positive integer cannot be treated as `true`.

INTEGER TYPES - 12 different types, more than other languages. 3 rules to follow:

1. If you are working with a binary format or network protocol that has an integer of a specific size or sign, use the corresponding integer type.
2. If you are writing a library function that should work with any integer type, write a pair of functions, one for `int64`, and the other for `uint64`. You can see this pattern in the std library (ParseInt/ParseUint, ...)
3. In all other cases, just use `int`.

FLOATING POINT - `float64` is the default type, the simplest option is to use this type. Don't worry about memory usage, unless you have used the profiler to determine it is a significant source of problems. A floating point number cannot represent a decimal value exactly. Do not use them to represent money or any other value that must have an exact decimal representation. Go stores floats using the IEEE 754 standard: 1 bit for the sign, 11 bits for the exponent, 52 bits for the mantissa.

Go doesn't allow automatic type promotion, as a language that values clarity of intent and readability. It turns out that the rules to properly convert one type to another can get complicated and produce unexpected results. You must use type conversion.

Variable declaration. Go has multiple ways of declaring a variable, because each declaration style communicates something about how the variable is used.

- `var x int = 10`
- `var x = 10`
- `var x int` - will default to 0
- `var z, y int = 10, 20`
- `var x, y = 10, "hello"`
- `var(...)` - declaration list
- `x := 10`

The most common declaration style within functions is `:=`. Outside a function, use declaration lists.
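A quick sketch of the declaration styles above, plus the explicit type conversion Go requires in mixed-type expressions (all names here are illustrative):

```go
package main

import "fmt"

// Outside a function, use a declaration list.
var (
	total     int          // defaults to the zero value, 0
	greeting        = "hello"
	threshold int64 = 10
)

func main() {
	x := 10             // the most common style inside functions
	var y int           // explicit zero value
	a, b := 20, "world" // multiple variables in one statement

	// No automatic type promotion: int and int64 must be converted explicitly.
	sum := int64(x) + threshold

	fmt.Println(total, greeting, threshold, x, y, a, b, sum)
}
```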
Sometimes you need to avoid `:=`:

1. When initializing a variable to its zero value, use `var x int`. This makes it clear that the zero is intended.
2. Because `:=` allows assigning to new and existing variables, it can be confusing whether a variable is new or existing. Declare all new variables with `var`, and then use the assignment operator (`=`) for both new and old variables.
3. When you need to convert a type during assignment, use `var x byte = 20`, not `x := byte(20)`.

Go allows Unicode characters and letters in variable names. However, don't use this feature.

Naming:

- use `camelCase`, even for constant vars
- use single letters for e.g. loops: `k`, `v` are common names for `key`, `value`; `i` for `integer`, ...
- do not put the type in the variable name
- use short names, they remove repetitive typing and force you to write smaller blocks of code (if you need a complete name to keep track of it, it is likely that your block of code does too much)

## Chapter 3: Composite Types

ARRAYS - rarely used in Go. All the elements in the array must be of the type that is specified.

```go
var x [3]int
var x = [3]int{10, 20, 30}
var x = [12]int{1, 5: 4} // Sparse array (most elements are set to the zero value)
var x = [...]int{12, 20, 30}
```

Arrays are rarely used in Go because they come with unusual limitations:

- the _size_ of the array is part of the _type_: `[3]int` has a different type than `[4]int`, and you can't use a variable to specify the size of an array
- you can't use a type conversion to convert arrays of different sizes to identical types

Don't use arrays unless you know the exact length you need ahead of time. Arrays in Go exist to provide backing stores for SLICES.

SLICES - slices remove the limitations of arrays. We can write a single function that processes slices of any size. We can also grow slices as needed. Slice definition:

```go
var x = []int{12, 20, 30}
```

Using `[...]` makes an array. Using `[]` makes a slice. `nil` in Go has no type, can be assigned or compared against values of different types.

Built-in functions:

- `len` - `len(x)` - `len` of a nil slice returns 0
- `append` - `x = append(x, 10, 20, 30)`, `x = append(x, y...)` (`...` used to expand the source slice)
- `cap` - `cap(v)` - returns the current capacity of a slice
- `make` - `x := make([]int, 5)` - it allows us to specify the type, length, and optionally, the capacity
- `copy` - `numberOfElementsCopied := copy(destination, source)` - if you need to create a copy that is independent of the original

Go is _Call by value_ - every time you pass a parameter to a function, Go makes a copy of the value that is passed in.

When a slice grows via `append`, Go increases the slice by more than one when it runs out of capacity. It doubles the size when the capacity is less than 1024 and then grows by at least 25% afterward. `make` and `append` are the preferred way of declaring slices.

Slicing: `[startingOffset:endingOffset]`. In Go, when you take a slice from a slice, you are not making a copy of the data; instead, you have 2 variables sharing the same memory. Avoid modifying slices after they have been sliced or if they were produced by slicing. Use the full slice expression to prevent `append` from sharing capacity between slices (`x[:2:2]`, `x[2:4:4]`). The last position indicates the last position in the parent slice's capacity that is available for the subslice. Subtract the starting offset from this number to get the subslice's capacity.

An array can be converted to a slice by using a slicing expression.
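A small sketch of the sharing problem and of the full slice expression described above (the values are chosen only for illustration):

```go
package main

import "fmt"

func main() {
	x := []int{1, 2, 3, 4}

	y := x[:2]        // y shares x's backing array and its spare capacity
	y = append(y, 30) // overwrites x[2], because y's capacity extends into x
	fmt.Println(x)    // [1 2 30 4]

	z := x[:2:2]      // full slice expression: z's capacity is capped at 2
	z = append(z, 99) // forces a new backing array, so x is untouched
	fmt.Println(x, z) // [1 2 30 4] [1 2 99]
}
```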
Go allows us to use slicing notation to make substrings. Be very careful when doing so. Strings are immutable, so they don't have the modification problem, BUT a string is composed of _bytes_, and a code point in UTF-8 can be anywhere from one to four bytes long. When dealing with languages other than English or with emojis, you run into code points that are multiple bytes long. UTF-8 is very clever: in the worst case it uses 4 bytes, in the best case only one. The only downside is that you cannot randomly access a string encoded with UTF-8.

MAPS - dictionary/hash map. Declaration: `map[keyType]valueType`.

- maps automatically grow as you add key-value pairs
- if you know how many key-value pairs you plan to insert into a map, you can use `make` to create a map with a specific initial size
- passing a _map_ to the _len_ function tells you the number of key-value pairs in a _map_
- the zero value for a map is nil
- maps are not comparable

Go doesn't allow you to define your own hash algorithm.

Comma ok idiom - a boolean value: if `ok` is true, the key is present; if `ok` is false, the key is not present (see the sketch at the end of this chapter).

- `delete` - `delete(m, key)` (remove a key-value pair from the map)

Go does not include sets, but you can use a map to simulate some of its features. Set simulation:

```go
set := map[int]bool{}
```

If you need sets that provide operations like union, intersection, and subtraction - write one yourself or use a 3rd-party library.

STRUCT - when you have related data that you want to group together.

```
type person struct {
    name string
    age  int
    pet  string
}

julia := person{
    "Julia",
    30,
    "cat",
}

beth := person{
    name: "Beth",
}
```

Anonymous struct - without giving it a name first:

```
var person struct {
    name string
    age  int
    pet  string
}

person.name = "Bob"
```

Whether a struct is comparable depends on the struct's fields. Structs that are entirely composed of comparable types are comparable, those with slice or map fields are not. Unlike Python, there are no methods that can be overridden to redefine equality.

Go allows you to perform a type conversion from one struct to another _if the fields of both structs have the same names, order, and types_.
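A short sketch of the comma ok idiom and `delete` from the maps section above (the map contents are illustrative):

```go
package main

import "fmt"

func main() {
	totals := map[string]int{"hello": 5, "world": 0}

	v, ok := totals["world"]
	fmt.Println(v, ok) // 0 true -- the key exists; its value just happens to be 0

	v, ok = totals["goodbye"]
	fmt.Println(v, ok) // 0 false -- the zero value, the key is absent

	delete(totals, "hello")  // removes the pair; deleting a missing key is a no-op
	fmt.Println(len(totals)) // 1
}
```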
## Chapter 4: Blocks, Shadows, and Control Structures

BLOCKS - Go lets you declare variables in lots of places. You can declare them outside of functions, as the parameters to functions, and as local variables within functions. Each place where a declaration occurs is called a _block_. Variables, constants, types and functions declared outside of any functions are placed in the package block.

`:=` reuses variables that are declared in the current block. When using `:=`, make sure that you don't have any variables from an outer scope on the left-hand side, unless you intend to shadow them. Sometimes avoid using `:=` because it may make it unclear what variables are being used. There is a `shadow` linter - a tool to detect shadowing.

The Universe Block - the block that contains all other blocks. Never redefine any of the identifiers in the universe block (`true`, `false`, `string`, `int`, ...). If you accidentally do so, you will get some very strange behavior.

IF - Go doesn't require you to put parentheses around the condition. You can declare variables that are scoped to the condition and to both the `if` and `else` blocks.

```go
if n := rand.Intn(10); n == 10
```

Having this special scope is very handy, it lets you create variables that are available only where they are needed. Once the series of `if/else` statements ends, `n` is undefined.

FOR - Go has 4 formats of `for`:

- C-style `for`
- condition-only `for`
- infinite `for`
- `for-range`

When iterating over a `map`, the iteration order is randomized (although some runs may be identical). This is a security feature. In older Go versions, the iteration order was usually the same. People used to write code that assumed the order was fixed, and this would break at weird times. The randomized read order prevents a _Hash DoS_ attack.

When iterating over a string with a `for-range` loop, it iterates over the runes, not the bytes. Whenever a `for-range` loop encounters a multibyte rune in a string, it converts the UTF-8 representation into a single 32-bit number and assigns it to the value. Every time the `for-range` loop iterates over your compound type, it copies the value from the compound type to the value variable.

SWITCH - like an `if` statement, you can declare a variable that is scoped to all the branches of the switch statement. If you have a `switch` statement inside a `for` loop, and you want to break out of the `for` loop, put a label on the `for` statement, and then do `break label`. If you don't use a label, Go assumes that you want to break out of the case.

You can create a "blank switch" - this allows you to use any boolean comparison for each case (see the sketch at the end of this chapter). There isn't a lot of difference between a series of `if/else` statements and a blank `switch`. Favor blank `switch` statements over `if/else` chains when you have multiple related cases. Using a `switch` makes the comparisons more visible and reinforces that they are a related set of concerns.

GOTO - Traditionally `goto` was dangerous because it could jump to nearly anywhere in a program (jump into/out of a loop, skip variable definitions, or into the middle of a set of statements in an `if`). This made it difficult to understand what a goto-using program did. Go has a `goto` statement (most modern languages don't). You should still do what you can to avoid using it. Go forbids jumps that skip over variable declarations and jumps that go into an inner or parallel block.
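A minimal sketch of a blank `switch` (the word-length cases here are illustrative):

```go
package main

import "fmt"

func main() {
	words := []string{"hi", "salutations", "hello"}
	for _, word := range words {
		switch wordLen := len(word); { // blank switch: no value to compare against
		case wordLen < 5:
			fmt.Println(word, "is a short word!")
		case wordLen > 10:
			fmt.Println(word, "is a long word!")
		default:
			fmt.Println(word, "is exactly the right length.")
		}
	}
}
```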
Just like in many other languages, functions in Go are values. Any function that has the exact same number and types of parameters and return values meets the type signature.

Anonymous functions - they don't have a name. You don't have to assign them to a variable; you can write them inline and call them immediately.

Functions declared inside functions are called _closures_. This is a computer science word that means that functions declared inside of functions are able to access and modify variables declared in the outer function. Not only can you use a closure to pass some function state to another function, you can also return a closure from a function.

`defer` - used to release resources. Programs often create temporary resources, like files or network connections, that need to be cleaned up. You can `defer` multiple closures in a Go function. They run in last-in, first-out order - the last `defer` registered runs first. In Go, _defer_ statements delay the execution of a function, method, or anonymous function until the surrounding function returns. In other words, the arguments of a deferred call are evaluated immediately, but the call doesn't execute until the surrounding function returns.

A common pattern in Go is for a function that allocates a resource to also return a closure that cleans up the resource.

Empirical Software Engineering:

> Of... eleven proposed characteristics, only two markedly influence complexity growth: the nesting depth and the lack
> of structure.

Go is _Call By Value_ - when you supply a variable for a parameter to a function, Go always makes a copy of the value of the variable. Every type in Go is a value type. It is just that sometimes the value is a pointer (map, slice).

## Chapter 6: Pointers

A pointer - a variable that holds the location in memory where a value is stored. Every variable is stored in one or more contiguous memory locations - _addresses_.

- `&` - the _address_ operator, returns the address of the memory location where the value is stored.
- `*` - the _indirection_ operator, returns the pointed-to value.

Example pointer **type**: `*int`

Before de-referencing a pointer, you must make sure that the pointer is non-nil. Your program will panic if you attempt to de-reference a _nil_ pointer.

Java, Python, JavaScript, and Ruby are pass-by-value (values passed to functions are copies) - just like Go. However, every instance of a class in these languages is implemented as a pointer, so when a class instance is passed to a function or method, the value being copied is the pointer to the instance.

> Immutable types are safer from bugs, easier to understand, and more ready for change. Mutability makes it harder to
> understand what your program is doing, and much harder to enforce contracts.

The lack of immutable declarations in Go might seem problematic, but the ability to choose between value and pointer parameter types addresses the issue.

Be careful when using pointers in Go. They make it hard to understand data flow and can create extra work for the garbage collector. Rather than populating a struct by passing a pointer to it into a function, have the function instantiate the struct. The only time you should use pointer parameters to modify a variable is when the function expects an interface. You see this pattern when working with JSON.
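A minimal sketch of that guidance, with illustrative names - prefer a function that instantiates and returns the struct over one that populates it through a pointer parameter:

```go
package main

import "fmt"

type person struct {
	name string
	age  int
}

// Preferred: the function instantiates the struct and returns it.
func makePerson(name string, age int) person {
	return person{name: name, age: age}
}

// Avoid: populating a struct through a pointer parameter obscures data flow.
func populatePerson(name string, age int, p *person) {
	p.name = name
	p.age = age
}

func main() {
	p := makePerson("Julia", 30)
	fmt.Println(p)
}
```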
The time to pass a pointer into a function is ~1 ns. Passing a value into a function takes longer as the data gets larger - around 1 ms for ~10 MB of data. So if the data is large enough, there are performance benefits to using a pointer. On the other hand, it does not pay off to use a pointer for small data (< 1 MB); for 100 bytes of data it is roughly 30 ns via a pointer vs 10 ns copying the value.

Pointers indicate mutability - be careful when using this pattern.

Avoid using maps for input or return values (a map is implemented as a pointer to a struct). Rather than passing a map around, use a struct. Passing a slice to a function has even more complicated behavior: any modification to the contents is reflected, but use of _append_ is not reflected. As the only linear data structure, slices are often passed around in Go programs - by default you should assume that a slice is not modified by a function.

Garbage - data that has no more pointers pointing to it. Once there are no more pointers pointing to some data, the memory can be reused. If the memory weren't recovered, the program's memory usage would continue to grow until the computer ran out of RAM. The job of a garbage collector is to automatically detect unused memory and recover it.

The Stack - a consecutive block of memory; allocation is fast and simple; local variables, along with parameters passed into a function, are stored on the stack. To store something on the stack, you have to know exactly how big it is at compile time. When the compiler determines that the data can't be stored on the stack, the data the pointer points to _escapes_ the stack and the compiler stores the data on the heap.

The Heap - memory managed by the garbage collector.

Go's garbage collector favours lower latency (< 500ms, finish each cycle as quickly as possible) over throughput (find the most garbage possible in a single scan). If your program creates a lot of garbage, the garbage collector will not find all the garbage during a cycle, slowing down the collector and increasing memory usage.

Go encourages you to use pointers sparingly. We reduce the workload of the garbage collector by making sure that as much as possible is stored on the stack.

## Chapter 7: Types, Methods, and Interfaces

Go is designed to encourage the best practices that are advocated by software engineers, avoiding inheritance while encouraging composition.

Methods: `func (p Person) String() string` - `(p Person)` is like `self` or `this`, however it is non-idiomatic to call it `self` or `this`. This is called a _receiver_, and it should usually have a short name. Methods cannot be overloaded. You can't add methods to types you don't control.

- If a method modifies the receiver, you _must_ use a pointer receiver
- If a method needs to handle _nil_ instances, you _must_ use a pointer receiver
- If a method doesn't modify the receiver, you _can_ use a value receiver

When a type has any pointer receiver methods, a common practice is to be consistent and use pointer receivers for all methods, even the ones that don't modify the receiver.

Do not write getters/setters. Go encourages you to access a field directly. Reserve methods for business logic.

Defining a user-defined type based on another type makes code clearer by providing a name for a concept and describing the kind of data that is expected (e.g. type `Percentage` vs `int`).

Go doesn't have enumerations; instead it has `iota`, which allows you to assign an increasing value to a set of constants. `iota` makes sense when you care about being able to differentiate between a set of values, and don't particularly care what the value is behind the scenes. If the actual value matters, specify it explicitly.
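A minimal sketch of `iota` (the `MailCategory` name is illustrative) for a set of values where only distinctness matters:

```go
package main

import "fmt"

type MailCategory int

// iota assigns 0, 1, 2, ... to the constants in this block;
// the actual numbers are irrelevant, only distinctness matters.
const (
	Uncategorized MailCategory = iota
	Personal
	Spam
)

func main() {
	fmt.Println(Uncategorized, Personal, Spam) // 0 1 2
}
```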
Embedding - promotes methods on the embedded type to the containing struct. Embedding support is rare in programming languages. Do not mistake embedding for inheritance - they are not the same. If the containing struct has fields/methods with the same name, you need to use the embedded field's type to refer to the obscured fields/methods.

The real star of Go's design - implicit interfaces. An `interface` literal lists all methods that must be implemented by a concrete type to meet the interface. Interfaces are usually named with `er` endings (`io.Reader`, `io.Closer`, `json.Marshaler`, `http.Handler`).

Go blends duck typing and Java's interfaces. Implicit interfaces give the flexibility of changing implementation and make it easier to understand what the code is doing.

> Interfaces specify what callers need. The client code defines the interface to specify what functionality it requires.

**Accept interfaces, return structs.** The business logic invoked by your functions should be invoked via interfaces, but the output of your functions should be a concrete type. Go encourages small interfaces.

Sometimes you need to say that a variable could store any value; Go uses `interface{}` to represent this. It matches every type in Go. However, avoid this: Go was designed as a strongly typed language and attempts to work around this are unidiomatic.

Dependency injection - code should explicitly specify the functionality it needs to perform its task. Implicit interfaces make dependency injection an excellent way to decouple your code.

> "Dependency Injection" is a 25-dollar term for a 5-cent concept. [...] Dependency injection means giving an object its
> instance variables. [...]
> Dependency injection is basically providing the objects that an object needs (its dependencies) instead of having it
> construct them itself. It's a very useful technique for testing, since it allows dependencies to be mocked or stubbed
> out.

Use `Wire` if you think writing dependency injection code by hand is too much work.

Go is not object-oriented, nor functional, nor procedural. It is practical. It borrows concepts from many places with the overriding goal of creating a language that is simple, readable, and maintainable by large teams for many years.

## Chapter 8: Errors

Go handles errors by returning a value of type `error` as the last return value of a function (a convention). The Go compiler requires that all variables must be read. Making errors returned values forces developers to either check and handle error conditions or make it explicit that they are ignoring errors by using an underscore (`_`) for the returned error value.

`errors.New("denominator is 0")` - error messages should not be capitalized, nor should they end with punctuation or a newline. A second option is to create an error with `fmt.Errorf`, which supports formatting verbs, e.g. `fmt.Errorf("denominator %d is invalid", denominator)`.

_Sentinel errors_ - a pattern: errors meant to signal that processing cannot continue due to a problem with the current state. By convention, their names start with `Err`. Be sure you need a sentinel error before you define one. It is part of your public API, and you have committed to it being available in all future backward-compatible releases.

`error` is an interface; you can define your own errors that include additional information for logging or error handling. Even when you define your own custom error types, always use `error` as the return type for the error result. Be sure you don't return an uninitialized instance (`var genErr StatusErr`); instead, explicitly return `nil`.

_Wrapping the error_ - when you preserve an error while adding additional information.
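A minimal sketch of wrapping with `fmt.Errorf` and the `%w` verb (the config-reading scenario is illustrative); `errors.Is`, described next, finds the wrapped error:

```go
package main

import (
	"errors"
	"fmt"
	"os"
)

func readConfig(path string) error {
	if _, err := os.ReadFile(path); err != nil {
		// %w wraps err: the original error is preserved inside the new one.
		return fmt.Errorf("reading config %s: %w", path, err)
	}
	return nil
}

func main() {
	err := readConfig("/no/such/file")
	fmt.Println(err)
	fmt.Println(errors.Is(err, os.ErrNotExist)) // true: found in the chain
}
```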
When you have a series of wrapped errors, it is called an _error chain_. You don't usually call `errors.Unwrap` directly. Instead, you use `errors.Is` and `errors.As` to find a specific wrapped error. If you want to wrap an error with your custom error type, your error type needs to implement the `Unwrap` method.

- `errors.Is` - checks whether the returned error, or any error that it wraps, matches a specific sentinel error instance
- `errors.As` - checks whether the returned error, or any error it wraps, matches a specific type

If there are situations in your programs that are unrecoverable, you can create your own panics. Go provides a way to capture a panic to provide a more graceful shutdown, or to prevent shutdown at all. Reserve `panic` for fatal situations; use `recover` as a way to gracefully handle them. If a program panics, be careful about trying to continue executing after the panic.

## Chapter 9: Modules, Packages, and Imports

A module is the root of a Go library or application, stored in a repository. Modules consist of one or more packages, which give the module organization and structure. A collection of Go source code becomes a module when there is a valid `go.mod` file in its root directory -- `go mod init MODULE_PATH`. `MODULE_PATH` - a globally unique name that identifies your module (e.g. a GitHub link).

Go uses capitalization to determine whether a package-level identifier is visible outside the package where it is declared. Anything you export is part of your package's API. Be sure you want to expose certain things to clients. Document all exported identifiers and keep them backward-compatible.

As a general rule, make the name of the package match the name of the directory that contains it. Package names should be descriptive. Don't repeat the name in a function and its package (`extract.Names` > `extract.ExtractNames`). If your code is small -- keep it in a single package. Introduce packages as the codebase grows.

In case of conflicting names, you can alias an import (`import crand "crypto/rand"`). Usage of `.` (which imports all identifiers into the current package's namespace) is discouraged -- like usage of `*` imports in Python.

Go has its own format for writing comments that are automatically converted into documentation -- the `godoc` format (see the sketch at the end of this chapter's notes). Place the documentation directly above the item being documented. Start the comment with the name of the item. Use a blank comment line to break the comment into multiple paragraphs. Use indenting. `go doc PACKAGE_NAME.IDENTIFIER_NAME` - views the godoc.

When you create a package called `internal`, the exported identifiers are only accessible to the direct parent of `internal` and the sibling packages of `internal`.

You might want to rename or move some identifiers -- to avoid a backward-breaking change, don't remove the original identifiers; provide an alternate name instead (`type Bar = Foo`).

SemVer - semantic versioning: _major_._minor_._patch_:

- `patch` - incremented when fixing a bug
- `minor` - incremented when a new, backward-compatible feature is added
- `major` - incremented when making a change that breaks backward compatibility

The import compatibility rule says that all minor and patch versions of a module must be backward-compatible. If they aren't, it is a bug.

`pkg.go.dev` - a single service that gathers together documentation of Go modules.
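A hedged sketch of those godoc conventions for a hypothetical `extract` package (the names are illustrative, not from the book):

```go
// Package extract pulls structured data out of free-form text.
package extract

// Names returns the unique names found in text, in order of first
// appearance.
//
// A blank comment line, like the one above, starts a new paragraph.
func Names(text string) []string {
	return nil // stub body, for illustration only
}
```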
## Chapter 10: Concurrency in Go

Concurrency - the CS term for breaking up a single process into independent components and specifying how these components safely share data. Most languages provide concurrency via a library that uses OS-level threads that share data by attempting to acquire locks. Go is different; it is based on Communicating Sequential Processes.

_Concurrency is not parallelism._ Concurrency is a tool to better structure the problem you are solving - whether concurrent code runs in parallel depends on the hardware and on whether the algorithm allows it. Whether you should use concurrency depends on how data flows through the steps in your program. Concurrency isn't free; it may come with a huge overhead. That's why concurrent code is used for I/O -- there is a lot of waiting, and we can do different things in the meantime.

`goroutine` - the core concept in Go's concurrency model. Goroutines are lightweight processes managed by the Go runtime. They are faster to create than threads (no system-level resources), and their initial stack is small - smaller than a thread stack -- and grows as needed. Switching between _goroutines_ is faster because it happens within the process.

- process - an instance of a program that is being run
- thread - a process is composed of one or more threads; a thread is a unit of execution that is given some time to run by the OS; threads within a process share resources

Go is able to spawn even tens of thousands of simultaneous _goroutines_. Any function can be launched as a _goroutine_.

Goroutines communicate using _channels_ (`ch := make(chan int)`) - channels are reference types. Use `<-` to interact with a channel (read `<-chan`, write `chan<-`). Each value written to a channel can be read once. If multiple goroutines are reading from the same channel, a value will be read by only one of them.

By default, channels are unbuffered - every write to an open, unbuffered channel causes the writing goroutine to pause until another goroutine reads from the same channel. Buffered channels (`ch := make(chan int, 10)`) buffer a limited number of writes without blocking. Most of the time, use unbuffered channels.

Any time you are reading from a channel that might be closed, use the comma ok idiom to ensure that the channel is still open.

`select` - the control structure for concurrency in Go; it solves the _starvation_ problem. It checks whether any of its cases can be processed, and if several can, it picks one at random, so deadlock is avoided. A `select` is often embedded within a for-loop.
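A minimal runnable sketch tying goroutines, channels, and `select` together (the setup is illustrative):

```go
package main

import "fmt"

func main() {
	ch1 := make(chan int)
	ch2 := make(chan int)
	go func() { ch1 <- 1 }()
	go func() { ch2 <- 2 }()
	for i := 0; i < 2; i++ {
		// select reads from whichever channel is ready;
		// if both are ready, one case is chosen at random.
		select {
		case v := <-ch1:
			fmt.Println("from ch1:", v)
		case v := <-ch2:
			fmt.Println("from ch2:", v)
		}
	}
}
```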
Concurrency practices and patterns:

1. Keep your APIs concurrency-free - never export channels or mutexes in your API.
2. Goroutines, for loops, and varying variables - any time a goroutine uses a variable whose value might change, pass the current value of the variable into the goroutine.
3. Always clean up your goroutines - make sure that each one will eventually exit. If a goroutine doesn't exit, the scheduler will periodically give it time to do nothing.
4. The done channel pattern - provides a way to signal a goroutine that it's time to stop processing. It uses a channel to signal that it is time to exit.
5. Using a cancel function to terminate a goroutine - return a cancellation function alongside the channel.
6. When to use buffered and unbuffered channels - buffered channels are useful when you know how many goroutines you have launched, want to limit the number of goroutines you will launch, or want to limit the amount of work that is queued up.
7. Backpressure - systems perform better when their components limit the amount of work they are willing to perform. We can use a buffered channel and a select statement to limit the number of simultaneous requests in a system.
8. Turning off a case in a select - if one of the cases in a _select_ is reading a closed channel, it will always be successful. Use a `nil` channel to disable a case: set the channel's variable to `nil` and then `continue`.
9. How to time out code - use `case <-time.After(2 * time.Second):`.
10. Using WaitGroups - sometimes a goroutine needs to wait for multiple goroutines to complete their work. If you are waiting for a single goroutine, you can use the done channel pattern that we saw earlier, but if you are waiting on several goroutines, you need to use a `WaitGroup` (see the sketch at the end of this chapter's notes).
11. Running code exactly once - `sync.Once` - a handy type that enables this functionality.
12. Putting our concurrent tools together - by structuring our code with goroutines, channels and select statements, we separate the individual parts to run and complete in any order and cleanly exchange data between the dependent parts.

`mutex` - mutual exclusion; the job of a mutex is to limit the concurrent execution of some code or access to a shared piece of data. This protected part is called the _critical section_.

> Share memory by communicating, do not communicate by sharing memory.

Decision tree - use channels or mutexes:

- If you are coordinating goroutines or tracking a value as it is transformed by a series of goroutines, use channels
- If you are sharing access to a field in a struct, use mutexes
- If you discover a critical performance issue when using channels, and you cannot find any other way to fix the issue, modify your code to use a mutex
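The `WaitGroup` sketch referenced in item 10 above - a minimal, runnable example with illustrative values:

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	results := make(chan int, 3) // buffered: we know how many goroutines we launch
	for i := 1; i <= 3; i++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			results <- n * n
		}(i)
	}
	wg.Wait()      // block until every goroutine has called Done
	close(results) // safe to close: all writers have finished
	for v := range results {
		fmt.Println(v)
	}
}
```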
## Chapter 11: The Standard Library

Like Python, Go has a "batteries included" philosophy - it provides many of the tools that you need to build an application.

`io` - contains two of the most useful interfaces - `io.Writer` and `io.Reader`.

`time` - two main types used to represent time - `time.Duration` (used to represent a period of time, e.g. `2 * time.Hour`) and `time.Time` (used to represent a moment in time). It is possible to extract the month, day, year, ... from a `Time`. Most OSes keep track of two different sorts of time:

- the wall clock - the current time
- the monotonic clock - counts up from the time the computer was booted

`encoding/json` - Go includes support for converting Go data types to and from JSON.

- marshalling - Go data type -> encoding
- unmarshalling - encoding -> Go data type

We specify the rules for processing our JSON with _struct tags_, strings that are written after the fields in a struct (`tagName:"tagValue"`, e.g. `json:"id"`).

`net/http` - a production-quality HTTP/2 client and server.

- Client - makes HTTP requests and receives HTTP responses
- Server - responsible for listening for HTTP requests

Even though Go provides the server, use idiomatic third-party modules to enhance it.

## Chapter 12: The Context

Servers need a way to handle metadata on individual requests. Go uses a construct called the context. Context - an instance that meets the Context interface. An empty context is a starting point: each time you add metadata to the context, you do so by wrapping the existing context using one of the factory functions in the context package.

Cancellation - imagine a request that spawns several goroutines, each one calling a different HTTP service. If one service returns an error that prevents you from returning a valid response, there is no point in continuing to process the other goroutines. In Go this is called _cancellation_.

There are 4 things a server can do to manage its load:

- Limit simultaneous requests
- Limit how many requests are queued waiting to run
- Limit how long a request can run
- Limit the resources a request can use

Go provides tools to handle the first three: the first two are handled by limiting the number of goroutines, and the context provides a way to control how long a request runs. The context also provides a way to pass per-request metadata through your program.

## Chapter 13: Writing Tests

Go includes testing support as part of its standard library. The `testing` package provides the types and functions to write tests, while the `go test` tool runs your tests and generates reports.

Go tests are placed in the same directory and the same package as the production code. Tests are able to access and test unexported functions and variables. If you want to test just the public API, Go has a special convention for this: use `packagename_test` for the package name.

Every test lives in a file whose name ends with `_test.go`. Test functions start with the word `Test` and take a single parameter of type `*testing.T` (see the sketch at the end of this chapter's notes). It is possible to write set-up and tear-down code.

Use `go-cmp` (a third-party module) to compare two instances of a compound type.

Adding the `-cover` flag to the `go test` command calculates coverage information and includes a summary in the test output. `-coverprofile=c.out` saves the coverage info to a file. `-html=c.out` generates an HTML representation of your source code coverage.

> Code coverage is necessary, but it is not sufficient. You can have 100% code coverage and still have bugs in your
> code.

> When your code depends on abstractions, it is easier to write unit tests.

A stub returns a canned value for a given input, whereas a mock validates that a set of calls happen in the expected order with the expected inputs. The `httptest` package makes it easier to stub HTTP services.

Even though `httptest` provides a way to avoid testing external services, you should still write _integration_ tests - automated tests that connect to other services. These validate that your understanding of the service's APIs is correct. The challenge is figuring out how to group your automated tests - you want to run integration tests only when the support environment is present. Also, integration tests tend to be slower than unit tests, so they usually run less frequently.

Go includes a _race checker_ - it helps to find accidental references to a variable from two different goroutines without acquiring a lock. It is not guaranteed to find every single data race in your code, but if it finds one, you should put proper locks around what it finds. Do not solve race conditions by inserting "sleeps" into the code.
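A minimal sketch of those testing conventions - a hypothetical `Add` function and its test (in a real package the test would live in its own `_test.go` file):

```go
package adder

import "testing"

func Add(a, b int) int {
	return a + b
}

// TestAdd follows the conventions: it starts with Test
// and takes a single *testing.T parameter.
func TestAdd(t *testing.T) {
	if got := Add(2, 3); got != 5 {
		t.Errorf("Add(2, 3) = %d, want 5", got)
	}
}
```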
## Chapter 14: Here There Be Dragons: Reflect, Unsafe, and Cgo

Go is a safe language, but sometimes your Go programs need to venture out into less defined areas.

Reflection allows us to examine types at runtime. It also provides the ability to examine, modify, and create variables, functions, and structs at runtime.

- `database/sql` - uses reflection to send requests to databases and read data back
- `text/template` and `html/template` - use reflection to process the values that are passed to the templates
- `fmt` - uses reflection to detect the type of the provided parameters
- `errors` - uses reflection to implement `errors.Is` and `errors.As`
- `sort` - uses reflection to implement functions that sort and evaluate slices of any type

Most of these examples have one thing in common - they involve accessing and formatting data that is being imported into or exported out of a Go program.

The `reflect` package is built around 3 core concepts:

- types - `reflect.TypeOf` returns a value of type `reflect.Type`, which represents the type of the variable passed into the function
- kinds - the `Kind` method on `reflect.Type` returns a value of type `reflect.Kind`, a constant that says what the type is made of - a slice, a map, a pointer, a struct, an interface, an array, a function, an int, ...
- values - we can use `reflect.ValueOf` to create a `reflect.Value` instance that represents the value of a variable

Other use cases:

- use reflection to check if an interface's value is nil
- use reflection to write a data marshaller
- use reflection to automate repetitive tasks, e.g. create a new function without writing repetitive code

While reflection is essential when converting data at the boundaries of Go, be careful using it in other situations.

`unsafe` - allows you to manipulate memory. Very small and very odd. There are 2 common patterns in `unsafe` code:

- conversion between 2 types of variables that are normally not convertible
- reading/modifying the bytes in a variable

The majority of _unsafe_ usages were motivated by integration with operating systems and C code. Developers also frequently use _unsafe_ to write more efficient Go code. The _unsafe_ package is powerful and low-level! Avoid using it unless you know what you are doing and you need the performance improvements that it provides.

Nearly every programming language provides a way to integrate with C libraries. Go calls its FFI (Foreign Function Interface) to C `cgo`. `cgo` is for integration, not performance. `cgo` isn't fast, and it is not easy to use for nontrivial programs. The only reason to use `cgo` is if there is a C library that you must use and there is no suitable Go replacement.

## Chapter 15: A Look at the Future: Generics in Go

Generics reduce repetitive code and increase type safety. Generics is the concept that it is sometimes useful to write functions where the specific type of a parameter or field is specified when it is used. Many common algorithms, such as map, reduce, and filter, had to be reimplemented for different types.

> Properly written, Go is boring... well-written Go programs tend to be straightforward and sometimes a bit repetitive.
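A sketch (not from the book) of what generics enable - a `Map` helper written once for any element types:

```go
package main

import "fmt"

// Map applies f to every element of s. Before generics, this had to be
// rewritten for each pair of element types.
func Map[T, U any](s []T, f func(T) U) []U {
	out := make([]U, 0, len(s))
	for _, v := range s {
		out = append(out, f(v))
	}
	return out
}

func main() {
	doubledAsStrings := Map([]int{1, 2, 3}, func(i int) string {
		return fmt.Sprint(i * 2)
	})
	fmt.Println(doubledAsStrings) // [2 4 6]
}
```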
================================================
FILE: books/hands-on-ml.md
================================================

[go back](https://github.com/pkardas/learning)

# Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems

Book by Aurelien Geron

[TOC]

TODO: *Re-read Part I.*

## Chapter 10: Introduction to Artificial Neural Networks with Keras

ANNs - Artificial Neural Networks - inspired by the networks of biological neurons, they have gradually become quite different from their biological cousins.

ANNs were introduced in 1943 by McCulloch and Pitts - a simplified computational model of how biological neurons might work together in animal brains to perform complex computation using propositional logic. McCulloch and Pitts proposed an artificial neuron that has one or more binary inputs (on/off) and one binary output. The artificial neuron activates its output when more than a certain number of its inputs are active. They showed that even such a simplified model is capable of performing various logical computations.

The Perceptron - one of the simplest ANN architectures, invented in 1957. It is based on a slightly different artificial neuron - the threshold logic unit (TLU), or linear threshold unit. The inputs and outputs are numbers (instead of binary on/off values), and each input connection is associated with a weight. The TLU computes a weighted sum of its inputs, then applies a step function to that sum and outputs the result. The most commonly used step function is the Heaviside step function. A single TLU can be used for simple linear binary classification.

A perceptron is composed of a single layer of TLUs, each TLU connected to all inputs (when all the neurons in a layer are connected to every neuron in the previous layer, the layer is called a fully connected layer). The inputs of the perceptron are fed to special passthrough neurons in the input layer. An extra bias feature is generally added (a neuron that always outputs 1). A perceptron with 2 inputs and 3 outputs can classify instances simultaneously into three different binary classes - a multi-output classifier.

How is a perceptron trained? "Cells that fire together, wire together" - the connection weight between 2 neurons tends to increase when they fire simultaneously (Hebb's rule). The perceptron is fed one example at a time; when it outputs a wrong answer, it reinforces the connection weights from the inputs that would have contributed to the correct answer. In fact, a single perceptron is similar to an SGDClassifier.

Back-propagation training algorithm - it is Gradient Descent using an efficient technique for computing the gradients automatically, in just 2 passes through the network - one forward, one backward. It can find out how each connection weight and each bias term should be tweaked in order to reduce the error. In other words: for each training instance, the back-propagation algorithm first makes a prediction (forward pass) and measures the error, then goes through each layer in reverse to measure the error contribution from each connection (reverse pass), and finally tweaks the connection weights to reduce the error (Gradient Descent step).

When building an MLP for regression, you don't want to use any activation function for the output neurons, so they are free to output any range of values. If the output needs to always be positive, ReLU can be used in the output layer. The loss function to use during training is typically the mean squared error, but if there are many outliers in the training set, the mean absolute error might be a better choice. An MLP can also be used for classification.

TensorFlow 2 adopted Keras' high-level API + introduced some additional functionalities.

**Sequential API** - the simplest kind of Keras model, for neural networks that are just composed of a single stack of layers connected sequentially. Flatten - a preprocessing layer whose role is to convert each input into a 1D array. Once the model is defined, it needs to be compiled - you need to specify the loss function and optimiser to use; optionally, a list of metrics can be passed.
Then the model can be trained. If the training set is very skewed, with some classes being overrepresented and others underrepresented, it is useful to set the class_weight argument when calling the fit method.

If you are not satisfied with the model's performance, adjust the hyperparameters if longer training is not bringing any additional benefit. The model estimates probabilities per class.

When layers are created, they are called like functions -> `keras.layers.Dense(30)(prev_layer)` - this is why it is called the **Functional API**; this is the way of telling Keras how to join layers. A model can have multiple inputs and multiple outputs, depending on the task.

The Sequential API and the Functional API are declarative; for an imperative programming style there is the **Subclassing API**. Simply subclass the `Model` class, create layers in the constructor and use them to perform computations in the `call` method. The Subclassing API is very limited: it does not allow viewing the model's summary, and Keras cannot inspect the model ahead of time. So the Sequential and Functional APIs are preferred.

It is possible to save and load a Keras model to/from disk. Keras will use the HDF5 format to save the model's architecture and the values of all the model parameters for every layer (weights and biases). When training an enormous model, it is a good idea to save checkpoints at regular intervals during training to avoid losing everything if the computer crashes. In order to make checkpoints, you have to use callbacks.

================================================ FILE: books/head-first-design-patterns/ch_01_strategy.py ================================================ class FlyBehavior: def fly(self) -> None: raise NotImplementedError class QuackBehavior: def quack(self) -> None: raise NotImplementedError class Duck: def __init__(self, fly_behavior: FlyBehavior, quack_behavior: QuackBehavior) -> None: self.fly_behavior = fly_behavior self.quack_behavior = quack_behavior def perform_fly(self) -> None: self.fly_behavior.fly() def perform_quack(self) -> None: self.quack_behavior.quack() def display(self) -> None: raise NotImplementedError class FlyWithWings(FlyBehavior): def fly(self) -> None: print("I am using wings!") class FlyNoWay(FlyBehavior): def fly(self) -> None: print("I am not flying.") class Quack(QuackBehavior): def quack(self) -> None: print("QUACK") class Squeak(QuackBehavior): def quack(self) -> None: print("SQUEAK") class MuteQuack(QuackBehavior): def quack(self) -> None: print("") class MallardDuck(Duck): def __init__(self) -> None: super().__init__(FlyWithWings(), Quack()) def display(self) -> None: print("Looks like a mallard.") duck = MallardDuck() duck.display() duck.perform_fly() duck.perform_quack() ================================================ FILE: books/head-first-design-patterns/ch_02_observer.py ================================================ class Observer: def update(self) -> None: raise NotImplementedError class Subject: def register_observer(self, observer: Observer) -> None: raise NotImplementedError def remove_observer(self, observer: Observer) -> None: raise NotImplementedError def notify_observers(self) -> None: raise NotImplementedError class DisplayElement: def display(self) -> None: raise NotImplementedError class WeatherData(Subject): def __init__(self): self._observers = [] self.temperature = 0.0 self.humidity = 0.0 self.pressure = 0.0 def register_observer(self, observer: Observer) -> None: self._observers.append(observer) def remove_observer(self, observer: Observer) -> None:
self._observers.remove(observer) def notify_observers(self) -> None: for observer in self._observers: observer.update() def set_measurements(self, temperature: float, humidity: float, pressure: float) -> None: self.temperature = temperature self.humidity = humidity self.pressure = pressure self.notify_observers() class CurrentConditionsDisplay(Observer, DisplayElement): def __init__(self, weather_data: WeatherData): self._temperature = 0.0 self._humidity = 0.0 self._weather_data = weather_data self._weather_data.register_observer(self) def display(self) -> None: print(f"Current conditions: {self._temperature}°C, {self._humidity}%") def update(self) -> None: self._temperature = self._weather_data.temperature self._humidity = self._weather_data.humidity self.display() class AvgTempDisplay(Observer, DisplayElement): def __init__(self, weather_data: WeatherData): self._temperature = [] self._weather_data = weather_data self._weather_data.register_observer(self) def display(self) -> None: print(f"Average temperature: {sum(self._temperature) / len(self._temperature)}°C") def update(self) -> None: self._temperature.append(self._weather_data.temperature) self.display() weather_data = WeatherData() current_display = CurrentConditionsDisplay(weather_data) forecast_display = AvgTempDisplay(weather_data) weather_data.set_measurements(23.0, 68.1, 1018.0) weather_data.set_measurements(24.2, 70.4, 1019.2) weather_data.set_measurements(25.8, 71.2, 1018.4) ================================================ FILE: books/head-first-design-patterns/ch_03_decorator.py ================================================ class Beverage: @property def description(self) -> str: return self.__class__.__name__ @property def cost(self) -> float: raise NotImplementedError class CondimentDecorator(Beverage): def __init__(self, beverage: Beverage): self._beverage = beverage @property def description(self) -> str: return f"{self._beverage.description}, {super(CondimentDecorator, self).description}" @property def cost(self) -> float: raise NotImplementedError class Espresso(Beverage): @property def cost(self) -> float: return 1.99 class HouseBlend(Beverage): @property def cost(self) -> float: return 0.89 class Mocha(CondimentDecorator): @property def cost(self) -> float: return self._beverage.cost + 0.20 class Soy(CondimentDecorator): @property def cost(self) -> float: return self._beverage.cost + 0.15 beverage = Espresso() beverage = Mocha(beverage) beverage = Mocha(beverage) beverage = Soy(beverage) print(f"${beverage.cost} for '{beverage.description}'") ================================================ FILE: books/head-first-design-patterns/ch_04_factory.py ================================================ class Ingredient: def __init__(self): print(self.__class__.__name__) class ThinCrustDough(Ingredient): pass class ThickCrustDough(Ingredient): pass class MarinaraSauce(Ingredient): pass class PlumTomatoSauce(Ingredient): pass class MozzarellaCheese(Ingredient): pass class ReggianoCheese(Ingredient): pass class Garlic(Ingredient): pass class Onion(Ingredient): pass class Mushroom(Ingredient): pass class SlicedPepperoni(Ingredient): pass class FreshClams(Ingredient): pass class FrozenClams(Ingredient): pass class PizzaIngredientFactory: def create_dough(self): raise NotImplementedError def create_sauce(self): raise NotImplementedError def create_cheese(self): raise NotImplementedError def create_veggies(self): raise NotImplementedError def create_pepperoni(self): raise NotImplementedError def create_clam(self): raise 
NotImplementedError class NYPizzaIngredientFactory(PizzaIngredientFactory): def create_dough(self): return ThinCrustDough() def create_sauce(self): return MarinaraSauce() def create_cheese(self): return ReggianoCheese() def create_veggies(self): return [Garlic(), Onion()] def create_pepperoni(self): return SlicedPepperoni() def create_clam(self): return FreshClams() class ChicagoPizzaIngredientFactory(PizzaIngredientFactory): def create_dough(self): return ThickCrustDough() def create_sauce(self): return PlumTomatoSauce() def create_cheese(self): return MozzarellaCheese() def create_veggies(self): return [Garlic(), Mushroom()] def create_pepperoni(self): return SlicedPepperoni() def create_clam(self): return FrozenClams() class Pizza: name = ... def __init__(self, ingredient_factory: PizzaIngredientFactory): self._ingredient_factory = ingredient_factory def prepare(self) -> None: raise NotImplementedError def bake(self) -> None: print("Bake for 25 minutes at 350") def cut(self) -> None: print("Cutting the pizza into diagonal slices") def box(self) -> None: print("Place the pizza in official PizzaStore box") class CheesePizza(Pizza): def prepare(self) -> None: print(f"Preparing {self.name}") self._ingredient_factory.create_dough() self._ingredient_factory.create_sauce() self._ingredient_factory.create_cheese() class ClamPizza(Pizza): def prepare(self) -> None: print(f"Preparing {self.name}") self._ingredient_factory.create_dough() self._ingredient_factory.create_sauce() self._ingredient_factory.create_cheese() self._ingredient_factory.create_clam() class PizzaStore: def order_pizza(self, pizza_type: str) -> Pizza: pizza = self.create_pizza(pizza_type) pizza.prepare() pizza.bake() pizza.cut() pizza.box() return pizza # Factory Method: def create_pizza(self, pizza_type: str) -> Pizza: raise NotImplementedError class NYPizzaStore(PizzaStore): def create_pizza(self, pizza_type: str) -> Pizza: ingredient_factory = NYPizzaIngredientFactory() match pizza_type: case "cheese": pizza = CheesePizza(ingredient_factory) pizza.name = "NY Style Sauce and Cheese Pizza" case "clam": pizza = ClamPizza(ingredient_factory) pizza.name = "NY Style Sauce and Clam Pizza" case _: raise RuntimeError("Unknown pizza type") return pizza class ChicagoPizzaStore(PizzaStore): def create_pizza(self, pizza_type: str) -> Pizza: ingredient_factory = ChicagoPizzaIngredientFactory() match pizza_type: case "cheese": pizza = CheesePizza(ingredient_factory) pizza.name = "Chicago Style Deep Dish Cheese Pizza" case "clam": pizza = ClamPizza(ingredient_factory) pizza.name = "Chicago Style Deep Dish Clam Pizza" case _: raise RuntimeError("Unknown pizza type") return pizza ny_store = NYPizzaStore() ny_store.order_pizza("cheese") chicago_store = ChicagoPizzaStore() chicago_store.order_pizza("cheese") ================================================ FILE: books/head-first-design-patterns/ch_05_singleton.py ================================================ class ChocolateBoiler: _instance = None def __new__(cls): if not cls._instance: cls._instance = super(ChocolateBoiler, cls).__new__(cls) return cls._instance boiler_0 = ChocolateBoiler() boiler_1 = ChocolateBoiler() print(f"#0: {boiler_0}") print(f"#1: {boiler_1}") print(f"Are they the same object? {boiler_0 is boiler_1}") # Implementation using variable - instantiated on module import: class ChocolateBoiler: pass chocolate_boiler = ChocolateBoiler() print(f"Are they the same object? 
{chocolate_boiler is chocolate_boiler}") # Implementation using function - using 'attr': def get_chocolate_boiler() -> ChocolateBoiler: if not hasattr(get_chocolate_boiler, "instance"): setattr(get_chocolate_boiler, "instance", ChocolateBoiler()) return getattr(get_chocolate_boiler, "instance") print(f"Are they the same object? {get_chocolate_boiler() is get_chocolate_boiler()}") # Implementation using function - using variable: _chocolate_boiler = None def get_chocolate_boiler() -> ChocolateBoiler: global _chocolate_boiler if not _chocolate_boiler: _chocolate_boiler = ChocolateBoiler() return _chocolate_boiler print(f"Are they the same object? {get_chocolate_boiler() is get_chocolate_boiler()}") ================================================ FILE: books/head-first-design-patterns/ch_06_command.py ================================================ from typing import List class Device: @property def name(self) -> str: return self.__class__.__name__ def on(self) -> None: print(f"{self.name} was turned on") def off(self) -> None: print(f"{self.name} was turned off") class Light(Device): pass class Tv(Device): pass class Stereo(Device): def __init__(self) -> None: self.volume = 0 def set_cd(self) -> None: print(f"{self.name} CD set") def set_volume(self, volume: int) -> None: print(f"{self.name} Volume set to {volume}") self.volume = volume class Command: def execute(self) -> None: raise NotImplementedError def undo(self) -> None: raise NotImplementedError class NoCommand(Command): def execute(self) -> None: pass def undo(self) -> None: pass class MarcoCommand(Command): def __init__(self, commands: List[Command]): self._commands = commands def execute(self) -> None: for command in self._commands: command.execute() def undo(self) -> None: for command in self._commands[::-1]: command.undo() class DeviceOnCommand(Command): def __init__(self, device: Device) -> None: self._device = device def execute(self) -> None: self._device.on() def undo(self) -> None: self._device.off() class DeviceOffCommand(Command): def __init__(self, device: Device) -> None: self._device = device def execute(self) -> None: self._device.off() def undo(self) -> None: self._device.on() class StereoVolumeUpCommand(Command): def __init__(self, stereo: Stereo) -> None: self._stereo = stereo def execute(self) -> None: self._stereo.set_volume(stereo.volume + 1) def undo(self) -> None: self._stereo.set_volume(stereo.volume - 1) class RemoteControl: def __init__(self): self._on_commands = [NoCommand()] * 7 self._off_commands = [NoCommand()] * 7 self._undo_commands = [] def set_command(self, slot: int, on_command: Command, off_command: Command) -> None: self._on_commands[slot] = on_command self._off_commands[slot] = off_command def on_button_pushed(self, slot: int) -> None: self._on_commands[slot].execute() self._undo_commands.append(self._on_commands[slot]) def off_button_pushed(self, slot: int) -> None: self._off_commands[slot].execute() self._undo_commands.append(self._off_commands[slot]) def undo_button_pushed(self) -> None: if not self._undo_commands: return self._undo_commands.pop().undo() light = Light() tv = Tv() stereo = Stereo() light_on_command, light_off_command = DeviceOnCommand(light), DeviceOffCommand(light) tv_on_command, tv_off_command = DeviceOnCommand(tv), DeviceOffCommand(tv) stereo_on_command, stereo_off_command = DeviceOnCommand(stereo), DeviceOffCommand(stereo) volume_up_command = StereoVolumeUpCommand(stereo) party_on_command = MarcoCommand([light_on_command, tv_on_command, stereo_on_command, 
volume_up_command]) party_off_command = MarcoCommand([light_on_command, tv_on_command, stereo_off_command]) remote = RemoteControl() remote.set_command(0, light_on_command, light_off_command) remote.set_command(1, tv_on_command, tv_off_command) remote.set_command(2, stereo_on_command, stereo_off_command) remote.set_command(3, party_on_command, party_off_command) remote.on_button_pushed(1) remote.on_button_pushed(3) remote.undo_button_pushed() ================================================ FILE: books/head-first-design-patterns/ch_07_adapter.py ================================================ class Duck: def quack(self) -> None: raise NotImplementedError def fly(self) -> None: raise NotImplementedError class Turkey: def gobble(self) -> None: raise NotImplementedError def fly(self) -> None: raise NotImplementedError class WildTurkey(Turkey): def gobble(self) -> None: print("Gobble Gobble") def fly(self) -> None: print("I am flying a short distance") class TurkeyAdapter(Duck): def __init__(self, turkey: Turkey): self._turkey = turkey def quack(self) -> None: self._turkey.gobble() def fly(self) -> None: self._turkey.fly() # We ran out of ducks, so we use turkeys: turkey = WildTurkey() turkey_adapter = TurkeyAdapter(turkey) turkey_adapter.quack() ================================================ FILE: books/head-first-design-patterns/ch_07_facade.py ================================================ from unittest.mock import Mock class HomeTheaterFacade: def __init__(self, amplifier, tuner, projector, lights, screen, player, popper): self._amplifier = amplifier self._tuner = tuner self._projector = projector self._lights = lights self._screen = screen self._player = player self._popper = popper # Wrap complex behavior into single method: def watch_movie(self, movie): self._popper.on() self._popper.pop() self._lights.dim(10) self._screen.down() self._projector.on() self._amplifier.on() self._amplifier.set_volume(20) self._player.on() self._player.play(movie) home_theater = HomeTheaterFacade(*([Mock()] * 7)) home_theater.watch_movie("Joker") ================================================ FILE: books/head-first-design-patterns/ch_08_template_method.py ================================================ class CaffeineBeverage: def prepare_recipe(self) -> None: self._boil_water() self._brew() self._pour_in_cup() self._add_condiments() def _boil_water(self) -> None: print("Boiling water") def _pour_in_cup(self) -> None: print("Pouring in a cup") def _brew(self) -> None: raise NotImplementedError def _add_condiments(self) -> None: raise NotImplementedError class Tea(CaffeineBeverage): def _brew(self) -> None: print("Steeping the tea") def _add_condiments(self) -> None: print("Adding Lemon") class Coffee(CaffeineBeverage): def _brew(self) -> None: print("Dripping Coffee through filter") def _add_condiments(self) -> None: print("Adding Sugar and Milk") Coffee().prepare_recipe() Tea().prepare_recipe() ================================================ FILE: books/head-first-design-patterns/ch_09_composite.py ================================================ from __future__ import annotations from abc import ABC from dataclasses import dataclass class MenuComponent: def add(self, menu_component: MenuComponent): raise NotImplementedError def remove(self, menu_component: MenuComponent): raise NotImplementedError def get_child(self, i: int): raise NotImplementedError def print(self): raise NotImplementedError @dataclass class MenuItem(MenuComponent, ABC): name: str description: str vegetarian: bool price: float 
def print(self): print(f"{self.name}, {self.price}, {self.description}") class Menu(MenuComponent): def __init__(self, name: str): self._name = name self._menu_components = [] def add(self, menu_component: MenuComponent): self._menu_components.append(menu_component) def remove(self, menu_component: MenuComponent): self._menu_components.remove(menu_component) def get_child(self, i: int): return self._menu_components[i] def print(self): print(self._name) for menu_component in self._menu_components: menu_component.print() class Waitress: def __init__(self, menu_component: MenuComponent): self._menu_component = menu_component def print_menu(self): self._menu_component.print() breakfast_menu = Menu("BREAKFAST") dinner_menu = Menu("DINNER") dessert_menu = Menu("DESSERT") all_menus = Menu("ALL MENUS") all_menus.add(breakfast_menu) all_menus.add(dinner_menu) dinner_menu.add(MenuItem("Pasta", "Pasta with marinara Sauce", True, 3.89)) dinner_menu.add(dessert_menu) dessert_menu.add(MenuItem("Apple Pie", "Apple pie with a flaky crust, topped with vanilla ice cream", True, 1.59)) Waitress(all_menus).print_menu() ================================================ FILE: books/head-first-design-patterns/ch_09_iterator.py ================================================ from collections.abc import Iterator from dataclasses import dataclass from typing import ( Dict, List, Union, ) @dataclass class MenuItem: name: str description: str vegetarian: bool price: float class DinnerMenuIterator(Iterator): # Just for demonstration purposes! def __init__(self, collection: List[MenuItem]): self._collection = collection self._position = 0 def __next__(self) -> MenuItem: try: value = self._collection[self._position] self._position += 1 except IndexError: raise StopIteration() return value class DinnerMenu: # Just for demonstration purposes! menu = [ MenuItem("Vegetarian BLT", "Fake Bacon with lettuce on whole wheat", True, 2.99), MenuItem("BLT", "Bacon with lettuce on whole wheat", False, 2.99), MenuItem("Soup of the day", "Soup of the day, with a side of potato salad", False, 3.99), MenuItem("HotDog", "A Hot Dog with sauerkraut, relish, onions, topped with cheese", False, 3.05), ] def __iter__(self) -> DinnerMenuIterator: # Factory Method return DinnerMenuIterator(self.menu) class BreakfastMenuIterator(Iterator): # Just for demonstration purposes! def __init__(self, collection: Dict[str, MenuItem]): self._collection = collection self._position = 0 def __next__(self) -> MenuItem: try: value = list(self._collection.values())[self._position] self._position += 1 except IndexError: raise StopIteration() return value class BreakfastMenu: # Just for demonstration purposes! 
menu = { "K&B's Pancake Breakfast": MenuItem("K&B's Pancake Breakfast", "Pancakes with scrambled eggs and toast", True, 2.99), "Regular Pancake Breakfast": MenuItem("Regular Pancake Breakfast", "Pancakes with fried eggs, sausage", False, 2.99), "Blueberry Pancakes": MenuItem("Blueberry Pancakes", "Pancakes made with fresh blueberries", True, 3.49), } def __iter__(self) -> BreakfastMenuIterator: # Factory Method return BreakfastMenuIterator(self.menu) class Waitress: def __init__(self, pancake_menu: BreakfastMenu, dinner_menu: DinnerMenu): self._pancake_menu = pancake_menu self._dinner_menu = dinner_menu def print_menu(self): print("BREAKFAST") self._print_menu(self._pancake_menu) print("DINNER") self._print_menu(self._dinner_menu) @staticmethod def _print_menu(menu: Union[BreakfastMenu, DinnerMenu]): for menu_item in menu: print(f"{menu_item.name}, {menu_item.price}, {menu_item.description}") Waitress(BreakfastMenu(), DinnerMenu()).print_menu() ================================================ FILE: books/head-first-design-patterns/ch_10_state.py ================================================ from __future__ import annotations from random import random class State: def __init__(self, gumball_machine: GumballMachine): self._gumball_machine = gumball_machine def insert_quarter(self) -> None: pass def eject_quarter(self) -> None: pass def turn_crank(self) -> None: pass def dispense(self) -> None: pass class NoQuarterState(State): def insert_quarter(self) -> None: print("You inserted a quarter") self._gumball_machine.state = self._gumball_machine.has_quarter_state class HasQuarterState(State): def eject_quarter(self) -> None: print("Quarter returned") self._gumball_machine.state = self._gumball_machine.no_quarter_state def turn_crank(self) -> None: print("You turned...") if random() < 0.1 and self._gumball_machine.count > 1: self._gumball_machine.state = self._gumball_machine.winner_state else: self._gumball_machine.state = self._gumball_machine.sold_state class SoldState(State): def dispense(self) -> None: self._gumball_machine.release_ball() if self._gumball_machine.count > 0: self._gumball_machine.state = self._gumball_machine.no_quarter_state else: print("Out of gumballs!") self._gumball_machine.state = self._gumball_machine.sold_out_state class SoldOutState(State): pass class WinnerState(State): def dispense(self) -> None: self._gumball_machine.release_ball() if self._gumball_machine.count == 0: self._gumball_machine.state = self._gumball_machine.sold_out_state else: self._gumball_machine.release_ball() print("You are a WINNER!") if self._gumball_machine.count > 0: self._gumball_machine.state = self._gumball_machine.no_quarter_state else: print("Out of gumballs!") self._gumball_machine.state = self._gumball_machine.sold_out_state class GumballMachine: def __init__(self, count: int): self.count = count self.no_quarter_state = NoQuarterState(self) self.has_quarter_state = HasQuarterState(self) self.sold_state = SoldState(self) self.sold_out_state = SoldOutState(self) self.winner_state = WinnerState(self) self.state = self.no_quarter_state if count > 0 else self.sold_out_state def insert_quarter(self) -> None: self.state.insert_quarter() def eject_quarter(self) -> None: self.state.eject_quarter() def turn_crank(self) -> None: self.state.turn_crank() self.state.dispense() def release_ball(self) -> None: print("A ball rolling out the slot...") if self.count > 0: self.count = self.count - 1 machine = GumballMachine(5) machine.insert_quarter() machine.turn_crank() machine.insert_quarter() 
machine.turn_crank() machine.insert_quarter() machine.turn_crank() ================================================ FILE: books/head-first-design-patterns/ch_11_virtual_proxy.py ================================================ class Icon: @property def width(self) -> int: raise NotImplementedError @property def height(self) -> int: raise NotImplementedError def paint_icon(self) -> None: raise NotImplementedError class ImageIcon(Icon): @property def width(self) -> int: return 1280 @property def height(self) -> int: return 720 def paint_icon(self) -> None: print(":)") class ImageProxy(Icon): def __init__(self, url: str): self._image_icon = None self._url = url # Following 'if' statements can be reworked to use The State Pattern: ImageNotLoaded and ImageLoaded @property def width(self) -> int: return self._image_icon.width if self._image_icon else 600 @property def height(self) -> int: return self._image_icon.height if self._image_icon else 800 def paint_icon(self) -> None: if not self._image_icon: # Download image from the internet print(f"Downloading the image from '{self._url}'") self._image_icon = ImageIcon() self._image_icon.paint_icon() image = ImageProxy("whatever://image") image.paint_icon() ================================================ FILE: books/head-first-design-patterns/notes.md ================================================ [go back](https://github.com/pkardas/learning) # Head First Design Patterns: Building Extensible and Maintainable Object-Oriented Software Book by Eric Freeman and Elisabeth Robson Code here: [click](.) - [Chapter 1: The Strategy Pattern - Welcome to Design Patterns](#chapter-1-welcome-to-design-patterns) - [Chapter 2: The Observer Pattern - Keeping your Objects in the Know](#chapter-2-keeping-your-objects-in-the-know) - [Chapter 3: The Decorator Pattern - Decorating Objects](#chapter-3-decorating-objects) - [Chapter 4: The Factory Pattern - Baking with OO Goodness](#chapter-4-baking-with-oo-goodness) - [Chapter 5: The Singleton Pattern - One-of-a-kind Objects](#chapter-5-one-of-a-kind-objects) - [Chapter 6: The Command Pattern - Encapsulating Invocation](#chapter-6-encapsulating-invocation) - [Chapter 7: The Adapter and Facade Patterns - Being Adaptive](#chapter-7-being-adaptive) - [Chapter 8: The Template Method Pattern - Encapsulating Algorithms](#chapter-8-encapsulating-algorithms) - [Chapter 9: The Iterator and Composite Patterns - Well-Managed Collections](#chapter-9-well-managed-collections) - [Chapter 10: The State Pattern - The State of Things](#chapter-10-the-state-of-things) - [Chapter 11: The Proxy Pattern - Controlling Object Access](#chapter-11-controlling-object-access) - [Chapter 12: Compound patterns - Patterns of Patterns](#chapter-12-patterns-of-patterns) - [Chapter 13: Patterns in the Real World](#chapter-13-patterns-in-the-real-world) - [Chapter 14: Appendix - Leftover Patterns](#chapter-14-leftover-patterns) ## Chapter 1: Welcome to Design Patterns [The Strategy Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_01_strategy.py) Someone has already solved your problems. You can exploit the wisdom and lessons learned by other developers who have been down the same design problems road and survived the trip. Instead of code reuse, with patterns you get experience reuse. Example with ducks, adding `fly` method to the `Duck` superclass turned out to introduce a bug to the `RubberDuck` subclass. 
*Which of the following are disadvantages of using inheritance to provide Duck behaviour?*

- My answer: [D] It is hard to gain knowledge of all duck behaviours. [F] Changes can unintentionally affect other ducks.

*What do YOU think about the design? What would you do if you were Joe?*

- My answer: New features would require adding many interfaces, for example an interface for migrating birds. Maybe instead it would be better to have 2 types of ducks, living and non-living, and instead of introducing a single class per duck, reuse classes and make them parametrised with a name.

There is one constant in software development. What is the one thing you can always count on in software development? **CHANGE**. No matter how well you design an application, over time an application must grow and change, or it will die.

*List some reasons you have had to change code in your application*:

- New definition of the operations process.
- Better understanding of the domain.
- Requirement to use a worker instead of a lambda.
- New library for JSON serialisation.

We know using inheritance hasn't worked out very well. The `Flyable` and `Quackable` interfaces sounded good at first. There is a design principle:

> Identify the aspects of your application that vary and separate them from what stays the same.

Another way to think about this principle: *take the parts that vary and encapsulate them, so that later you can alter or extend the parts that vary without affecting those that don't*.

We know that `fly` and `quack` are the parts of the Duck class that vary across ducks. We pull these methods out of the Duck class and create a new set of classes to represent each behaviour (FlyBehaviour, QuackBehaviour, ...). That way, the Duck classes won't need to know any of the implementation details of their own behaviours.

> Program to an interface, not an implementation. == Program to a supertype.

Programming to an implementation:

```java
Dog d = new Dog();  // a concrete implementation of Animal
d.bark();
```

Programming to an interface/supertype:

```java
Animal animal = new Dog();  // we know it is a Dog, but we can now use the animal reference polymorphically
animal.makeSound();
```

*Using our new design, what would you do if you needed to add rocket-powered flying to the SimUDuck app?*

- My answer: Add a new implementation of `FlyBehaviour`.

*Can you think of a class that might want to use the Quack behaviour that isn't a duck?*

- My answer: A Russian quacking machine.

A Duck will now delegate its flying and quacking behaviours, instead of using quacking and flying methods defined in the Duck class. To change a duck's behaviour at runtime, just call the duck's setter method for that behaviour.

Design principle:

> Favour composition over inheritance.

Creating systems using composition gives you a lot more flexibility. Not only does it let you encapsulate a family of algorithms into their own set of classes, but it also lets you change behaviour at runtime. Composition is used in many design patterns, and you will see a lot more about its advantages and disadvantages throughout the book.

*A duck call is a device that hunters use to mimic the calls (quacks) of ducks. How would you implement your own duck call that does not inherit from the Duck class?*

- My answer: Compose a duck call of `QuackBehaviour`.

I have just applied the **STRATEGY** pattern.

**The Strategy Pattern** - defines a family of algorithms, encapsulates each one, and makes them interchangeable. Strategy lets the algorithm vary independently of the clients that use it.
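A minimal sketch of the resulting design (behaviour and class names follow the chapter's duck example; the Python details are my own):

```python
class FlyWithWings:
    def fly(self) -> None:
        print("I'm flying!")


class FlyNoWay:
    def fly(self) -> None:
        print("I can't fly")


class Duck:
    def __init__(self, fly_behaviour) -> None:
        self.fly_behaviour = fly_behaviour  # composed, not inherited

    def perform_fly(self) -> None:
        self.fly_behaviour.fly()  # delegate to the encapsulated strategy


mallard = Duck(FlyWithWings())
mallard.perform_fly()               # -> I'm flying!
mallard.fly_behaviour = FlyNoWay()  # behaviour swapped at runtime
mallard.perform_fly()               # -> I can't fly
```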
Design puzzle:

- *KnifeBehaviour, BowAndArrowBehaviour, AxeBehaviour, SwordBehaviour* IMPLEMENT *WeaponBehaviour*
- *Troll, Queen, King, Knight* EXTENDS *Character*
- *Character* HAS-A *WeaponBehaviour*
- `setWeapon` should be in the *Character* class

Design Patterns give you a shared vocabulary with other developers. Once you have got the vocabulary, you can more easily communicate with other developers and inspire those who don't know patterns to start learning them. It also elevates your thinking about architectures by letting you think at the pattern level, not the nitty-gritty object level.

The power of a shared pattern vocabulary:

- Shared pattern vocabularies are POWERFUL. When you communicate with another developer using patterns, you are communicating not just a pattern name but a whole set of qualities, characteristics and constraints that the pattern represents.
- Patterns allow you to say more with less. Other developers can quickly know precisely the design you have in mind.
- Talking at the pattern level allows you to stay *in the design* longer, without having to dive deep down to the nitty-gritty details of implementing objects and classes.
- Shared vocabularies can turbo-charge your team. A team well versed in design patterns can move quickly with less room for misunderstanding.
- Shared vocabularies encourage more junior developers to get up to speed.

Design patterns don't go directly into your code, they first go into your **brain**. Once you have loaded your brain with a good working knowledge of patterns, you can then start to apply them to new designs, and rework your old code when you find it is degrading into an inflexible mess.

OO Basics: Abstraction, Encapsulation, Polymorphism, Inheritance

OO Principles: Encapsulate what varies. Favour composition over inheritance. Program to interfaces, not implementations.

Bullet points:

- Knowing the OO basics does not make you a good OO designer.
- Good OO designs are reusable, extensible and maintainable.
- Patterns show you how to build systems with good OO design qualities.
- Patterns are proven OO experience.
- Patterns don't give you code, they give you general solutions to design problems. You apply them to your specific application.
- Patterns aren't invented, they are discovered.
- Most patterns and principles address issues of change in software.
- Most patterns allow some part of a system to vary independently of all other parts.
- We often try to take what varies in a system and encapsulate it.
- Patterns provide a language that can maximise the value of your communication with other developers.

## Chapter 2: Keeping your Objects in the Know

[The Observer Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_02_observer.py)

Observer Pattern: a pattern that keeps your objects in the know when something they care about happens.

Weather-O-Rama: our task is to create an app that uses the WeatherData object to update 3 displays - current conditions, weather stats and a forecast.

*Based on our first implementation, which of the following apply?*

- My answers: [A] We are coding to concrete implementations, not interfaces. [B] For every new display we will need to alter this code. [C] We have no way to add or remove display elements at runtime. [E] We haven't encapsulated the part that changes.
You know how newspaper or magazine subscriptions work:

1. A newspaper publisher goes into business and begins publishing newspapers.
2. You subscribe to a particular publisher, and every time there is a new edition it gets delivered to you. As long as you remain a subscriber, you get new newspapers.
3. You unsubscribe when you don't want papers anymore, and they stop being delivered.
4. While the publisher remains in business, people, hotels, airlines and other businesses constantly subscribe and unsubscribe to the newspaper.

> Publishers + Subscribers = Observer Pattern

The Observer Pattern:

> Defines a one-to-many dependency between objects so that when one object changes state, all of its dependents are notified and updated automatically.

There are a few different ways to implement the Observer Pattern, but most revolve around a class design that includes Subject and Observer interfaces. Because the subject is the sole owner of the data, the observers are dependent on the subject to update them when the data changes. This leads to a cleaner OO design than allowing many objects to control the same data.

**We say an object is tightly coupled to another object when it is too dependent on that object.** A loosely coupled object doesn't know or care too much about the details of another object. By not knowing too much about other objects, we can create designs that can handle change better. The Observer Pattern is a great example of loose coupling. The ways the pattern achieves loose coupling:

1. The only thing the subject knows about an observer is that it implements a certain interface.
2. We can add new observers at any time.
3. We never need to modify the subject to add new types of observers.
4. We can reuse subjects or observers independently of each other.
5. Changes to either the subject or an observer will not affect the other.

Design principle:

> Strive for loosely coupled designs between objects that interact.

Loosely coupled designs allow us to build flexible systems that can handle change because they minimise the interdependency between objects.

The Observer Pattern is one of the most common patterns in use, and you will find plenty of examples of the pattern being used in many libraries and frameworks (Swing, JavaBeans, Cocoa, ...). Listener == Observer Pattern. The Observer Pattern can be used for sending "notifications" so that observers can pull the data on their own.

Bullet points:

- The Observer Pattern defines a one-to-many relationship between objects.
- Subjects update Observers using a common interface.
- Observers of any concrete type can participate in the pattern as long as they implement the Observer interface.
- Observers are loosely coupled in that the Subject knows nothing about them, other than that they implement the Observer interface.
- You can push or pull data from the Subject when using the pattern (pull is considered more correct).
- Swing makes heavy use of the Observer Pattern, as do many GUI frameworks.
- You will also find the pattern in many other places, including RxJava, JavaBeans and RMI, as well as in other language frameworks, like Cocoa, Swift and JavaScript events.
- The Observer Pattern is related to the Publish/Subscribe Pattern, which is for more complex situations with multiple Subjects and/or multiple message types.
- The Observer Pattern is a commonly used pattern, and we will see it again when we learn about Model-View-Controller.

*For each design principle, describe how the Observer Pattern makes use of the principle:*

- Identify the aspects of your application that vary and separate them from what stays the same: Observers and data vary.
- Program to an interface, not an implementation: Subject and Observers are loosely coupled because what they know about each other are the interfaces they implement.
- Favour composition over inheritance: the Subject holds a list of observers, observers hold a reference to the subject.
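A minimal Observer sketch along the lines of the chapter's WeatherData example (the implementation details here are my own, not the book's exact code):

```python
class WeatherData:  # the Subject - sole owner of the data
    def __init__(self) -> None:
        self._observers = []
        self.temperature = 0.0

    def register_observer(self, observer) -> None:
        self._observers.append(observer)

    def set_temperature(self, temperature: float) -> None:
        self.temperature = temperature
        for observer in self._observers:  # notify everyone on state change
            observer.update(self)


class CurrentConditionsDisplay:  # an Observer - only the interface matters
    def update(self, subject: WeatherData) -> None:
        print(f"Current temperature: {subject.temperature}")


weather_data = WeatherData()
weather_data.register_observer(CurrentConditionsDisplay())
weather_data.set_temperature(21.5)  # -> Current temperature: 21.5
```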
## Chapter 3: Decorating Objects

[The Decorator Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_03_decorator.py)

We will re-examine the typical overuse of inheritance, and we will learn how to decorate classes at runtime using a form of object composition.

The Starbuzz system has created a maintenance nightmare for its developers. They are violating "*identify the aspects of your application that vary and separate them from what stays the same*" and "*favour composition over inheritance*".

Problems with the suggested design:

- My answer: What if a customer has a promo coupon, e.g. -20%? What if a condiment is not available?

If I can extend an object's behaviour through composition, then I can do this dynamically at runtime. When I inherit by subclassing, that behaviour is set statically at compile time. By dynamically composing objects, I can add new functionality by writing new code, rather than altering existing code. Because I am not changing existing code, the chances of introducing bugs or causing unintended side effects in pre-existing code are much reduced.

Code should be closed to change, yet open to extension. Design principle - one of the most important design principles:

> Classes should be open for extension, but closed for modification.

OPEN - if needs or requirements change, just go and make your own extensions. CLOSED - we spent a lot of time getting this code correct and bug-free, so we can't let you alter the existing code. It must remain closed to modification.

Our goal is to allow classes to be easily extended to incorporate new behaviour without modifying existing code: designs that are resilient to change and flexible enough to take on new functionality to meet changing requirements. E.g. the Observer Pattern - we can add new Observers and extend the Subject at any time. Many of the patterns give us time-tested designs that protect your code from being modified by supplying a means of extension.

How can I make every part of my design follow the Open-Closed Principle? Usually you can't. Making an OO design flexible and open to extension without modifying existing code takes time and effort. Applying the Open-Closed Principle EVERYWHERE is wasteful and unnecessary, and can lead to complex, hard-to-understand code.

The Decorator Pattern:

> Attaches additional responsibilities to an object dynamically. Decorators provide a flexible alternative to subclassing for extending functionality.

The decorator adds its own behaviour before and/or after delegating to the object it decorates to do the rest of the job.

Just because we are subclassing, it doesn't mean we use inheritance. Sometimes we are subclassing in order to have the correct type, not to inherit the behaviour. We can acquire new behaviour not by inheriting it from a superclass, but by composing objects together.

Decorators are typically created using other patterns like Factory and Builder. `java.io` is largely based on Decorator. Java I/O also points out one of the downsides of the Decorator Pattern: designs using this pattern often result in a large number of small classes, which can be overwhelming to the developer trying to use the Decorator-based API.
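A minimal Decorator sketch in the spirit of the Starbuzz example (class names are borrowed from the chapter; the Python rendering is my own):

```python
class Beverage:
    def cost(self) -> float:
        raise NotImplementedError


class Espresso(Beverage):
    def cost(self) -> float:
        return 1.99


class CondimentDecorator(Beverage):  # same type as what it wraps
    def __init__(self, beverage: Beverage) -> None:
        self._beverage = beverage  # the wrapped component


class Mocha(CondimentDecorator):
    def cost(self) -> float:
        return 0.20 + self._beverage.cost()  # add behaviour, then delegate


# Decorators mirror the component type, so they stack freely:
drink = Mocha(Mocha(Espresso()))
print(drink.cost())  # -> 2.39 (modulo float rounding)
```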
Bullet points:

- Inheritance is one form of extension, but not necessarily the best way to achieve flexibility in our designs.
- In our designs we should allow behaviour to be extended without the need to modify existing code.
- Composition and delegation are often used to add new behaviours at runtime.
- The Decorator Pattern provides an alternative to subclassing for extending behaviour.
- The Decorator Pattern involves a set of decorator classes that are used to wrap concrete components.
- Decorator classes mirror the type of the components they decorate (in fact, they are the same type as the components they decorate, either through inheritance or interface implementation).
- Decorators change the behaviour of their components by adding new functionality before and/or after method calls to the component.
- You can wrap a component with any number of decorators.
- Decorators are typically transparent to the client of the component - that is, unless the client is relying on the component's concrete type.
- Decorators can result in many small objects in our design, and overuse can be complex.

## Chapter 4: Baking with OO Goodness

[The Factory Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_04_factory.py)

There is more to making objects than just using the *new* operator. We will learn that instantiation is an activity that shouldn't always be done in public and can often lead to coupling problems. The Factory Pattern can save us from embarrassing dependencies.

We should not program to an implementation, but every time we use *new* that is exactly what we do. The *new* operator instantiates a concrete class, so that is definitely an implementation, not an interface. CHANGE impacts our use of *new*: code will have to be changed as new concrete classes are added.

*How might you take all the parts of your application that instantiate concrete classes and separate or encapsulate them from the rest of your application?*

- My answer: I would add a function returning instantiated classes.

Indeed, we can encapsulate object creation: we can take the creation code and move it into another object that is only going to be concerned with creating pizzas. Anytime the store needs a pizza, it asks the pizza factory to make one. By encapsulating object creation in one class, we have only one place to make modifications when the implementation changes.

A simple object factory can be a static method; however, it has the disadvantage that we can not subclass and change the behaviour of the create method. The Simple Factory isn't actually a Design Pattern, it is more of a programming idiom. Some developers do mistake this idiom for the Factory Pattern.

A *factory method* handles object creation and encapsulates it in a subclass. This decouples the client code (e.g. `orderPizza`) in the superclass from the object creation code in the subclass.

```java
public abstract class PizzaStore {
    public Pizza orderPizza(String type) {
        Pizza pizza = createPizza(type);
        pizza.prepare();
        pizza.bake();
        pizza.cut();
        pizza.box();
        return pizza;
    }

    protected abstract Pizza createPizza(String type);
}
```

All factory patterns encapsulate object creation.
The Factory Method Pattern encapsulates object creation by letting subclasses decide what objects to create. For every concrete Creator, there is typically a whole set of products that it creates. Chicago pizza creators create different types of Chicago-style pizza, New York pizza creators create different types of New York-style pizza, and so on.

The Factory Method Pattern:

> Defines an interface for creating an object, but lets subclasses decide which class to instantiate. Factory Method lets a class defer instantiation to subclasses.

The Creator is written to operate on products produced by the factory method. The Creator class is written without knowledge of the actual products that will be created. Only subclasses actually implement the factory method and create products.

When you directly instantiate an object, you are depending on its concrete class. Reducing dependencies on concrete classes in our code is a "good thing".

General principle - the Dependency Inversion Principle:

> Depend upon abstractions. Do not depend upon concrete classes.

It suggests that our high-level components should not depend on our low-level components; rather, they should both depend on abstractions. The "inversion" in the name Dependency Inversion Principle is there because it inverts the way you typically might think about your OO design: low-level components now depend on a higher-level abstraction.

Guidelines that can help to avoid OO designs that violate the Dependency Inversion Principle:

- No variable should hold a reference to a concrete class (if you use new, you will be holding a reference; use a factory instead).
- No class should derive from a concrete class (if you derive, you depend; derive from an abstraction).
- No method should override an implemented method of its base classes (if you override an implemented method, your base wasn't really an abstraction to start with).

This is a guideline you should strive for, rather than a rule you should follow all the time. Clearly, every single Java program ever written violates these guidelines. But if you internalise these guidelines and have them in the back of your mind when you design, you will know when you are violating the principle, and you will have a good reason for doing so.

An Abstract Factory gives us an interface for creating a family of products. By writing code that uses this interface, we decouple our code from the actual factory that creates the products. That allows us to implement a variety of factories that produce products meant for different contexts - such as different regions, operating systems, or different look and feels. Because the code is decoupled from the actual products, we can substitute different factories to get different behaviours.

The Abstract Factory Pattern:

> Provides an interface for creating families of related or dependent objects without specifying their concrete classes.

Often the methods of an Abstract Factory are implemented as factory methods. The Factory Method and the Abstract Factory are both good at decoupling applications from specific implementations.

- Use Abstract Factory whenever you have families of products you need to create, and you need to make sure your clients create products that belong together. Abstract Factory creates objects through object composition.
- Use Factory Method to decouple client code from the concrete classes you need to instantiate, or if you don't know ahead of time all the concrete classes you are going to need. Factory Method creates objects through inheritance.
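A minimal Abstract Factory sketch for the pizza-ingredient idea (the ingredient strings and class names are assumptions based on the chapter's Java example):

```python
class PizzaIngredientFactory:  # the abstract factory interface
    def create_dough(self) -> str:
        raise NotImplementedError

    def create_sauce(self) -> str:
        raise NotImplementedError


class NYPizzaIngredientFactory(PizzaIngredientFactory):
    def create_dough(self) -> str:
        return "thin crust dough"

    def create_sauce(self) -> str:
        return "marinara sauce"


class ChicagoPizzaIngredientFactory(PizzaIngredientFactory):
    def create_dough(self) -> str:
        return "extra thick crust dough"

    def create_sauce(self) -> str:
        return "plum tomato sauce"


def make_pizza(factory: PizzaIngredientFactory) -> None:
    # The client depends only on the abstract interface, so a whole
    # family of products can be swapped at once.
    print(f"Pizza with {factory.create_dough()} and {factory.create_sauce()}")


make_pizza(NYPizzaIngredientFactory())
make_pizza(ChicagoPizzaIngredientFactory())
```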
Bullet points:

- All factories encapsulate object creation.
- Simple Factory, while not a bona fide design pattern, is a simple way to decouple your clients from concrete classes.
- Factory Method relies on inheritance: object creation is delegated to subclasses, which implement the factory method to create objects.
- Abstract Factory relies on object composition: object creation is implemented in methods exposed in the factory interface.
- All factory patterns promote loose coupling by reducing the dependency of your application on concrete classes.
- The intent of Factory Method is to allow a class to defer instantiation to its subclasses.
- The intent of Abstract Factory is to create families of related objects without having to depend on their concrete classes.
- The Dependency Inversion Principle guides us to avoid dependencies on concrete types and to strive for abstractions.
- Factories are a powerful technique for coding to abstractions, not concrete classes.

## Chapter 5: One-of-a-kind Objects

[The Singleton Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_05_singleton.py)

The ticket to creating one-of-a-kind objects for which there is only one instance, ever. By using a singleton you can ensure that every object in your application is making use of the same global resource. Often used to manage pools of resources, like connection or thread pools.

_How might things go wrong if more than one instance of ChocolateBoiler is created in an application?_

- My answer: Incorrect state management, because of multiple instances.

The Singleton Pattern:

> Ensures a class has only one instance, and provides a global point of access to it.

Despite using the Singleton Pattern, a multithreaded application can still cause problems - multiple objects may get instantiated. In Java, the solution for this is the `synchronized` keyword:

```java
public static synchronized Singleton getInstance() {
    ...
}
```

`synchronized` forces every thread to wait for its turn before it can enter the method. That is, no 2 threads may enter the method at the same time. Synchronization may be expensive, but here it matters only once, on `uniqueInstance` initialization - after the first time, synchronization is totally unneeded overhead. There are Java-specific solutions to this overhead (e.g. double-checked locking).

The Singleton Pattern violates "_the loose coupling principle_": if you make a change to the Singleton, you will likely have to make a change to every object connected to it.

A global variable can provide global access, but does not ensure only one instance. Global variables also tend to encourage developers to pollute the namespace with lots of global references to small objects. Singletons don't encourage this in the same way, but can be abused nonetheless. It is possible to implement a Singleton as an enum.

Bullet points:

- The Singleton Pattern ensures you have at most one instance of a class in your application.
- The Singleton Pattern also provides a global access point to that instance.
- Java's implementation of the Singleton Pattern makes use of a private constructor and a static method combined with a static variable.
- Examine your performance and resource constraints and carefully choose an appropriate Singleton for multithreaded applications (we should consider all applications multithreaded).
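For reference, a minimal thread-safe Singleton sketch in Python - a rough equivalent of the `synchronized getInstance()` above (my own adaptation, not the book's code):

```python
import threading


class ChocolateBoiler:
    _instance = None
    _lock = threading.Lock()

    @classmethod
    def get_instance(cls) -> "ChocolateBoiler":
        with cls._lock:  # no two threads may enter at the same time
            if cls._instance is None:
                cls._instance = cls()
        return cls._instance


# Every caller gets the same shared instance:
assert ChocolateBoiler.get_instance() is ChocolateBoiler.get_instance()
```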
## Chapter 6: Encapsulating Invocation

[The Command Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_06_command.py)

In this chapter we are going to encapsulate method invocation. By encapsulating method invocation, we can crystallize pieces of computation so that the object invoking the computation doesn't need to worry about how to do things, it just uses our crystallized method to get it done.

The Command Pattern allows you to decouple the requester of an action from the object that actually performs the action. This can be achieved by introducing command objects into the design. A command object encapsulates a request to do something on a specific object.

Example with a waitress taking orders and passing them to a cook - separation of the object making a request from the object that receives and executes the request:

- Customer - Client
- Order - Command
- Waitress - Invoker
- Short-Order Cook - Receiver
- takeOrder - setCommand - sets what is supposed to be executed
- orderUp - execute

The Command Pattern:

> Encapsulates a request as an object, thereby letting you parametrize other objects with different requests, queue or log requests, and support undoable operations.

A null object is useful when you don't have a meaningful object to return, and yet you want to remove the responsibility of handling null from the client, e.g. `NoCommand` - a surrogate that does nothing when its execute method is called.

The Command Pattern can be taken to the next level by using e.g. Java's lambda expressions. Instead of instantiating the concrete command objects, you can use function objects in their place. This can be done if the Command interface has one abstract method.

In order to support undoable Commands, the `Command` interface has to be extended with an `undo` method. A `MacroCommand` can be used to execute multiple commands:

```java
MacroCommand partyOnMacro = new MacroCommand(new Command[]{lightOn, stereoOn, tvOn, hottubOn});
```

More uses of the Command Pattern:

- Queueing requests - objects implementing the command interface are added to the queue; threads remove commands from the queue one by one and call their `execute` method. Once complete, they go back for a new command object. This gives us an effective way to limit computation to a fixed number of threads.
- Logging requests - the semantics of some applications require that we log all actions and be able to recover after a crash by re-invoking those actions. The Command Pattern can support these semantics with the addition of two methods: `store` and `load`.

Bullet points:

- The Command Pattern decouples an object making a request from the one that knows how to perform it.
- A Command object is at the center of this decoupling and encapsulates a receiver with an action (or set of actions).
- An invoker makes a request of a Command object by calling its execute method, which invokes those actions on the receiver.
- Invokers can be parametrized with Commands, even dynamically at runtime.
- Commands may support undo by implementing an undo method that restores the object to its previous state before the execute method was last called.
- MacroCommands are a simple extension of the Command Pattern that allow multiple commands to be invoked. Likewise, MacroCommands can easily support undo.
- In practice, it is not uncommon for "smart" Command objects to implement the request themselves rather than delegating to a receiver.
- Commands may also be used to implement logging and transactional systems.
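A minimal Command sketch with undo support (the Light/RemoteControl names follow the chapter's running example; the details are my own):

```python
class Light:  # the Receiver - knows how to do the work
    def on(self) -> None:
        print("Light is on")

    def off(self) -> None:
        print("Light is off")


class LightOnCommand:  # a Command - encapsulates a request on a receiver
    def __init__(self, light: Light) -> None:
        self._light = light

    def execute(self) -> None:
        self._light.on()

    def undo(self) -> None:  # restores the state before execute
        self._light.off()


class RemoteControl:  # the Invoker - knows nothing about receivers
    def __init__(self) -> None:
        self._command = None

    def set_command(self, command) -> None:
        self._command = command

    def press_button(self) -> None:
        self._command.execute()


remote = RemoteControl()
remote.set_command(LightOnCommand(Light()))
remote.press_button()  # -> Light is on
```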
## Chapter 7: Being Adaptive

[The Adapter Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_07_adapter.py)

[The Facade Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_07_facade.py)

We are going to wrap some objects with a different purpose: to make their interfaces look like something they are not. So we can adapt a design expecting one interface to a class that implements a different interface. Also, we are going to look at another pattern that wraps objects to simplify their interface.

You will have no trouble understanding what an OO adapter is, because the real world is full of them (e.g. a power adapter: the British wall outlet exposes one interface for getting power, the adapter converts one interface into another, and the US laptop expects another interface). OO adapters play the same role as their real-world counterparts: they take an interface and adapt it to one that a client is expecting. For example: you are going to use a new library, but the new vendor designed their interfaces differently than the last vendor. The adapter acts as the middleman by receiving requests from the client and converting them into requests that make sense on the vendor classes.

_If it walks like a duck and quacks like a duck, then it ~~must~~ might be a ~~duck~~ turkey wrapped with a duck adapter..._

```java
public class TurkeyAdapter implements Duck {
    // take a Turkey in the constructor, implement Duck's methods by invoking Turkey's methods
}
```

How the Client uses the Adapter:

1. The client makes a request to the adapter by calling a method on it using the target interface.
2. The adapter translates the request into one or more calls on the adaptee using the adaptee interface.
3. The client receives the results of the call and never knows there is an adapter doing the translation.

It is possible to create a Two Way Adapter - just implement both interfaces involved, so the adapter can act as the old interface or the new interface.

The Adapter Pattern:

> Converts the interface of a class into another interface the client expects. Adapter lets classes work together that couldn't otherwise because of incompatible interfaces.

The Adapter is used to decouple the client from the implemented interface, and if we expect the interface to change over time, the adapter encapsulates that change so that the client doesn't have to be modified each time it needs to operate against a different interface. The Adapter Pattern is full of good OO design principles: it uses object composition and binds the client to an interface, not an implementation.

There is a second type of adapter - the class adapter; this one uses multiple inheritance (Target and Adaptee).

Real-world adapters:

- [Java] Enumerators - the Enumeration interface allows you to step through the elements of a collection without knowing the specifics of how they are managed in the collection.
- [Java] Iterators - the more recent Collection classes use an Iterator interface, which allows you to iterate through a set of items in a collection, and adds the ability to remove items.

When a method in an adapter can not be supported, you can throw e.g. `UnsupportedOperationException`.
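A minimal object-adapter sketch mirroring the TurkeyAdapter comment above (the Python rendering is my own):

```python
class Turkey:  # the Adaptee - has the "wrong" interface
    def gobble(self) -> None:
        print("Gobble gobble")


class TurkeyAdapter:  # exposes the Duck interface the client expects
    def __init__(self, turkey: Turkey) -> None:
        self._turkey = turkey  # object composition, not inheritance

    def quack(self) -> None:
        self._turkey.gobble()  # translate the request onto the adaptee


def client_code(duck_like) -> None:
    duck_like.quack()  # the client never knows an adapter is involved


client_code(TurkeyAdapter(Turkey()))  # -> Gobble gobble
```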
_Some AC adapters do more than just change the interface - they add other features like surge protection, indicator lights, and other bells and whistles. If you were going to implement these kinds of features, what pattern would you use?_

- My answer: The Decorator Pattern

Decorator vs Adapter:

- Decorators allow new behavior to be added to classes without altering existing code.
- Adapters always convert the interface of what they wrap.

Decorators and Adapters may look somewhat similar on paper, but clearly they are miles apart.

The Facade Pattern alters an interface, but in order to simplify the interface - it hides all the complexity of one or more classes behind a clean, well-lit facade. The Facade Pattern can take a complex subsystem and make it easier to use. Example - a home cinema system: instead of turning on the popcorn machine, the screen and the audio system, all you need to do is call `watchMovie`.

Facades don't encapsulate the subsystem classes, they merely provide a simplified interface to their functionality. The subsystem classes still remain available. A facade provides a simplified interface while still exposing the full functionality of the system to those who may need it. A facade not only simplifies an interface, it decouples a client from a subsystem of components. Facades and adapters may wrap multiple classes, but a facade's intent is to simplify, while an adapter's is to convert the interface to something different.

The Facade Pattern:

> Provides a unified interface to a set of interfaces in a subsystem. Facade defines a higher-level interface that makes the subsystem easier to use.

Design principle - the Principle of Least Knowledge (the Law of Demeter):

> Talk only to your immediate friends.

This principle guides us to reduce the interactions between objects to just a few close "friends". It means that when you are designing a system, for any object, be careful of the number of classes it interacts with and also how it comes to interact with those classes. This principle prevents us from creating designs that have a large number of classes coupled together, so that changes in one part of the system cascade to other parts. This means: invoke only methods that belong to:

- the object itself
- objects passed in as a parameter to the method
- any object the method creates or instantiates
- any components of the object

_Side note: Principle of Least Knowledge is a better name than the Law of Demeter, because no principle is a law, and they don't have to always be applied._

The Facade Pattern and the Principle of Least Knowledge - we try to keep subsystems adhering to the Principle of Least Knowledge as well. If this gets too complex and too many friends are intermingling, we can introduce additional facades to form layers of subsystems.

Bullet points:

- When you need to use an existing class and its interface is not the one you need, use an adapter.
- When you need to simplify and unify a large interface or a complex set of interfaces, use a facade.
- An adapter changes an interface into one a client expects.
- A facade decouples a client from a complex subsystem.
- Implementing an adapter may require little work or a great deal of work depending on the size and complexity of the target interface.
- Implementing a facade requires that we compose the facade with its subsystem and use delegation to perform the work of the facade.
- There are two forms of the Adapter Pattern: object and class adapters. Class adapters require multiple inheritance.
- You can implement more than one facade for a subsystem.
- An adapter wraps an object to change its interface, a decorator wraps an object to add new behaviours and responsibilities, and a facade "wraps" a set of objects to simplify.
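A minimal Facade sketch of the home cinema example (the subsystem classes and method names are assumptions based on the chapter's description):

```python
class PopcornPopper:  # subsystem class - still usable directly
    def on(self) -> None:
        print("Popcorn popper on")


class Projector:  # subsystem class
    def on(self) -> None:
        print("Projector on")


class HomeTheaterFacade:
    def __init__(self, popper: PopcornPopper, projector: Projector) -> None:
        self._popper = popper          # the facade is composed with
        self._projector = projector    # its subsystem and delegates

    def watch_movie(self) -> None:
        # One simple call hides the multi-step subsystem choreography.
        self._popper.on()
        self._projector.on()
        print("Movie started")


HomeTheaterFacade(PopcornPopper(), Projector()).watch_movie()
```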
## Chapter 8: Encapsulating Algorithms

[The Template Method Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_08_template_method.py)

We are going to get down to encapsulating pieces of algorithms so that subclasses can hook themselves right into a computation any time they want. We can generalize the recipe and place it in a base class.

```java
public abstract class CaffeineBeverage {
    final void prepareRecipe() {  // Our template method - it serves as a template for an algorithm.
        boilWater();
        brew();
        pourInCup();
        addCondiments();
    }

    abstract void brew();

    abstract void addCondiments();

    void boilWater() {}

    void pourInCup() {}
}
```

The Template Method defines the steps of an algorithm and allows subclasses to provide the implementation for one or more steps.

The Template Method Pattern:

> Defines the skeleton of an algorithm in a method, deferring some steps to subclasses. Template Method lets subclasses redefine certain steps of an algorithm without changing the algorithm's structure.

This pattern is all about creating a template for an algorithm. A template is just a method - a method that defines an algorithm as a set of steps. One or more of these steps is defined to be abstract and implemented by a subclass. This ensures the algorithm's structure stays unchanged. We can also have concrete methods that do nothing by default - we call them `hooks`. Subclasses are free to override these but don't have to. Use abstract methods when the subclass MUST provide an implementation of the method. Use hooks when that part of the algorithm is optional.

The Hollywood Principle:

> Don't call us, we'll call you.

The Hollywood Principle gives us a way to prevent _dependency rot_. We allow low-level components to hook themselves into a system, but the high-level components determine when they are needed, and how. In other words, the high-level components give the low-level components the "don't call us, we'll call you" treatment.

Patterns using the Hollywood Principle:

- The Template Method Pattern
- The Observer Pattern
- The Strategy Pattern
- The Factory Pattern

The Dependency Inversion Principle teaches us to avoid the use of concrete classes and instead work as much as possible with abstractions. The Hollywood Principle is a technique for building frameworks or components so that lower-level components can be hooked into the computation, but without creating dependencies between the lower- and higher-level components. The Template Method Pattern is a great design tool for creating frameworks, where the framework controls how something gets done, but leaves you to specify your own details about what is actually happening at each step of the framework's algorithm.

`sort` methods are in the spirit of the Template Method Pattern - the developer has to define the `compare` method.

Template Method vs Strategy:

- Strategy defines a family of algorithms and makes them interchangeable.
- Template Method defines the outline of an algorithm and lets subclasses do some of the work.
- Strategy uses object composition.
- Template Method uses inheritance.

Bullet points:

- A template method defines the steps of an algorithm, deferring to subclasses for the implementation of those steps.
- The Template Method Pattern gives us an important technique for code reuse.
- The template method's abstract class may define concrete methods, abstract methods and hooks.
- Abstract methods are implemented by subclasses.
- Hooks are methods that do nothing or provide default behavior in the abstract class, but may be overridden in the subclass.
- To prevent subclasses from changing the algorithm in the template method, declare the template method as final.
- The Hollywood Principle guides us to put decision making in high-level modules that can decide how and when to call low-level modules.
- You will see lots of uses of the Template Method Pattern in real-world code, but (as with any pattern) don't expect it all to be designed "by the book".
- The Strategy and Template Method Patterns both encapsulate algorithms, the first by composition and the other by inheritance.
- Factory Method is a specialisation of Template Method.
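A Python rendering of the CaffeineBeverage template method above, including an optional hook (the hook placement is my own illustration):

```python
class CaffeineBeverage:
    def prepare_recipe(self) -> None:  # the template method - fixed structure
        self.boil_water()
        self.brew()
        self.pour_in_cup()
        if self.customer_wants_condiments():  # hook controls an optional step
            self.add_condiments()

    def brew(self) -> None:
        raise NotImplementedError  # subclasses MUST implement

    def add_condiments(self) -> None:
        raise NotImplementedError

    def boil_water(self) -> None:
        print("Boiling water")

    def pour_in_cup(self) -> None:
        print("Pouring into cup")

    def customer_wants_condiments(self) -> bool:
        return True  # hook: subclasses MAY override


class Tea(CaffeineBeverage):
    def brew(self) -> None:
        print("Steeping the tea")

    def add_condiments(self) -> None:
        print("Adding lemon")


Tea().prepare_recipe()
```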
## Chapter 9: Well-Managed Collections

[The Iterator Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_09_iterator.py)

[The Composite Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_09_composite.py)

In this chapter we are going to see how we can allow our clients to iterate through objects without ever getting a peek at how we store the objects.

Iterator - encapsulates the way we iterate through a collection of objects. The Iterator Pattern relies on an interface called Iterator. In Java, the following interface does not have to be defined, because Java has a built-in Iterator interface.

```java
public interface Iterator {
    boolean hasNext();

    MenuItem next();
}
```

Once we have this interface, we can implement Iterators for any kind of collection of objects: arrays, lists, hash maps...

The Iterator Pattern:

> Provides a way to access the elements of an aggregate object sequentially without exposing its underlying representation.

The effect of using iterators in the design: once you have a uniform way of accessing the elements of all your aggregate objects, you can write polymorphic code that works with any of these aggregates. The other important impact on the design is that the Iterator Pattern gives the responsibility of traversing elements to the iterator object, not the aggregate object. This not only keeps the aggregate interface and implementation simpler, it removes the responsibility for iteration from the aggregate and keeps the aggregate focused on the things it should be focused on (managing a collection of objects), not on iteration.

The Single Responsibility Principle:

> A class should have only one reason to change.

We want to avoid change in our classes because modifying code provides all sorts of opportunities for problems to creep in. Having two ways to change increases the probability the class will change in the future, and when it does, it's going to affect two aspects of your design.

**Cohesion** - a measure of how closely a class or module supports a single purpose or responsibility.

- High cohesion - designed around a set of related functions (easy to maintain, single responsibility).
- Low cohesion - designed around a set of unrelated functions (difficult to maintain, multiple responsibilities).

There comes a time when we must refactor our code in order for it to grow. Not to do so would leave us with rigid, inflexible code that has no hope of ever sprouting new life.

The Composite Pattern:

> Allows you to compose objects into tree structures to represent part-whole hierarchies. Composite lets clients treat individual objects and compositions of objects uniformly.
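A minimal Composite sketch of a part-whole menu tree (the names are assumptions based on the chapter's menus and menu items):

```python
class MenuComponent:  # common interface for composites and leaves
    def print(self) -> None:
        raise NotImplementedError


class MenuItem(MenuComponent):  # leaf
    def __init__(self, name: str) -> None:
        self.name = name

    def print(self) -> None:
        print(f"  {self.name}")


class Menu(MenuComponent):  # composite - holds other components
    def __init__(self, name: str) -> None:
        self.name = name
        self._children = []

    def add(self, component: MenuComponent) -> None:
        self._children.append(component)

    def print(self) -> None:
        print(self.name)
        for child in self._children:  # same call works for menus and items
            child.print()


all_menus = Menu("ALL MENUS")
dinner = Menu("DINNER")
dinner.add(MenuItem("Pasta"))
all_menus.add(dinner)
all_menus.print()  # the client treats the whole tree uniformly
```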
Part-whole hierarchy - a tree of objects that is made of parts (e.g. menus and menu items). Using a composite structure, we can apply the same operations over both composites and individual objects. In other words, in most cases we can ignore the differences between compositions of objects and individual objects.

A composite contains components. Components come in two flavors: composites and leaf elements. A composite holds a set of children; those children may be other composites or leaf elements.

The Composite Pattern takes the Single Responsibility Principle and trades it for transparency - by allowing the Component interface to contain the child management operations and the leaf operations, a client can treat both composites and leaves uniformly. We are guided by design principles, but we always need to observe the effect they have on our designs.

Bullet points:

- An Iterator allows access to an aggregate's elements without exposing its internal structure.
- An Iterator takes the job of iterating over an aggregate and encapsulates it in another object.
- When using an Iterator, we relieve the aggregate of the responsibility of supporting operations for traversing its data.
- An Iterator provides a common interface for traversing the items of an aggregate, allowing you to use polymorphism when writing code that makes use of the items of the aggregate.
- The Iterable interface provides a means of getting an iterator and enables Java's enhanced for loop (for-each).
- We should strive to assign only one responsibility to each class.
- The Composite Pattern allows clients to treat composites and individual objects uniformly.
- A Component is any object in a Composite structure. Components may be other composites or leaves.
- There are many design tradeoffs in implementing Composite. You need to balance transparency and safety with your needs.

## Chapter 10: The State of Things

[The State Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_10_state.py)

The Strategy and State Patterns are twins separated at birth. The Strategy Pattern went on to create a wildly successful business around interchangeable algorithms, while State took the perhaps more noble path of helping objects to control their behavior by changing their internal state. As different as their paths became, however, underneath you will find almost precisely the same design.

The State Pattern:

> Allows an object to alter its behavior when its internal state changes. The object will appear to change its class.

The pattern encapsulates state into separate classes and delegates to the object representing the current state.

What does it mean for an object to "appear to change its class"? If an object you are using can completely change its behavior, then it appears to you that the object is actually instantiated from another class. In reality, however, you know that we are using composition to give the appearance of a class change by simply referencing different state objects.

Think of the Strategy Pattern as a flexible alternative to subclassing - if you use inheritance to define the behavior of a class, then you are stuck with that behavior even if you need to change it. With Strategy, you can change the behavior by composing with a different object.
Think of the State Pattern as an alternative to putting lots of conditionals in your context - by encapsulating the behaviors within state objects, you can simply change the state object in the context to change its behavior.

Bullet points:

- The State Pattern allows an object to have many behaviors that are based on its internal state.
- Unlike a procedural state machine, the State Pattern represents each state as a full-blown class.
- The Context gets its behavior by delegating to the current state object it is composed with.
- By encapsulating each state into a class, we localize any changes that will need to be made.
- The State and Strategy Patterns have the same class diagram, but they differ in intent.
- The Strategy Pattern typically configures Context classes with a behavior or algorithm.
- The State Pattern allows a Context to change its behavior as the state of the Context changes.
- State transitions can be controlled by the State classes or by the Context classes.
- Using the State Pattern will typically result in a greater number of classes in your design.
- State classes may be shared among Context instances.

## Chapter 11: Controlling Object Access

[The Virtual Proxy Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_11_virtual_proxy.py)

Proxies control and manage access. Proxies have been known to haul entire method calls over the internet for their proxied objects - and they have also been known to patiently stand in for some pretty lazy objects.

A proxy pretends it is the real object, but it is really communicating over the net with the real object. A remote proxy acts as a local representative to a remote object. A remote object is an object that lives in the heap of a different JVM. A local representative is an object that you call local methods on and have them forwarded on to the remote object.

RMI builds the client and the service helper objects. The nice thing about RMI is that you don't have to write any of the networking or I/O code yourself. Networking and I/O methods are risky and can fail, and the client does have to acknowledge the risk. RMI nomenclature: the client helper is a "stub" and the service helper is a "skeleton".

The Proxy Pattern:

> Provides a surrogate or placeholder for another object to control access to it.

Use the Proxy Pattern to create a representative object that controls access to another object, which may be remote, expensive to create, or in need of securing.

The Proxy Pattern can manifest itself in many forms, e.g. the Virtual Proxy. A Virtual Proxy acts as a representative for an object that may be expensive to create. The Virtual Proxy often defers the creation of the object until it is needed. The Virtual Proxy also acts as a surrogate for the object before and while it is being created. After that, the proxy delegates requests to the RealSubject.

ImageProxy for an application displaying images:

1. ImageProxy first creates an ImageIcon and starts loading it from a network URL.
2. While the bytes of the image are being retrieved, ImageProxy displays "Loading album cover, please wait...".
3. When the image is fully loaded, ImageProxy delegates all method calls to the image icon.
4. If the user requests a new image, we will create a new proxy and start the process over.

There are a lot of variants of the Proxy Pattern in the real world. What they all have in common is that they intercept a method invocation that the client is making to the subject.
This level of indirection allows us to do many things, including dispatching requests to a remote subject, providing a representative for an expensive object as it is created, or providing some level of protection that can determine which clients should be calling which methods.

Protection Proxy - a proxy that controls access to an object based on access rights. For example, for an `Employee` object, a Protection Proxy might allow the employee to call certain methods on the object, a manager to call additional methods (like `setSalary`), and an HR employee to call any method on the object.

Additional proxies:

- Firewall Proxy - controls access to a set of network resources, protecting the subject from "bad" clients.
- Smart Reference Proxy - provides additional actions whenever a subject is referenced, such as counting the number of references to an object.
- Caching Proxy - provides temporary storage for results of operations that are expensive. It can also allow multiple clients to share the results to reduce computation or network latency.
- Synchronization Proxy - provides safe access to a subject from multiple threads.
- Complexity Hiding Proxy - hides the complexity of, and controls access to, a complex set of classes. This is sometimes called the Facade Proxy for obvious reasons. The Complexity Hiding Proxy differs from the Facade Pattern in that the proxy controls access, while the Facade Pattern just provides an alternative interface.
- Copy-On-Write Proxy - controls the copying of an object by deferring the copying until it is required by a client. This is a variant of the Virtual Proxy.

Bullet points:

- The Proxy Pattern provides a representative for another object in order to control the client's access to it.
- A Remote Proxy manages interaction between a client and a remote object.
- A Virtual Proxy controls access to an object that is expensive to instantiate.
- A Protection Proxy controls access to the methods of an object based on the caller.
- Many other variants of the Proxy Pattern exist, including caching proxies, firewall proxies, copy-on-write proxies, and so on.
- Proxy is structurally similar to Decorator, but the two patterns differ in their purpose.
- The Decorator Pattern adds behavior to an object, while Proxy controls access.
- Java's built-in support for Proxy can build a dynamic proxy class on demand and dispatch all calls on it to a handler of your choosing.
- Like any wrapper, proxies will increase the number of classes and objects in your designs.

## Chapter 12: Patterns of Patterns

Some of the most powerful OO designs use several patterns together. Compound patterns - a set of patterns that work together in a design that can be applied over many problems. Patterns are often used together and combined within the same design solution. A compound pattern combines two or more patterns into a solution that solves a recurring or general problem.

It is possible to rework the Duck Simulator from the first chapter using 6 patterns. In fact, you never actually want to approach a design like this. You only want to apply patterns when and where they make sense. **You never want to start out with the intention of using patterns just for the sake of it.**

MVC - it is just a few patterns put together. Music players underneath use MVC.

View - gives you a presentation of the model. The view usually gets the state and data it needs to display directly from the model.

Controller - takes user input and figures out what it means to the model.
Model - holds all the data, state and application logic. The model is oblivious to the view and controller, although it provides an interface to manipulate and retrieve its state, and it can send notifications of state changes to observers.

You are the user - you interact with the view. When you do something to the view, the view tells the controller what you did. It is the controller's job to handle that. The controller asks the model to change its state. If you click a button, it is the controller's job to figure out what that means and how the model should be manipulated based on that action. The controller may also ask the view to change. The model notifies the view when its state has changed. The view asks the model for state.

MVC is made of:

- Strategy - the view and controller implement the classic Strategy Pattern: the view is configured with a strategy, and the controller provides the strategy.
- Composite - the display consists of a nested set of windows, panels, buttons, text labels and so on. Each display component is a composite (like a window) or a leaf (like a button). When the controller tells the view to update, it only has to tell the top view component, and Composite takes care of the rest.
- Observer - the model implements the Observer Pattern to keep interested objects updated when state changes occur.

Typically, you need one controller per view at runtime; however, the same controller class can easily manage many views.

MVC has been adapted to many web frameworks:

- thin client - the model, most of the view and the controller all reside on the server, with the browser providing a way to display the view, and to get input from the browser to the controller.
- single page application - almost all of the model, view and controller resides on the client side.

MVC frameworks: Django, AngularJS, EmberJS, ...

Bullet points:

- The Model-View-Controller Pattern is a compound pattern consisting of the Observer, Strategy and Composite Patterns.
- The model makes use of the Observer Pattern so that it can keep observers updated yet stay decoupled from them.
- The controller is the Strategy for the view. The view can use different implementations of the controller to get different behavior.
- The view uses the Composite Pattern to implement the user interface, which usually consists of nested components like panels, frames and buttons.
- These patterns work together to decouple the three players in the MVC model, which keeps designs clear and flexible.
- The Adapter Pattern can be used to adapt a new model to an existing view and controller.
- MVC has been adapted to the web.
- There are many web MVC frameworks with various adaptations of the MVC pattern to fit the client/server application structure.

## Chapter 13: Patterns in the Real World

A Pattern:

> is a solution to a problem in a context.

- The context is the situation in which the pattern applies. This should be a recurring situation.
- The problem refers to the goal you are trying to achieve in this context, but it also refers to any constraints that occur in the context.
- The solution is what you are after: a general design that anyone can apply that resolves the goal and set of constraints.

Like design principles, patterns are not meant to be laws or rules - they are guidelines that you can alter to fit your needs. A lot of real-world examples don't fit the classic pattern designs.
When you adapt patterns, it never hurts to document how your pattern differs from the classic design - that way other developers can quickly recognize the patterns you are using.

The Design Pattern definition tells us that the problem consists of a goal and a set of constraints. Only when the solution balances both sides of the _force_ (goal vs constraints) do we have a useful pattern.

A design pattern should have: a name, a template, an intent, a motivation, applicability, a code example, use cases, how the pattern relates to other patterns, and consequences.

Design patterns are discovered, not created. Anyone can discover a Design Pattern, however it is not easy and doesn't happen quickly. You don't have a pattern until others have used it and found it to work. You don't have a pattern until it passes the Rule of Three - a pattern can be called a pattern only if it has been applied in a real-world solution at least 3 times.

Creational Patterns - involve object instantiation and all provide a way to decouple a client from the objects it needs to instantiate: Singleton, Abstract Factory, Factory Method.

Behavioral Patterns - concerned with how classes and objects interact and distribute responsibility: Template Method, Iterator, Command, State, Observer, Strategy.

Structural Patterns - let you compose classes or objects into larger structures: Proxy, Facade, Composite, Adapter, Decorator.

Patterns are often classified by a second attribute - whether the pattern deals with classes or objects: Class Patterns (Template Method, Factory Method, Adapter) and Object Patterns (Composite, Decorator, State, Singleton, ...). Categorisation is confusing because many patterns fit into more than one category. Categories give us a way to think about the way groups of patterns relate and how patterns within a group relate to one another. They also give us a way to extrapolate to new patterns.

Keep it simple - KISS - your goal should be simplicity, not "how can I apply a pattern to this problem". Don't feel like you aren't a sophisticated developer if you don't use a pattern to solve a problem.

Patterns aren't a magic bullet. You can't plug one in, compile and then take an early lunch. To use patterns, you need to think through the consequences for the rest of your design.

Refactoring is a great time to reexamine your design to see if it might be better structured with patterns. Don't be afraid to remove a Design Pattern from your design - remove it when a simpler solution without the pattern would be better.

_YAGNI_: Resist the temptation of creating architectures that are ready to take on change from any direction. If the reason for adding a pattern is only hypothetical, don't add the pattern: it will only add complexity to your system, and you might never need it. Overuse of design patterns can lead to code that is downright overengineered. Always go with the simplest solution that does the job and introduce patterns where the need emerges.

The Beginner uses patterns everywhere. The Intermediate starts to see where patterns are needed and where they aren't. The Zen mind is able to see patterns where they fit naturally.

Anti-pattern:

> Tells you how to go from a problem to a BAD solution.

An anti-pattern tells you why a bad solution is attractive, why that solution in the long term is bad, and suggests other applicable patterns that may provide good solutions. An anti-pattern always looks like a good solution, but then turns out to be a bad solution when it is applied.
By documenting anti-patterns we help others to recognize bad solutions before they implement them. Like patterns, there are many types of anti-patterns, including development, OO, organizational, and domain-specific anti-patterns.

Bullet points:

- Let Design Patterns emerge in your designs, don't force them in just for the sake of using a pattern.
- Design Patterns aren't set in stone - adapt and tweak them to meet your needs.
- Always use the simplest solution that meets your needs, even if it doesn't include a pattern.
- Study Design Pattern catalogs to familiarize yourself with patterns and the relationships among them.
- Pattern classifications provide groupings for patterns. When they help, use them.
- You need to be committed to be a patterns writer - it takes time and patience, and you have to be willing to do lots of refinement.
- Remember, most patterns you encounter will be adaptations of existing patterns, not new patterns.
- Build your team's shared vocabulary. This is one of the most powerful benefits of using patterns.
- Like any community, the patterns community has its own lingo. Don't let that hold you back. Having read this book, you know most of it.

## Chapter 14: Leftover Patterns

**Bridge**

> Use the Bridge Pattern to vary not only your implementations, but also your abstractions.

Benefits:

+ Decouples an implementation so that it is not bound permanently to an interface.
+ Abstraction and implementation can be extended independently.
+ Changes to the concrete abstraction classes don't affect the client.

Bridge Uses and Drawbacks:

- Useful in graphics and windowing systems that need to run over multiple platforms.
- Useful any time you need to vary an interface and an implementation in different ways.
- Increases complexity.

**Builder**

> Use the Builder Pattern to encapsulate the construction of a product and allow it to be constructed in steps.

Benefits:

+ Encapsulates the way a complex object is constructed.
+ Allows objects to be constructed in a multistep and varying process (as opposed to one-step factories).
+ Hides the internal representation of the product from the client.
+ Product implementations can be swapped in and out because the client only sees an abstract interface.

Builder Uses and Drawbacks:

- Often used for building composite structures.
- Constructing objects requires more domain knowledge of the client than when using a Factory.

**Chain of Responsibility**

> Use the Chain of Responsibility Pattern when you want to give more than one object a chance to handle a request.

Benefits:

+ Decouples the sender of the request and its receivers.
+ Simplifies your object because it doesn't have to know the chain's structure and keep direct references to its members.
+ Allows you to add or remove responsibilities dynamically by changing the members or order of the chain.

Chain of Responsibility Uses and Drawbacks:

- Commonly used in windowing systems to handle events like mouse clicks and keyboard events.
- Execution of the request isn't guaranteed - it may fall off the end of the chain if no object handles it.
- Can be hard to observe and debug at runtime.

**Flyweight**

> Use the Flyweight Pattern when one instance of a class can be used to provide many virtual instances.

Benefits:

+ Reduces the number of object instances at runtime, saving memory.
+ Centralizes state for many "virtual" objects into a single location.

Flyweight Uses and Drawbacks:

- The Flyweight is used when a class has many instances, and they all can be controlled identically.
- A drawback of the Flyweight Pattern is that once you have implemented it, single, logical instances of the class will not be able to behave independently from the other instances.
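A minimal Python sketch of the Flyweight idea (the `TreeType` example is illustrative, not from the book): intrinsic state is shared through a factory, while extrinsic state (the coordinates) stays with the client.

```python
class TreeType:  # intrinsic state shared by many "virtual" trees
    def __init__(self, name, texture):
        self.name, self.texture = name, texture


class TreeFactory:
    _types = {}  # one instance per distinct (name, texture) pair

    @classmethod
    def get(cls, name, texture):
        key = (name, texture)
        if key not in cls._types:
            cls._types[key] = TreeType(name, texture)
        return cls._types[key]


# thousands of trees, but only one TreeType instance is allocated
forest = [(x, y, TreeFactory.get("oak", "oak.png"))
          for x in range(100) for y in range(100)]
assert forest[0][2] is forest[-1][2]  # every tree shares the same flyweight
```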
**Interpreter**

> Use the Interpreter Pattern to build an interpreter for a language.

When you need to implement a simple language, the Interpreter Pattern defines a class-based representation for its grammar along with an interpreter to interpret its sentences. To represent the language, you use a class to represent each rule in the language.

Benefits:

+ Representing each grammar rule in a class makes the language easy to implement.
+ Because the grammar is represented by classes, you can easily change or extend the language.
+ By adding methods to the class structure, you can add new behaviors beyond interpretation, like pretty printing and more sophisticated program validation.

Interpreter Uses and Drawbacks:

- Use Interpreter when you need to implement a simple language.
- Appropriate when you have a simple grammar and simplicity is more important than efficiency.
- Used for scripting and programming languages.
- This pattern can become cumbersome when the number of grammar rules is large. In these cases a parser/compiler generator may be more appropriate.

**Mediator**

> Use the Mediator Pattern to centralize complex communications and control between related objects.

Benefits:

+ Increases the reusability of the objects supported by the Mediator by decoupling them from the system.
+ Simplifies maintenance of the system by centralizing control logic.
+ Simplifies and reduces the variety of messages sent between objects in the system.

Mediator Uses and Drawbacks:

- The Mediator is commonly used to coordinate related GUI components.
- A drawback of the Mediator Pattern is that without proper design, the Mediator object itself can become overly complex.

**Memento**

> Use the Memento Pattern when you need to be able to return an object to one of its previous states: for instance, if
> your user requests an "undo".

The Memento has 2 goals:

- Saving the important state of a system's key object.
- Maintaining the key object's encapsulation.

Keeping the Single Responsibility Principle in mind, it is also a good idea to keep the state that you are saving separate from the key object. This separate object that holds the state is known as the Memento object.

Benefits:

+ Keeping the saved state external from the key object helps to maintain cohesion.
+ Keeps the key object's data encapsulated.
+ Provides easy-to-implement recovery capability.

Memento Uses and Drawbacks:

- The Memento is used to save state.
- A drawback to using Memento is that saving and restoring state can be time-consuming.
- In Java systems, consider using Serialization to save a system's state.

**Prototype**

> Use the Prototype Pattern when creating an instance of a given class is either expensive or complicated.

The Prototype Pattern allows you to make new instances by copying existing instances.

Benefits:

+ Hides the complexities of making new instances from the client.
+ Provides the option for the client to generate objects whose type is not known.
+ In some circumstances, copying an object can be more efficient than creating a new object.

Prototype Uses and Drawbacks:

- Prototype should be considered when a system must create new objects of many types in a complex class hierarchy.
- A drawback to using Prototype is that making a copy of an object can sometimes be complicated.
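A quick Python sketch of Prototype (names are illustrative): new instances are produced by copying a pre-built one instead of re-running an expensive constructor.

```python
import copy


class Monster:
    def __init__(self, kind, abilities):
        # imagine this constructor is expensive (e.g. loads assets)
        self.kind, self.abilities = kind, abilities

    def clone(self):
        return copy.deepcopy(self)  # deep copy so clones mutate independently


prototype = Monster("dragon", ["fly", "breathe fire"])
clone = prototype.clone()
clone.abilities.append("turn invisible")
assert prototype.abilities == ["fly", "breathe fire"]  # prototype unchanged
```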
**Visitor**

> Use the Visitor Pattern when you want to add capabilities to a composite of objects and encapsulation is not
> important.

The Visitor works hand in hand with a Traverser. The Traverser knows how to navigate to all the objects in a Composite. The Traverser guides the Visitor through the Composite so that the Visitor can collect state as it goes. Once state has been gathered, the Client can have the Visitor perform various operations on the state.

Benefits:

+ Allows you to add operations to a Composite.
+ Adding new operations is relatively easy.
+ The code for operations performed by the Visitor is centralized.

Visitor Drawbacks:

- The Composite classes' encapsulation is broken when the Visitor is used.
- Because traversal is involved, changes to the Composite structure are more difficult.

================================================
FILE: books/kubernetes-book.md
================================================

[go back](https://github.com/pkardas/learning)

# The Kubernetes Book

Book by Nigel Poulton, https://github.com/nigelpoulton/TheK8sBook

- [1: Kubernetes primer](#1-kubernetes-primer)
- [2: Kubernetes principles of operation](#2-kubernetes-principles-of-operation)
- [3: Getting Kubernetes](#3-getting-kubernetes)
- [4: Working with Pods](#4-working-with-pods)
- [5: Virtual clusters with Namespaces](#5-virtual-clusters-with-namespaces)
- [6: Kubernetes Deployments](#6-kubernetes-deployments)
- [7: Kubernetes Services](#7-kubernetes-services)
- [8: Ingress](#8-ingress)
- [9: Service discovery deep dive](#9-service-discovery-deep-dive)
- [10: Kubernetes storage](#10-kubernetes-storage)
- [11: ConfigMaps and Secrets](#11-configmaps-and-secrets)
- [12: StatefulSets](#12-statefulsets)
- [13: API security and RBAC](#13-api-security-and-rbac)
- [14: The Kubernetes API](#14-the-kubernetes-api)
- [15: Threat modeling Kubernetes](#15-threat-modeling-kubernetes)

## 1: Kubernetes primer

Kubernetes - an application orchestrator, it orchestrates containerized cloud-native microservices apps.

- orchestrator - a system that deploys and manages applications (dynamically responds to changes - scale up/down, self-heal, perform zero-downtime rolling updates)
- containerized app - an app that runs in a container
- 1980-1990 physical servers era, 2000-2010 virtual machines and virtualization era, now cloud-native era
- cloud-native app - designed to meet cloud-like demands of auto-scaling, self-healing, rolling updates, rollbacks and more; cloud-native is about the way applications behave and react to events
- microservices app - built from lots of small, specialised, independent parts that work together to form a meaningful application

Kubernetes enables 2 things Google and the rest of the industry need:

1. It abstracts underlying infrastructure such as AWS
2. It makes it easy to move applications on and off clouds

Kubernetes vs Docker Swarm - long story short, Kubernetes won. Docker Swarm is still under active development and is popular with small companies that need a simple alternative to Kubernetes.

Kubernetes as the operating system of the cloud:

- you install a traditional OS on a server, and it abstracts server resources and schedules application processes
- you install Kubernetes on a cloud, and it abstracts cloud resources and schedules application microservices

At a high level, a cloud/datacenter is a pool of compute, network and storage resources. Kubernetes abstracts them. Servers are no longer pets, they are cattle.
Kubernetes is like a courier service - you package the app as a container, give it a Kubernetes manifest, and let Kubernetes take care of deploying it and keeping it running.

## 2: Kubernetes principles of operation

Kubernetes is 2 things:

- a cluster to run applications on - like any cluster, a bunch of machines to host apps - these machines are called "nodes" (physical servers, VMs, cloud instances, Raspberry Pis, ...); the cluster is made of:
  - control plane (the brains) - exposes the API, has a scheduler for assigning work, records the state of the cluster and apps
  - worker nodes (the muscle) - where user apps run
- an orchestrator of cloud-native microservices apps - a system that takes care of deploying and managing apps

Simple process to run apps on a Kubernetes cluster:

1. Design and write the application as small independent microservices
2. Package each microservice as its own container
3. Wrap each container in a Kubernetes Pod
4. Deploy Pods to the cluster via higher-level controllers such as Deployments, DaemonSets, StatefulSets, CronJobs, ...

The Control Plane - runs a collection of system services that make up the control plane of the cluster (also called Masters, Heads, or Head nodes). Production environments should have multiple control plane nodes - 3 or 5 are recommended - spread across availability zones.

Different services making up the control plane:

- The API server - the Grand Central station of Kubernetes, all communication, between all components, must go through the API server. All roads lead to the API server.
- The Cluster Store - the only stateful part of the Control Plane, stores the configuration and the state. Based on `etcd` (a popular distributed database).
- The Controller Manager and Controllers - all the background controllers that monitor cluster components and respond to events.
- The Scheduler - watches the API server for new work tasks and assigns them to appropriate healthy worker nodes. Only responsible for picking the nodes to run tasks, it isn't responsible for running them.
- The Cloud Controller Manager - its job is to facilitate integrations with cloud services, such as instances, load-balancers, and storage.

Worker nodes - are where user applications run. At a high level they do 3 things:

1. Watch the API server for new work assignments
2. Execute work assignments
3. Report back to the control plane (via the API server)

3 major components of a worker node:

1. Kubelet - the main Kubernetes agent, runs on every worker node. Watches the API server for new work tasks, executes them, and maintains a reporting channel back to the control plane.
2. Container runtime - the kubelet needs it to perform container-related tasks - things like pulling images and starting and stopping containers.
3. Kube-proxy - runs on every node and is responsible for local cluster networking.

In order to run on a Kubernetes cluster an application needs to:

1. Be packaged as a container
2. Be wrapped in a Pod
3. Be deployed via a declarative manifest file
The declarative model:

- declare the desired state of an application microservice in a manifest file - desired state = image, how many replicas, which network ports, how to perform updates
- post it to the API server - using the `kubectl` CLI (it uses an HTTP request)
- Kubernetes stores it in the cluster store as the application's desired state
- Kubernetes implements the desired state on the cluster
- a controller makes sure the observed state of the application doesn't vary from the desired state - background reconciliation loops constantly monitor the state of the cluster; if desired state != observed state, Kubernetes performs the necessary tasks

Kubernetes Pod - a wrapper that allows a container to run on a Kubernetes cluster. The atomic unit of scheduling. VMware has virtual machines, Docker has containers, Kubernetes has Pods. In Kubernetes, every container must run inside a Pod. "Pod" comes from "a pod of whales" (a group of whales is called a pod).

"Pod" and "container" are often used interchangeably, however it is possible (in some advanced use-cases) to run multiple containers in a single Pod.

Pods don't run applications - applications always run in containers, the Pod is just a sandbox to run one or more containers. Pods are also the minimum unit of scheduling in Kubernetes. If you need to scale an app, you add or remove Pods. You do not scale by adding more containers to existing Pods. A Pod is only ready for service when all its containers are up and running. A single Pod can only be scheduled to a single node.

Pods are immutable. Whenever we talk about updating Pods, we mean: delete the Pod and replace it with a new one. Pods are unreliable.

Example controller: Deployments - a high-level Kubernetes object that wraps around a Pod and adds features such as self-healing, scaling, zero-downtime rollouts, and versioned rollbacks.

Services - provide reliable networking for a set of Pods. Services have a stable DNS name, IP address and port, and they load-balance traffic across a dynamic set of Pods. As Pods come and go, the Service observes this, automatically updates itself, and continues to provide that stable networking endpoint.

Service - a stable network abstraction that provides TCP/UDP load-balancing across a dynamic set of Pods.

## 3: Getting Kubernetes

Hosted Kubernetes: AWS Elastic Kubernetes Service, Google Kubernetes Engine, Azure Kubernetes Service. Managing your own Kubernetes cluster isn't a good use of time and other resources. However, it is easy to rack up large bills if you forget to turn off infrastructure when not in use. The hardest way to get a Kubernetes cluster is to build it yourself.

Play with Kubernetes - a quick and simple way to get your hands on a development Kubernetes cluster. However, it is time limited and sometimes suffers from capacity and performance issues. Link: https://labs.play-with-k8s.com

Docker Desktop - offers a single-node Kubernetes cluster that you can develop and test with.

`kubectl` is the main Kubernetes command-line tool. At a high level, `kubectl` converts user-friendly commands into HTTP REST requests with the JSON content required by the Kubernetes API server.

```shell
kubectl get nodes
```

```shell
kubectl config current-context
```

```shell
kubectl config use-context docker-desktop
```

## 4: Working with Pods

Controllers - infuse Pods with super-powers such as self-healing, scaling, rollouts and rollbacks. Every Controller has a PodTemplate defining the Pods it deploys and manages.
You rarely interact with Pods directly. Pod - the atomic unit of scheduling in Kubernetes. Apps deployed to Kubernetes always run inside Pods. If you deploy an app, you deploy it in a Pod. If you terminate an app, you terminate its Pod. If you scale your app up/down, you add/remove Pods. Kubernetes doesn't allow containers to run directly on a cluster, they always have to be wrapped in a Pod.

1. Pods augment containers:

- labels - group Pods and associate them with others
- annotations - add experimental features and integrations with 3rd-party tools
- probes - test the health and status of Pods and the apps they run; this enables advanced scheduling, updates, and more
- affinity and anti-affinity rules - control over where in the cluster Pods are allowed to run
- termination controls - gracefully terminate Pods and the apps they run
- security policies - enforce security features
- resource requests and limits - min and max values for CPU, memory, IO, ...

Despite bringing so many features, Pods are super-lightweight and add very little overhead.

```shell
kubectl explain pods --recursive
```

```shell
kubectl explain pod.spec.restartPolicy
```

2. Pods assist in scheduling: every container in a Pod is guaranteed to be scheduled to the same worker node.

3. Pods enable resource sharing: Pods provide a shared execution environment for one or more containers (filesystem, network stack, memory, volumes). So if a Pod has 2 containers, both containers share the Pod's IP address and can access any of the Pod's volumes to share data.

There are 2 ways to deploy a Pod:

- directly via a Pod manifest - called "static Pods", no super-powers like self-healing, scaling, or rolling updates
- indirectly via a controller - gets all the benefits of being monitored by a highly-available controller running on the control plane

Pets vs Cattle paradigm - Pods are cattle; when they die, they get replaced by another. The old one is gone, and a shiny new one (with the same config, but a different IP and UID) magically appears and takes its place. This is why applications should always store state and data outside the Pod. It is also why you should not rely on individual Pods - they are ephemeral, here today, gone tomorrow.

Deploying Pods:

1. Define the Pod in a YAML manifest file
2. Post it to the API server
3. The API server authenticates and authorizes the request
4. The configuration (YAML) is validated
5. The scheduler deploys the Pod to a healthy worker node with enough available resources

If you are using Docker or containerd as your container runtime, a Pod is actually a special type of container - a pause container. This means containers running inside of Pods are really containers running inside containers.

The Pod network is flat, meaning every Pod can talk directly to every other Pod without the need for complex routing and port mappings. You should use Kubernetes Network Policies to restrict this.

Pod deployment is an atomic operation - all-or-nothing - deployment either succeeds or fails. You will never have a scenario where a partially deployed Pod is servicing requests.

Pod lifecycle: pending -> running (long-lived Pod) | succeeded (short-lived Pod)

- short-lived - batch jobs, designed to only run until a task completes
- long-lived - web servers, remain in the running phase indefinitely; if containers fail, the controller may attempt to restart them

Pods are immutable objects. You can't modify them after they are deployed. You always replace a Pod with a new one (in case of a failure or update).
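A minimal Pod manifest for step 1 above - a sketch that matches the `hello-pod`/`hello-ctr` names used in the commands that follow (the image is illustrative):

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: hello-pod
  labels:
    app: hello
spec:
  containers:
    - name: hello-ctr
      image: nigelpoulton/k8sbook:1.0   # illustrative image
      ports:
        - containerPort: 8080
```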
If you need to scale an app, you add or remove Pods (horizontal scaling). You never scale an app by adding more of the same containers to a Pod. Multi-container Pods are only for co-scheduling and co-locating containers that need tight coupling. Co-locating multiple containers in the same Pod allows containers to be designed with a single responsibility but co-operate closely with others.

Kubernetes multi-container Pod patterns:

- Sidecar pattern - (most popular) the job of a sidecar is to augment or perform a secondary task for the main application container
- Adapter pattern - a variation of the sidecar pattern where the helper container takes non-standardized output from the main container and rejigs it into a format required by an external system
- Ambassador pattern - a variation of the sidecar pattern where the helper container brokers connectivity to an external system; ambassador containers interface with external systems on behalf of the main app container
- Init pattern - runs a special init container that is guaranteed to start and complete before your main app container; it is also guaranteed to run only once

```shell
kubectl get pods
```

Get Pods with additional info:

```shell
kubectl get pods -o wide
```

Get Pod info, a full copy of the Pod from the cluster:

```shell
kubectl get pods -o yaml
```

Get even more info (spec - desired state, status - observed state):

```shell
kubectl get pods hello-pod -o yaml
```

Pod manifest files:

- kind - tells Kubernetes the type of object being defined
- apiVersion - defines the schema version to use when creating the object
- metadata - names, labels, annotations, and a Namespace
- spec - defines the containers the Pod will run

```shell
kubectl apply -f pod.yml
```

You can add the `--watch` flag to `kubectl get pods` to monitor the Pod and see when its status changes to _Running_.

`kubectl describe` - a nicely formatted multi-line overview of an object. You can see the ordering and names of containers using this command:

```shell
kubectl describe pods hello-pod
```

`kubectl logs` - like other Pod-related commands, if you don't specify `--container`, it executes against the first container in the Pod:

```shell
kubectl logs hello-pod
```

```shell
kubectl logs hello-pod --container hello-ctr
```

`kubectl exec` - execute commands inside a running Pod:

```shell
kubectl exec hello-pod -- pwd
```

Get shell access:

```shell
kubectl exec -it hello-pod -- sh
```

The `-it` flag makes the session interactive and connects STDIN and STDOUT on your terminal to STDIN and STDOUT inside the first container in the Pod.

Pod hostname - every container in a Pod inherits its hostname from the name of the Pod (`metadata.name`). With this in mind, you should always set Pod names as valid DNS names (a-z, 0-9, hyphens and dots).

The `spec.initContainers` block defines one or more containers that Kubernetes guarantees will run and complete before the main app container starts.

```shell
kubectl delete pod git-sync
```

## 5: Virtual clusters with Namespaces

Namespaces are a native way to divide a single Kubernetes cluster into multiple virtual clusters. Namespaces partition a Kubernetes cluster and are designed as an easy way to apply quotas and policies to groups of objects.

See all Kubernetes API resources supported in your cluster:

```shell
kubectl api-resources
```

Namespaces are a good way of sharing a single cluster among different departments and environments. For example, a single cluster might have the following Namespaces: dev, test, qa. Each one can have its own set of users and permissions, as well as unique resource quotas, as sketched below.
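A sketch of what such a per-environment setup might look like (names and limits are illustrative, not from the book): a Namespace plus a ResourceQuota scoped to it.

```yaml
apiVersion: v1
kind: Namespace
metadata:
  name: dev
---
apiVersion: v1
kind: ResourceQuota
metadata:
  name: dev-quota
  namespace: dev
spec:
  hard:
    pods: "10"        # at most 10 Pods in the dev Namespace
    requests.cpu: "4" # total CPU requested by all Pods in it
```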
Namespaces are not good for isolating hostile workloads. A compromised container or Pod in one Namespace can wreak havoc in other Namespaces. For example, you shouldn't place competitors such as Pepsi and Coke in separate Namespaces on the same shared cluster. If you need strong workload isolation, the current method is to use multiple clusters. There are some attempts to do something different, but the safest and most common way of isolating workloads is putting them on their own clusters.

Every Kubernetes cluster has a set of pre-created Namespaces (virtual clusters):

```shell
kubectl get namespaces
```

- `default` is where newly created objects go if you don't specify a Namespace
- `kube-system` is where DNS, the metrics server, and other control plane components run
- `kube-public` is for objects that need to be readable by anyone
- `kube-node-lease` is used for node heartbeats and managing node leases

```shell
kubectl describe namespaces default
```

List Service objects in a selected Namespace:

```shell
kubectl get svc --namespace kube-system
```

```shell
kubectl get svc --all-namespaces
```

Create a new Namespace. Pods don't create a Namespace automatically, a Namespace must be created first:

```shell
kubectl create ns kydra
```

Switch between Namespaces:

```shell
kubens shield
```

There are 2 ways to deploy objects to a specific Namespace:

- imperatively - requires you to add the `-n` or `--namespace` flag to commands
- declaratively - requires you to specify the Namespace in the YAML

Delete Pods:

```shell
kubectl delete -f shield.app.yml
```

Delete a Namespace:

```shell
kubectl delete ns shield
```

## 6: Kubernetes Deployments

Use Deployments to bring cloud-native features such as self-healing, scaling, rolling updates, and versioned rollbacks to stateless apps on Kubernetes.

Kubernetes offers several controllers that augment Pods with important capabilities. The Deployment controller is designed for stateless apps. The Deployment spec is a declarative YAML object where you describe the desired state of a stateless app. The controller element operates as a background loop on the control plane, reconciling observed state with desired state.

You start with a stateless application, package it as a container, then define it in a Pod template. At this point you have a static Pod - it doesn't self-heal, autoscale, or support easy updates. That is why you almost always wrap Pods in a Deployment object. A Deployment object only manages a single Pod template.

Deployments rely heavily on ReplicaSets. ReplicaSets manage Pods and bring self-healing and scaling. Deployments manage ReplicaSets and add rollouts and rollbacks. It is not recommended to manage ReplicaSets directly. Think of Deployments as managing ReplicaSets, and ReplicaSets as managing Pods.

Deployments:

- if Pods managed by a Deployment fail, they will be replaced (self-healing)
- if Pods managed by a Deployment see increased or decreased load, they can be scaled

3 concepts fundamental to everything about Kubernetes:

- desired state (what you want)
- observed state (what you have)
- reconciliation (if desired state != observed state, a process of reconciliation attempts to bring observed state into sync with desired state)

The declarative model is a method of telling Kubernetes your desired state, while avoiding the detail of how to implement it. You leave the _how_ up to Kubernetes.
Zero-downtime rolling updates of stateless apps are what Deployments are about. They require a couple of things from your microservice applications in order to work properly:

- loose coupling via APIs
- backwards and forwards compatibility

Each Deployment describes all the following:

- how many Pod replicas
- what images to use for the Pod's containers
- what network ports to expose
- details about how to perform rolling updates

Deploying a new version: update the same Deployment YAML file with the new image version and re-post it to the API server. Rollback: you wind one of the old ReplicaSets up while you wind the current one down. Kubernetes gives you fine-grained control over how rollouts and rollbacks proceed - insert delays, control the pace and cadence of releases, probe the health and status of updated replicas.

YAML components:

- `apiVersion: apps/v1` - Deployments live in the `apps/v1` subgroup
- `kind: Deployment` - a Deployment object
- `metadata.name: hello-deploy` - a valid DNS name
- `spec` - anything nested below `spec` relates to the Deployment
- `spec.template` - the Pod template the Deployment uses to stamp out Pod replicas
- `spec.replicas` - how many Pod replicas the Deployment should create and manage
- `spec.selector` - a list of labels that Pods must have in order for the Deployment to manage them. This tells Kubernetes which Pods to terminate and replace when performing a rollout.
- `spec.revisionHistoryLimit` - how many older versions/ReplicaSets to keep
- `spec.progressDeadlineSeconds` - tells Kubernetes how long to wait during a rollout for each new replica to come online
- `spec.strategy` - tells the Deployment controller how to upgrade the Pods when a rollout occurs - with the Rolling Update strategy:
  - never have more than one Pod below desired state (`maxUnavailable: 1`) - with 10 replicas, you will never have fewer than 9 during the update process
  - never have more than one Pod above desired state (`maxSurge: 1`) - you will never have more than 11 replicas during the update process
  - net result - update two Pods at a time; the delta between 9 and 11 is 2

```yaml
spec:
  replicas: 10
  selector:
    matchLabels:
      app: hello-world
  revisionHistoryLimit: 5
  progressDeadlineSeconds: 300
  minReadySeconds: 10
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  template:
    metadata:
      labels:
        app: hello-world
    spec:
      containers:
        - name: hello-pod
          image: nigelpoulton/k8sbook:2.0
          ports:
            - containerPort: 8080
```

Deploy to the cluster:

```shell
kubectl apply -f deploy.yml
```

```shell
kubectl get deploy hello-deploy
```

```shell
kubectl describe deploy hello-deploy
```

```shell
kubectl get replicaset
```

```shell
kubectl describe replicaset hello-deploy-5cd5dcf7d7
```

In order to access a web app from a stable name or IP address, or even from outside the cluster, you need a Kubernetes Service object. A Service provides reliable networking for a set of Pods.
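For example, a minimal ClusterIP Service fronting the Deployment above might look like this (a sketch; the Service name is illustrative) - the selector matches the Pod template's `app: hello-world` label:

```yaml
apiVersion: v1
kind: Service
metadata:
  name: hello-svc
spec:
  type: ClusterIP      # default - a stable internal IP and DNS name
  selector:
    app: hello-world   # matches the Deployment's Pod labels
  ports:
    - port: 8080       # port the Service listens on
      targetPort: 8080 # port the Pods listen on
```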
Scaling the number of replicas manually - edit the YAML and set a different number of replicas, or use the command:

```shell
kubectl scale deploy hello-deploy --replicas 5
```

Performing a rolling update (by replacement, because Pods are immutable):

```shell
kubectl apply -f deploy.yml
```

```shell
kubectl rollout status deployment hello-deploy
```

Pausing & resuming a deployment:

```shell
kubectl rollout pause deploy hello-deploy
```

```shell
kubectl rollout resume deploy hello-deploy
```

Detailed deployment info:

```shell
kubectl describe deploy hello-deploy
```

Kubernetes maintains a documented revision history of rollouts:

```shell
kubectl rollout history deployment hello-deploy
```

Rolling updates create new ReplicaSets, and old ReplicaSets aren't deleted. The fact the old ones still exist makes them ideal for executing rollbacks:

```shell
kubectl rollout undo deployment hello-deploy --to-revision=1
```

Modern versions of Kubernetes use the system-generated pod-template-hash label so only Pods that were originally created by the Deployment/ReplicaSet will be managed:

```shell
kubectl get pods --show-labels
```

## 7: Kubernetes Services

Controllers add self-healing, scaling and rollouts. Despite all of this, Pods are still unreliable, and you should never connect directly to them. Services provide stable and reliable networking for a set of unreliable Pods.

Every Service gets its own stable IP address, its own DNS name, and its own stable port. The Service fronts the Pods with a stable IP, DNS name, and port. It also load-balances traffic to Pods with the right labels. With a Service in place, the Pods can scale up/down, they can fail, and they can be updated and rolled back. Despite all of this, clients will continue to access them without interruption. The Service observes the changes and updates its list of healthy Pods it sends traffic to. Think of Services as having a static front-end and a dynamic back-end.

Services are loosely coupled with Pods via labels and selectors. This is the same technology that loosely couples Deployments to Pods.

Every time you create a Service, Kubernetes automatically creates an associated Endpoints object. The Endpoints object is used to store a dynamic list of healthy Pods matching the Service's label selector. Any new Pods that match the selector get added to the Endpoints object.

Types of Services:

- accessible from inside the cluster:
  - ClusterIP - the default type, a stable virtual IP; every Service you create gets a ClusterIP
- accessible from outside the cluster:
  - NodePort - built on top of ClusterIP, allows external clients to hit a dedicated port on every cluster node and reach the Service
  - LoadBalancer - makes external access even easier by integrating with an internet-facing load-balancer on your underlying cloud platform

Example Service object:

```yml
spec:
  type: NodePort
  ports:
    - port: 8080        # listen internally on port 8080
      nodePort: 30001   # listen externally on 30001
      targetPort: 8080  # forward traffic to the application Pods on port 8080
      protocol: TCP     # use TCP (default)
  selector:             # send traffic to all healthy Pods with the following metadata.labels
    chapter: services
```

Get the Endpoints (EndpointSlice) objects:

```shell
kubectl get endpointslices
```

Get details of each healthy Pod:

```shell
kubectl describe endpointslice svc-test-xgnsv
```

If your cluster is on a cloud platform, deploying a Service with `type=LoadBalancer` will provision one of your cloud's internet-facing load-balancers and configure it to send traffic to your Service.
```shell
kubectl get svc --watch
```

After ~2 minutes a value in the EXTERNAL-IP column will appear.

Delete multiple resources:

```shell
kubectl delete -f deploy.yml -f lb.yml -f svc.yml
```

## 8: Ingress

Ingress is all about accessing multiple web applications through a single LoadBalancer Service.

- `Load Balancer` refers to a Kubernetes Service object of `type=LoadBalancer`
- `load-balancer` refers to the internet-facing load-balancer on the underlying cloud

Ingress exposes multiple Services through a single cloud load-balancer. Cloud load-balancers are expensive.

```shell
kubectl get ing
```

Ingress classes allow you to run multiple Ingress controllers on a single cluster:

- assign each Ingress controller to an Ingress class
- when you create Ingress objects, you assign them to an Ingress class

```shell
kubectl get ingressclass
```

Ingress is a way to expose multiple applications and Kubernetes Services via a single cloud load-balancer. Ingresses are stable objects in the API but have feature overlap with a lot of service meshes - if you are running a service mesh, you may not need Ingress.

## 9: Service discovery deep dive

Finding stuff on a crazy-busy platform like Kubernetes is hard. Service discovery makes it simple. Apps need a way to find the other apps they work with.

2 components to service discovery:

- registration - the process of an application listing its connection details in a service registry so other apps can find it and consume it. Kubernetes uses its internal DNS as a service registry. All Kubernetes Services are automatically registered with DNS.
- discovery - for service discovery to work, apps only need to know the name of the Service fronting the apps they want to connect to (the rest is taken care of by Kubernetes)

Get the Pods running the cluster DNS:

```shell
kubectl get pods -n kube-system -l k8s-app=kube-dns
```

Service discovery works like typical name resolution - check your own table, and if the name isn't found, pass the query on to the next resolver.

Domain name format: _object-name_._namespace_.svc.cluster.local - the object name has to be unique within a Namespace, but not across Namespaces.

## 10: Kubernetes storage

Kubernetes supports lots of types of storage from lots of different places. No matter what type of storage, or where it comes from, when it is exposed on Kubernetes it is called a volume. All that's required is a plugin allowing the storage resources to be surfaced as volumes in Kubernetes.

Container Storage Interface (CSI) - an open standard aimed at providing a clean storage interface for container orchestrators such as Kubernetes.

Core storage-related API objects:

- Persistent Volumes (PV) - how external storage assets are represented in Kubernetes
- Persistent Volume Claims (PVC) - like tickets that grant access to a PV
- Storage Classes (SC) - make it all dynamic

Storage providers - AWS Elastic Block Store, Azure File, NFS volumes, ...

The CSI is a vital piece of Kubernetes storage, however, unless you are a developer writing storage plugins, you are unlikely to interact with it very often.

Working with Storage Classes (a sketch follows at the end of this chapter):

- create one or more StorageClasses on Kubernetes
- deploy Pods with PVCs that reference those Storage Classes

Other settings:

- Access mode:
  - ReadWriteOnce - a PV that can only be bound as R/W by a single PVC
  - ReadWriteMany - a PV that can be bound as R/W by multiple PVCs
  - ReadOnlyMany - a PV that can be bound as R/O by multiple PVCs
- Reclaim policy - how to deal with a PV when its PVC is released:
  - Delete - deletes the PV and the associated storage resource on the external storage system
  - Retain - keeps the associated PV object on the cluster, as well as any data stored on the associated external asset

```shell
kubectl get sc
```

```shell
kubectl get pv
```

```shell
kubectl get pvc
```
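To make the two-step workflow concrete, a sketch (class name, provisioner and sizes are illustrative): a StorageClass, and a PVC that references it so a Pod can claim dynamically provisioned storage.

```yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: fast
provisioner: ebs.csi.aws.com   # CSI plugin of the underlying cloud
reclaimPolicy: Delete
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: fast-pvc
spec:
  accessModes:
    - ReadWriteOnce            # R/W by a single PVC
  storageClassName: fast       # triggers dynamic provisioning of a PV
  resources:
    requests:
      storage: 10Gi
```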
## 11: ConfigMaps and Secrets

Most apps comprise two main parts: the app & the configuration. Coupling the application and the configuration into a single easy-to-deploy unit is an anti-pattern. De-coupling the application and the configuration has the following benefits:

- re-usable application images (you can use the same image on dev, staging, prod)
- simpler development and testing (easier to spot a mistake when the app and the config are decoupled, e.g. an app crash after a config change)
- simpler and less disruptive changes

Kubernetes provides an object called a ConfigMap that lets you store configuration data outside a Pod. It also makes it easy to inject config into Pods at run-time. You should not use ConfigMaps to store sensitive data such as certificates and passwords. Kubernetes provides a different object, called a Secret, for storing sensitive data.

Behind the scenes, ConfigMaps are a map of key-value pairs, and we call each pair an entry:

- Keys - an arbitrary name that can be created from alphanumerics, dashes, dots, and underscores
- Values - anything, including multiple lines with carriage returns
- Keys and Values are separated by a colon - `key:value`

Data in a ConfigMap can be injected into containers at run-time via any of the following methods:

- environment variables (static - updates made to the map don't get reflected in running containers, a major reason not to use environment variables)
- arguments to the container's startup command (the most limited method, shares the environment variables' limitations)
- files in a volume (the most flexible method)

ConfigMap objects don't have the concept of state (desired/actual) - this is why they have a `data` block instead of `spec` and `status` blocks.

Creating a ConfigMap declaratively:

```yaml
kind: ConfigMap
apiVersion: v1
metadata:
  name: multimap
data:
  given: Nigel
  family: Poulton
```

```shell
kubectl apply -f multimap.yml
```

ConfigMaps are extremely flexible and can be used to insert complex configurations, including JSON files and even scripts, into containers at run-time.

View logs from a specific container in a Pod:

```shell
kubectl logs startup-pod -c args1
```

Using ConfigMaps with volumes is the most flexible option. You can reference entire configuration files, as well as make updates to the ConfigMap that will be reflected in running containers.

1. Create the ConfigMap
2. Create a ConfigMap volume in the Pod template
3. Mount the ConfigMap volume into the container
4. Entries in the ConfigMap will appear in the container as individual files

Update a ConfigMap by re-applying its YAML.
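A sketch of steps 2-3 for the `multimap` ConfigMap defined above (the Pod and volume names are illustrative, chosen to match the `cmvol` command that follows): each entry surfaces as a file, here `/etc/name/given` and `/etc/name/family`.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: cmvol
spec:
  volumes:
    - name: volmap
      configMap:
        name: multimap           # the ConfigMap created above
  containers:
    - name: ctr
      image: nginx               # illustrative image
      volumeMounts:
        - name: volmap
          mountPath: /etc/name   # each entry becomes a file in this directory
```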
Check the value of an entry mounted as a file:

```shell
kubectl exec cmvol -- cat /etc/name/given
```

Secrets are almost identical to ConfigMaps - they hold application configuration data that is injected into containers at run-time. Secrets are designed for sensitive data such as passwords, certificates, and OAuth tokens.

Despite being designed for sensitive data, Kubernetes does not encrypt Secrets in the cluster store. Fortunately, it is possible to configure encryption-at-rest with EncryptionConfiguration objects. Despite this, many people opt to use external 3rd-party tools, such as HashiCorp Vault.

A typical workflow for a Secret is as follows:

1. The Secret is created and persisted to the cluster store as an un-encrypted object
2. A Pod that uses it gets scheduled to a cluster node
3. The Secret is transferred over the network, un-encrypted, to the node
4. The kubelet on the node starts the Pod and its containers
5. The Secret is mounted into the container via an in-memory tmpfs filesystem and decoded from base64 to plain text
6. The application consumes it
7. When the Pod is deleted, the Secret is deleted from the node

```shell
kubectl get secrets
```

Create a Secret manually:

```shell
kubectl create secret generic creds --from-literal user=piotr --from-literal pwd=qwerty
```

Decode base64:

```shell
echo cGlvdHI= | base64 -d
```

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: tkb-secret
  labels:
    chapter: configmaps
type: Opaque
data:                             # use stringData instead when providing plaintext values
  username: bmlnZWxwb3VsdG9u
  password: UGFzc3dvcmQxMjM=
```

The most flexible way to inject a Secret into a Pod is via a special type of volume called a Secret volume. Secret volumes are automatically mounted as read-only to prevent containers and applications accidentally mutating them.

## 12: StatefulSets

Stateful application - an application that creates and saves valuable data, for example an app that saves data about client sessions and uses it for future sessions, or a database.

StatefulSets guarantee:

- predictable and persistent Pod names - name format: `StatefulSetName-Integer`
- predictable and persistent DNS hostnames
- predictable and persistent volume bindings

Failed Pods managed by a StatefulSet will be replaced by new Pods with the exact same Pod name, the exact same DNS hostname, and the exact same volumes. This is true even if the replacement is started on a different cluster node. The same is not true of Pods managed by a Deployment.

StatefulSets create one Pod at a time, and always wait for previous Pods to be running and ready before creating the next. Knowing the order in which Pods will be scaled down, as well as knowing that Pods will not be terminated in parallel, is a game-changer for many stateful apps. Note: deleting a StatefulSet object does not terminate Pods in order; with this in mind, you may want to scale down a StatefulSet to 0 replicas before deleting it.

A Headless Service is a regular Kubernetes Service object without an IP address. It becomes a StatefulSet's Governing Service when you list it in the StatefulSet config under `spec.serviceName`.

StatefulSets are only a framework. Applications need to be written in ways to take advantage of the way StatefulSets behave.
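A skeleton StatefulSet manifest as a sketch (names and image are illustrative): note `spec.serviceName` pointing at the governing headless Service, and the Pods being named `tkb-sts-0`, `tkb-sts-1`, `tkb-sts-2`.

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: tkb-sts
spec:
  replicas: 3
  serviceName: tkb-headless   # the governing (headless) Service
  selector:
    matchLabels:
      app: web
  template:
    metadata:
      labels:
        app: web
    spec:
      containers:
        - name: ctr-web
          image: nginx:latest # illustrative image
```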
## 13: API security and RBAC

Kubernetes is API-centric, and the API is served through the API server.

Authentication (authN, "auth en") is about proving your identity. All requests to the API server have to include credentials, and the authentication layer is responsible for verifying them. The authentication layer in Kubernetes is pluggable, and popular modules include integration with external identity management systems such as AWS Identity and Access Management. In fact, Kubernetes forces you to use an external identity management system.

Cluster details and credentials are stored in a `kubeconfig` file.

```shell
kubectl config view
```

Authorization (authZ, "auth zee") - RBAC (Role-Based Access Control) - happens immediately after successful authentication. It is about three things: users, actions, resources. Which users can perform which actions against which resources.

Admission Control runs immediately after successful authentication and authorization and is all about policies. There are 2 types of admission controllers: mutating (check for compliance and can modify requests) and validating (check for policy compliance, without request modification). Most real-world clusters will have a lot of admission controllers enabled. Example: for a policy requiring the `env=prod` label, an admission controller can verify the label's presence and add it when it is missing.

## 14: The Kubernetes API

Kubernetes is API-centric. This means everything in Kubernetes is about the API, and everything goes through the API and the API server. For the most part, you will use `kubectl` to send requests, however you can also craft them in code.

```shell
kubectl proxy --port 9000 &
```

```shell
curl http://localhost:9000/api/v1/pods
```

The Kubernetes API is divided into 2 groups:

- the core group - mature objects that were created in the early days of Kubernetes before the API was divided into groups, located in `api/v1`
- the named groups - the future of the API, all new resources get added to named groups

```shell
kubectl api-resources
```

Kubernetes has a strict process for adding new resources to the API. They come in as _alpha_ (experimental, can be buggy), progress through _beta_ (pre-release), and eventually reach _stable_. It is possible to write your own custom controllers and resources.

## 15: Threat modeling Kubernetes

Threat modeling is the process of identifying vulnerabilities. The STRIDE model:

- Spoofing - pretending to be somebody else with the aim of gaining extra privileges on a system
- Tampering - the act of changing something in a malicious way, so you can cause one of the following:
  - denial of service - tampering with a resource to make it unusable
  - elevation of privilege - tampering with a resource to gain additional privileges
- Repudiation - creating doubt about something; non-repudiation is proving certain actions were carried out by certain individuals
- Information disclosure - when sensitive data is leaked
- Denial of service - making something unavailable; there are many types of DoS attacks, but a well-known variation is overloading a system to the point it can no longer service requests
- Elevation of privilege - gaining higher access than what is granted, usually in order to cause damage or gain unauthorized access

================================================
FILE: books/kubernetes-in-action.md
================================================

[go back](https://github.com/pkardas/learning)

# Kubernetes in Action, Second Edition

Book by Marko Lukša

================================================
FILE: books/nlp-book.md
================================================

[go back](https://github.com/pkardas/learning)

# Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition

Book by Daniel Jurafsky and James H. Martin (December 2020 draft)
- [Chapter 2: Regular Expressions, Text Normalization, Edit Distance](#chapter-2-regular-expressions-text-normalization-edit-distance)
- [Chapter 3: N-gram Language Models](#chapter-3-n-gram-language-models)
- [Chapter 4: Naive Bayes and Sentiment Classification](#chapter-4-naive-bayes-and-sentiment-classification)
- [Chapter 5: Logistic Regression](#chapter-5-logistic-regression)
- [Chapter 6: Vector Semantics and Embeddings](#chapter-6-vector-semantics-and-embeddings)
- [Chapter 7: Neural Networks and Neural Language Models](#chapter-7-neural-networks-and-neural-language-models)
- [Chapter 8: Sequence Labeling for Parts of Speech and Named Entities](#chapter-8-sequence-labeling-for-parts-of-speech-and-named-entities)
- [Chapter 9: Deep Learning Architectures for Sequence Processing](#chapter-9-deep-learning-architectures-for-sequence-processing)
- [Chapter 10](#chapter-10)
- [Chapter 11: Machine Translation and Encode-Decoder Models](#chapter-11-machine-translation-and-encode-decoder-models)
- [Chapter 12: Constituency Grammars](#chapter-12-constituency-grammars)
- [Chapter 13-16](#chapter-13-16)
- [Chapter 17: Information Extraction](#chapter-17-information-extraction)
- [Chapter 18: Word Senses and WordNet](#chapter-18-word-senses-and-wordnet)
- [Chapter 19](#chapter-19)
- [Chapter 20: Lexicons for Sentiment, Affect and Connotation](#chapter-20-lexicons-for-sentiment-affect-and-connotation)
- [Chapter 21-22](#chapter-21-22)
- [Chapter 23: Question Answering](#chapter-23-question-answering)
- [Chapter 24: Chatbots & Dialogue Systems](#chapter-24-chatbots--dialogue-systems)
- [Chapter 25: Phonetics](#chapter-25-phonetics)
- [Chapter 26: Automatic Speech Recognition and Text-to-speech](#chapter-26-automatic-speech-recognition-and-text-to-speech)

## Chapter 2: Regular Expressions, Text Normalization, Edit Distance

*Regular Expressions*

A regular expression is an algebraic notation for characterising a set of strings.

Kleene * ("cleany star") - zero or more occurrences. Kleene + - at least one occurrence.

Anchors - special characters that *anchor* regular expressions to particular places in a string (`^` - start, `$` - end of a string). `^` has multiple meanings:

1. match start of the line
2. negation inside square brackets: `[^Ss]` - neither `S` nor `s`

The pipe symbol `|`, also known as "disjunction", is a logical OR: `cat|dog` matches either `cat` or `dog`.

Operator precedence hierarchy:

1. Parenthesis: `()`
2. Counters: `* + ? {}`, `{}` - explicit counter
3. Sequences and anchors: `sequence ^the end$`
4. Disjunction: `|`

Regular expressions are greedy, however there is a way to enforce non-greedy behaviour -> `*?` - a Kleene star that matches as little text as possible, `+?` - a Kleene plus that matches as little text as possible.

Fixing RE errors might require the following efforts:

- increasing precision (minimising false positives - incorrectly matched)
- increasing recall (minimising false negatives - incorrectly missed)

*Substitution* - easiest to explain with an example:

```
the (.*)er they were, the \1er they will be
--- will match ---
the bigger they were, the bigger they will be
```

The number operator, e.g. `\1`, allows repeating a matched group. So the parenthesis operator not only groups, but also stores the match in a numbered register. It is possible to disable the register and use a non-capturing group, e.g.: `(?:some|a few) (people|cats) like some \1`.

Famous chatbot ELIZA used a series of regular expression substitutions:

```
I'M (depressed|sad) -> I AM SORRY TO HEAR YOU ARE \1
```
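The capture-group and substitution machinery above maps directly onto Python's `re` module; a small sketch (the sentences are the book's own examples):

```python
import re

# backreference \1 must repeat whatever the first group matched
pattern = r"the (.*)er they were, the \1er they will be"
assert re.search(pattern, "the bigger they were, the bigger they will be")
assert not re.search(pattern, "the bigger they were, the faster they will be")

# an ELIZA-style substitution rule
reply = re.sub(r"I'M (depressed|sad)", r"I AM SORRY TO HEAR YOU ARE \1", "I'M depressed")
assert reply == "I AM SORRY TO HEAR YOU ARE depressed"
```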
Lookahead - look ahead in the text to see if some pattern matches, WITHOUT advancing the match cursor. Negative lookahead - used for ruling out special cases, e.g. ruling out strings starting with the word Volcano: `(?!Volcano)[A-Za-z]+`

*Words*

Fragment - a broken-off word: "I do main- mainly", "main-" is a fragment here.

Filler - "um, uh" - used in spoken language; problem: should it be treated as a word?

Fragments and fillers are 2 kinds of disfluencies.

Type - the number of distinct words in a corpus. When we speak about the number of words in a language, we are generally referring to word types.

Herdan's Law/Heaps' Law - the relationship between the number of types (`|V|`) and the number of tokens (`N`) in a corpus:

$$ |V| = kN^{\beta} $$

(where $k$ and $0 < \beta < 1$ are constants)

*Corpora*

Writers and speakers have specific styles of communicating and use specific dialects; text can vary by time, place, function, race, gender, age, socioeconomic class.

Code switching - a common practice for speakers and writers to use multiple languages in a single communicative act.

When preparing computational models for language processing, it is useful to prepare a datasheet - a document answering questions like: Who produced the text? In what context? For what purpose? In what language? What were the race, gender, ... of the authors? How was the data annotated?

*Text Normalisation*

1. Tokenisation (segmentation)
2. Normalising word formats
3. Segmenting sentences

*Tokenisation*

UNIX's `tr` command can be used for quick tokenisation of English texts.

Problem: keep specific words together: `2020/02/02`, `km/h`, `$65`, `www.github.com`, `100 000`, `I'm`, `New York`. A tokeniser can also be used to expand clitic contractions: `we're -> we are`. Tokenisation is tied up with Named Entity Recognition.

Tokenisation needs to be fast, hence it often uses deterministic algorithms based on regular expressions compiled into efficient finite state automata. Tokenisation is more complex for e.g. Chinese or Japanese (languages not using spaces to separate words) - for Japanese, word segmentation algorithms work better. It is also possible to use neural networks for the task of tokenisation.

Penn Treebank Tokeniser:

- separates clitics (`doesn't -> does n't`)
- keeps hyphenated words together (`close-up`, `Bielsko-Biała`)
- separates out all punctuation

Byte-Pair Encoding (a toy sketch of the merge loop follows below):

- begins with a vocabulary that is the set of all individual characters
- examines the training corpus, chooses the two symbols that are most frequently adjacent (A, B -> AB)
- continues to count and merge, creating longer and longer character strings, until k merges (a parameter of the algorithm) have been done
- most words will be represented as full symbols, a few rare ones will have to be represented by their parts
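A toy Python sketch of the BPE merge loop described above (the corpus and `k` are illustrative; a real implementation also tracks word frequencies and an end-of-word symbol):

```python
from collections import Counter


def most_frequent_pair(corpus):
    pairs = Counter()
    for word in corpus:  # each word is a list of current symbols
        pairs.update(zip(word, word[1:]))
    return pairs.most_common(1)[0][0]


def merge(corpus, pair):
    merged = []
    for word in corpus:
        out, i = [], 0
        while i < len(word):
            if i + 1 < len(word) and (word[i], word[i + 1]) == pair:
                out.append(word[i] + word[i + 1])  # A, B -> AB
                i += 2
            else:
                out.append(word[i])
                i += 1
        merged.append(out)
    return merged


corpus = [list(w) for w in ["lower", "lowest", "newer", "wider"]]
for _ in range(3):  # k = 3 merges
    corpus = merge(corpus, most_frequent_pair(corpus))
print(corpus)  # multi-character symbols emerge as new vocabulary entries
```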
*Normalisation*

The task of putting words in a standard format, choosing a single normal form for words with multiple forms like the USA/US. A valuable process even though spelling information is lost.

Case folding - mapping everything to lower/upper case. However, it might give wrong results, e.g. US (country) -> us (people, we).

*Lemmatisation*

The task of determining that two words have the same root (am, are, is -> be). Useful for web search - usually we want all forms to be found. Requires morphological parsing of the word. Morphology is the study of the way words are built up from smaller meaning-bearing units called morphemes. Morphemes have 2 classes: stems - the central part of the word - and affixes (prefixes and suffixes).

*The Porter Stemmer*

Lemmatisation is hard, that's why sometimes we use stemming:

```
This -> Thi, was -> wa, Bone's -> Bone s, ...
```

Stemming is based on a series of rules, e.g. `ATIONAL -> ATE` (relational -> relate). Stemmers do make errors, but they are fast and deterministic.

*Sentence Segmentation*

`?` and `!` are unambiguous, but `.` is unfortunately ambiguous - it doesn't have to mean the end of a sentence. Use a rule-based approach or machine learning.

*Minimum Edit Distance*

The minimum number of editing operations (insertion, deletion, substitution) needed to transform one string into another. How to find the minimum edit distance? It can be thought of as a shortest path problem: the shortest sequence of edits from one string to another. It can be solved using dynamic programming (a table-driven method for solving problems by combining solutions to sub-problems).

## Chapter 3: N-gram Language Models

Assigning probabilities to upcoming words in a sentence is a very important task in speech recognition, spelling correction, machine translation and AAC systems. Systems that assign probabilities to sequences of words are called **language models**. The simplest model is the n-gram.

*P(w|h)* - the probability of a word *w* given some history *h*:

$$ P(the|its\ water\ is\ so\ transparent\ that) = \dfrac{count(its\ water\ is\ so\ transparent\ that\ the)}{count(its\ water\ is\ so\ transparent\ that)} $$

You can compute these probabilities over a large corpus, e.g. Wikipedia. This method works fine in many cases, but it turns out even the web cannot give us good estimates in most cases - language is dynamic, and you are not able to count ALL the possible sentences. Hence, there is a need for a more clever way of estimating the probability *P(w|h)*. Instead of computing the probability of a word given its entire history, we can approximate the history by just the last few words.

The bigram model approximates the probability by taking only the last word, so for the example we had earlier (in general: an n-gram looks *n - 1* words into the past; trigrams are most commonly used, 4/5-grams are used when there is sufficient training data):

$$ P(the|its\ water\ is\ so\ transparent\ that) \approx P(the|that) $$

This assumption, that the next word depends only on the previous one, is called a **Markov** assumption.

The probability of a sentence can be calculated using the chain rule of probability:

$$ P(i\ want\ english\ food) = P(i|\langle s\rangle)\,P(want|i)\,P(english|want)\,P(food|english)\,P(\langle/s\rangle|food) $$

Such a technique is able to capture e.g. cultural preferences - people look for Chinese food more often than English food.

Language models are always computed in log format - log probabilities. Why? Probabilities always fall between 0 and 1, and when multiplying many small floats you end up with numerical underflow; using logarithms you get numbers that are not as small, and a product of probabilities becomes a sum of log probabilities.
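A tiny sketch of a bigram model with log probabilities (the toy corpus is illustrative and contains no zero counts, so no smoothing is needed yet):

```python
import math
from collections import Counter

corpus = "<s> i want english food </s> <s> i want chinese food </s>".split()
unigrams = Counter(corpus)
bigrams = Counter(zip(corpus, corpus[1:]))


def log_prob(prev, word):
    # MLE estimate: P(word | prev) = count(prev word) / count(prev)
    return math.log(bigrams[(prev, word)] / unigrams[prev])


sentence = "<s> i want english food </s>".split()
# summing log probabilities instead of multiplying raw ones avoids underflow
log_p = sum(log_prob(prev, word) for prev, word in zip(sentence, sentence[1:]))
print(log_p, math.exp(log_p))  # P(<s> i want english food </s>) = 0.5
```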
If a particular test set is used too often, we implicitly tune to its characteristics.

*Perplexity*

PP for short, a metric used for evaluating language models. Perplexity on a test set is the inverse probability of the test set, normalised by the number of words. Minimising perplexity is equivalent to maximising the test set probability according to the language model.

Another way of thinking about perplexity: the weighted average branching factor (branching factor - the number of possible next words that can follow any word). The more information the n-gram gives us about the word sequence, the lower the perplexity (unigram: 962, bigram: 170, trigram: 109).

An intrinsic improvement in perplexity does not guarantee an extrinsic improvement in performance. In other words: just because some metric shows your model is great, it does not mean it will do great in real life. Perplexity should be confirmed by an end-to-end evaluation on a real task.

*Generalisation and zeros*

An n-gram model is highly dependent on the training corpus, and it does a better job as we increase *n*. You need to use similar genres for training - Shakespearian English is far different from WSJ's English. To build a model for translating legal documents you need to train it on legal documents; to build a question answering system, you need to use questions for training. It is important to use appropriate dialects and varieties (African American Language, Nigerian English, ...).

Zeros: imagine you trained a model on a corpus containing "denied the: allegations, speculation, rumours, report", but in the test you check phrases like "denied the: offer, loan" - the model would estimate the probability as 0:

$$ P(offer|denied\ the) = 0 $$

This is bad... if you want to calculate perplexity, you would need to divide by zero. Which is kinda problematic.

So what about words we haven't seen before (open vocabulary -> out-of-vocabulary words / unknown words)? Add a pseudo word `<UNK>`. You can use this tag to replace all the words that occur fewer than some small number *n* times.

*Smoothing* (discounting) - the process of shaving off a bit of probability mass from some more frequent events and giving it to events we have never seen. There is a variety of ways to do smoothing:

- Laplace Smoothing (add-one smoothing) - adds 1 to all bigram counts before we normalise them into probabilities. So all the counts that used to be 0 become 1, 1 becomes 2, ... This method is not used in state-of-the-art solutions; it can be treated as a baseline (see the sketch below).
- Add-k smoothing - instead of adding 1, we add a fractional count, e.g. 0.5, 0.05, 0.01, ... Useful for some applications but still does not perform perfectly.

Backoff - we can use available knowledge: if you need to compute a trigram, maybe a bigram can help you with that, or even a unigram. Sometimes this might be sufficient.

Interpolation - mix the probability estimates from all the n-gram estimators.

*Kneser-Ney Smoothing* - the most commonly used method. It uses the following observation: "words that have appeared in more contexts in the past are more likely to appear in some new context as well". The best performing method is a modified Kneser-Ney Smoothing.
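A minimal sketch of the add-one (Laplace) estimate described above (the counts are made up; for add-k, use `k` instead of 1 in the numerator and `k * vocab_size` in the denominator):

```python
from collections import Counter

def laplace_bigram_prob(w_prev: str, w: str,
                        bigrams: Counter, unigrams: Counter, vocab_size: int) -> float:
    # Add-one smoothing: every bigram count is incremented by 1, and the
    # denominator grows by |V| so the distribution still sums to 1.
    return (bigrams[(w_prev, w)] + 1) / (unigrams[w_prev] + vocab_size)

unigrams = Counter({"denied": 4, "the": 4})
bigrams = Counter({("denied", "the"): 4, ("the", "allegations"): 2, ("the", "report"): 2})
vocab = {"denied", "the", "allegations", "report", "offer"}

# "the offer" was never seen, but it no longer gets probability zero:
print(laplace_bigram_prob("the", "offer", bigrams, unigrams, len(vocab)))  # 1/9 ~ 0.111
```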
*Huge Language Models and Stupid Backoff*

Google open-sourced their Web 1 Trillion 5-gram corpus, and they also released Google Books Ngrams. There is also COCA.

Stupid backoff - an algorithm for a language model that gives up the idea of trying to make the model a true probability distribution - no discounting. If a higher-order n-gram has a zero count, we simply back off to a lower-order n-gram. This algorithm does not produce a probability distribution.

## Chapter 4: Naive Bayes and Sentiment Classification

Many problems can be viewed as classification problems: text categorisation, sentiment analysis, language identification, authorship attribution, period disambiguation, tokenisation, and many more. The goal is to take a sample, extract features and classify the observation.

*Naive Bayes Classifiers*

Classifiers that make a simplified (naive) assumption about how the features interact.

Binary Multinomial Naive Bayes (binary NB) - used for sentiment analysis, clips the word counts in each document at 1 (extract the unique words from the document and count occurrences).

How to deal with negations? "I really like this movie" (positive), "I don't like this movie" (negative). A very simple, commonly used baseline: during text normalisation prepend the prefix *NOT_* to every word after a token of logical negation:

```
i didn't like this movie , but ... -> i didn't NOT_like NOT_this NOT_movie , but ...
```

Chapter 16 will tell more about parsing and the relationships between negations.

Sentiment lexicons - lists of words that are pre-annotated with positive or negative sentiment. Popular lexicons: General Inquirer or LIWC. For Naive Bayes you can add a feature "this word occurs in the positive lexicon" instead of counting each word separately. Chapter 20 will tell how lexicons can be learned automatically and show other use cases besides sentiment analysis.

Spam detection - Naive Bayes + regex + HTML scan.

Language identification - Naive Bayes, but not on words! It uses character n-grams.

Naive Bayes can be viewed as a language model.

*Evaluation*

Confusion matrix - a table for visualising how an algorithm performs with respect to the human *gold labels* (human-labelled data). It has 2 dimensions - system output and gold labels.

Accuracy - what percentage of all observations our system labelled correctly. It doesn't work well for unbalanced classes - e.g. with 80 negative samples and 20 positive ones, learn to always answer *negative* and you have 80% accuracy.

Precision - the percentage of the items that the system detected that are in fact positive.

Recall - the percentage of the items actually present in the input that were correctly identified by the system.

F-measure - combines both metrics - the weighted harmonic mean of precision and recall - a conservative metric, closer to the minimum of the two values (compared to the arithmetic mean).

*Evaluating with more than two classes*

Macro-averaging - compute the performance for each class and then average over classes. It better reflects the statistics of the smaller classes, so it is more appropriate when performance on all classes is equally important.

Micro-averaging - collect decisions for all classes into a single confusion matrix and then compute precision and recall from that table. It is dominated by the more frequent class, since the counts are pooled.

*Test sets and Cross-validation*

Cross-validation - when your dataset is not large enough, you can use all of it for training and validating: select random training and validation sets, train the classifier, compute the error, and repeat. Usually 10 times (10-fold cross-validation).

*Statistical Significance Testing*

We often need to compare the performance of two systems. How can we know one system is better than the other?

*Effect size* - the difference between F1-scores.
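A minimal sketch of precision, recall and F1 computed from confusion-matrix counts, as defined above (the counts are made up):

```python
def precision_recall_f1(tp: int, fp: int, fn: int) -> tuple[float, float, float]:
    # Precision: of everything the system flagged, how much was right?
    precision = tp / (tp + fp)
    # Recall: of everything that should have been flagged, how much was found?
    recall = tp / (tp + fn)
    # F1: harmonic mean - stays close to the smaller of the two values.
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1

# Hypothetical counts: 8 true positives, 2 false positives, 4 false negatives.
print(precision_recall_f1(tp=8, fp=2, fn=4))  # (0.8, 0.666..., 0.727...)
```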
*Null hypothesis* - we suppose *delta <= 0* (A is not better than B); we would like to know if we can confidently rule out this hypothesis. In order to do this, create a random variable *X* ranging over all test sets and ask: how likely is it, if the null hypothesis is correct, that among these test sets we would encounter a value of *delta* as large as the one we found? This likelihood is called the *p-value*. We select a threshold - usually small - and if the p-value falls below it, we can reject the *null hypothesis* and say that A is better than B - the result is *statistically significant*.

*Avoiding harms in classification*

Representational harms - a system perpetuating negative stereotypes about social groups.

Toxicity detection - hate speech, abuse, harassment detection. These systems can cause harm themselves, for example by flagging sentences that merely mention minorities. A system based on stereotypes can lead to censorship. Also, human-labelled data can be biased.

It is important to include a *model card* when releasing a system. A model card includes: training algorithms and parameters, data sources, intended users and use, and model performance across different groups.

## Chapter 5: Logistic Regression

Logistic regression - one of the most important analytic tools in the social and natural sciences, and the baseline supervised machine learning algorithm for classification. A neural network can be seen as a series of logistic regression classifiers stacked on top of each other.

It is a discriminative classifier (unlike Naive Bayes, a generative classifier - you can literally ask such a model what, for example, a dog or a cat looks like; a discriminative model learns only how to distinguish the classes, e.g. given a training set with collar-wearing dogs and cats, when you ask the model what it knows about cats it would respond: a cat doesn't wear a collar).

*The Sigmoid*

The sigmoid function takes a real value (even x -> infinity) and maps it to the range [0, 1]. It is nearly linear around 0. This is extremely useful for calculating e.g. *P(y=1|x)* - the probability of belonging to the class.

$$ z = w \cdot x + b $$

$$ P(y=1) = \sigma(z) $$

*z* ranges from *-inf* to *+inf* (*w* - the weight vector, *x* - the feature vector, *b* - the bias).

Logistic regression can be used for all sorts of NLP tasks, e.g. period disambiguation (deciding if a period is the end of a sentence or part of a word).

*Designing features* - features are generally designed by examining the training set with an eye to linguistic intuitions.

*Representation learning* - ways to learn features automatically in an unsupervised way from the input.

*Choosing a classifier* - Logistic Regression is great at finding correlations.

*Loss / cost function* - the distance between the system output and the gold output.

Gradient descent - an optimisation algorithm for updating the weights. It finds a minimum of a function by figuring out in which direction the function's slope is rising most steeply and moving in the opposite direction.

*θ* - the parameters; in the case of logistic regression θ = (weights, bias).

*Convex function* - a function with one minimum, so there are no local minima to get stuck in. Local minima are a problem when training neural networks - non-convex functions.

*Learning rate* - the magnitude of the amount to move in gradient descent (a hyper-parameter).

*Hyper-parameters* - special parameters chosen by the algorithm designer that affect how the algorithm works.

*Batch training* - we compute the gradient over the entire dataset, which is quite expensive. Alternatively use *mini-batch* training - train on a group of *m* examples (512 or 1024), as sketched below.
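A minimal numpy sketch of logistic regression trained with mini-batch gradient descent on the cross-entropy loss, as described above (synthetic data, no regularisation):

```python
import numpy as np

def sigmoid(z: np.ndarray) -> np.ndarray:
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 3))                   # toy features
y = (X @ np.array([1.5, -2.0, 0.5]) > 0) * 1.0   # toy labels

w, b, lr, m = np.zeros(3), 0.0, 0.1, 64          # weights, bias, learning rate, mini-batch size
for epoch in range(20):
    for i in range(0, len(X), m):
        xb, yb = X[i:i + m], y[i:i + m]
        p = sigmoid(xb @ w + b)                  # P(y=1|x) for the mini-batch
        # Gradient of the cross-entropy loss w.r.t. w and b:
        w -= lr * xb.T @ (p - yb) / len(xb)
        b -= lr * np.mean(p - yb)

print(np.mean((sigmoid(X @ w + b) > 0.5) == y))  # training accuracy
```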
*Regularisation* - a good model should generalise well; there is a problem of overfitting, where the model fits the training data too perfectly. It is possible to add a regularisation term - L1 (lasso regression) or L2 (ridge regression) regularisation.

*Multinomial logistic regression* (*softmax* regression) - for classification problems with more than 2 classes. The multinomial logistic classifier uses a generalisation of the sigmoid called the softmax function.

*Model interpretation* - often we want to know more than just the result of the classification, we want to know why the classifier made a certain decision. Logistic regression is interpretable.

## Chapter 6: Vector Semantics and Embeddings

*Distributional hypothesis* - the link between similarity in how words are distributed and similarity in what they mean.

*Lemma / citation form* - the basic form of a word. *Wordform* - an inflected lemma. A lemma can have multiple meanings, e.g. mouse might refer to a rodent or to a pointing device; each of these is called a word sense. Lemmas can be polysemous (have multiple senses), which makes interpretation difficult.

Word sense disambiguation - the task of determining which sense of a word is being used in a particular context.

*Synonyms* - two words are synonymous if they are substitutable - have the same propositional meaning.

*Principle of contrast* - a difference in linguistic form is always associated with some difference in meaning, e.g. water / H2O - H2O is rather used in a scientific context.

*Word similarity* - *cat* is not a synonym of *dog*, but these are 2 similar words. There are many human-labelled datasets for this.

*Word relatedness* (or association) - e.g. *coffee* is not similar to *cup* - they share practically no features - but they are very related: associated, they co-occur. A very common kind of relatedness is the semantic field, e.g. *surgeon, scalpel, nurse, hospital*. Semantic fields are related to topic models like LDA - Latent Dirichlet Allocation - unsupervised learning on large sets of texts to induce sets of associated words. There are more relations between words: hypernymy, antonymy or meronymy.

*Semantic Frames and Roles* - a set of words that denote perspectives or participants in a particular type of event, e.g. *Ling sold the book to Sam* - a seller / buyer relation. An important problem in question answering.

*Connotation* - affective meaning - emotions, sentiment, opinions or evaluations.

*Sentiment* - valence - the pleasantness of the stimulus; arousal - the intensity of emotion provoked by the stimulus; dominance - the degree of control exerted by the stimulus. In 1957 Osgood used these 3 values to represent a word - a revolutionary idea: a word embedded in a 3-dimensional space.

*Vector semantics* - a word's meaning can be defined by its distribution in language - by its neighbouring words. The idea of vector semantics is to represent a word as a point in a multidimensional semantic space (a word embedding) derived from the distributions of the word's neighbours.

*Information retrieval* - the task of finding the document *d* from the *D* documents in some collection that best matches a query *q*.

*Cosine* - a similarity metric between 2 words (the cosine of the angle between their vectors).

*TF-IDF* - raw frequencies are not the best way to measure association between words (a lot of noise from words like *the, it, they, ...*). Term Frequency - the frequency of word *t* in document *d*. The second factor, Inverse Document Frequency, gives higher weights to words that occur only in a few documents.
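A minimal sketch of TF-IDF vectors and cosine similarity, as described above (tiny made-up corpus; the exact TF dampening varies between formulations - here `log10(count + 1)`):

```python
import math

docs = [
    "the surgeon used a scalpel".split(),
    "the nurse helped the surgeon".split(),
    "the cat chased the mouse".split(),
]

def tf_idf(term: str, doc: list[str]) -> float:
    tf = math.log10(doc.count(term) + 1)             # dampened term frequency
    df = sum(term in d for d in docs)                # document frequency
    return tf * math.log10(len(docs) / df) if df else 0.0

vocab = sorted({w for d in docs for w in d})

def vector(doc: list[str]) -> list[float]:
    return [tf_idf(t, doc) for t in vocab]

def cosine(u: list[float], v: list[float]) -> float:
    dot = sum(a * b for a, b in zip(u, v))
    norm = math.sqrt(sum(a * a for a in u)) * math.sqrt(sum(b * b for b in v))
    return dot / norm

print(cosine(vector(docs[0]), vector(docs[1])))  # surgeon docs: similar
print(cosine(vector(docs[0]), vector(docs[2])))  # 0.0 - only "the" is shared, and its idf is 0
```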
*PMI* - Pointwise Mutual Information - a measure of how often 2 events occur together, compared with what we would expect if they were independent. A useful tool whenever we need to find words that are strongly associated. It is more common to use PPMI (positive PMI). Very rare words tend to have very high PMI.

*Word2vec* - dense word embeddings. The intuition of word2vec is that instead of counting how often each word *w* occurs near word *u*, we train a classifier on a binary classification task: "Is word *w* likely to show up near word *u*?". We can use running text as the training data - this is called self-supervision.

Visualising embeddings - visualise the meaning of a word embedded in space by listing the most similar words, by using clustering algorithms, or - the most important method - by dimensionality projection, e.g. t-SNE.

*First-order co-occurrence / syntagmatic association* - words that are near each other, e.g. *wrote* and *book*.

*Second-order co-occurrence / paradigmatic association* - words that have similar neighbours, e.g. *wrote* and *said*.

*Representational harm* - embeddings are capable of capturing bias and stereotypes. Moreover, they are capable of amplifying bias.

## Chapter 7: Neural Networks and Neural Language Models

Neural networks share much of the same mathematics as logistic regression, but NNs are more powerful classifiers than logistic regression. Neural networks can automatically learn useful representations of the input.

*Unit* - takes a set of real-valued numbers as input, performs some computation on them and produces an output: a weighted sum of the inputs plus a bias, passed through a function *f*. The output of this function is called an activation.

$$ y = a = f(z) = f(w \cdot x + b) $$

*f* - e.g. sigmoid, tanh, ReLU. Sigmoid is most commonly used for teaching. Tanh is almost always better than sigmoid. ReLU (rectified linear unit) - the most commonly used and the simplest.

*The (famous) XOR problem* - Minsky and Papert proved it is not possible to build a perceptron (a very simple neural unit that has a binary output and no non-linear activation function) to compute logical XOR. However, it can be computed using a layered neural network (see the sketch at the end of this chapter).

*Feed-Forward Neural Network* - a multi-layer network in which units are connected without cycles. Sometimes called multi-layer perceptrons for historical reasons, although modern networks aren't perceptrons (aren't purely linear). A simple FFNN has 3 kinds of nodes: input units, hidden units and output units. The core of the neural network is the hidden layer formed of hidden units. The standard architecture is fully connected layers - each unit in a layer takes as input the outputs of all the units in the previous layer. The purpose of learning is to learn the weights and bias of each layer.

*Loss function* - the distance between the system output and the gold output, e.g. cross-entropy loss. To find the parameters that minimise the loss, we use for example *gradient descent*. Gradient descent requires knowing the gradient of the loss function with respect to each of the parameters. The solution for computing this gradient is error back-propagation.

Language modeling - predicting upcoming words from prior word context - neural networks are perfect for this task. Much better than *n-gram* models - better generalisation, higher accuracy; on the other hand - much slower to train.
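A minimal numpy sketch of the XOR solution mentioned above - a two-layer network with hand-picked weights (a classic assignment for ReLU hidden units; no learning involved):

```python
import numpy as np

def relu(z: np.ndarray) -> np.ndarray:
    return np.maximum(z, 0)

# Hidden layer: two ReLU units; output: a single linear unit.
W = np.array([[1.0, 1.0], [1.0, 1.0]])  # hidden weights
b = np.array([0.0, -1.0])               # hidden biases
u = np.array([1.0, -2.0])               # output weights

def xor(x1: int, x2: int) -> float:
    h = relu(W @ np.array([x1, x2]) + b)  # hidden activations
    return u @ h

for x1, x2 in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    print(x1, x2, xor(x1, x2))  # prints 0, 1, 1, 0 - XOR, impossible for one perceptron
```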
## Chapter 8: Sequence Labeling for Parts of Speech and Named Entities

*Named entity* - e.g. Marie Curie, New York City, Stanford University, ... Important for many natural language understanding tasks (e.g. sentiment towards a specific product, question answering). Generally speaking, anything that can be referred to with a proper name (person, location, organisation). Possible output tags: PER (person), LOC (location), ORG (organisation) and GPE (geopolitical entity).

*POS / Part of Speech* - knowing whether a word is a noun or a verb tells us about likely neighbouring words. Parts of speech fall into 2 categories: closed class and open class. POS-tagging is the process of assigning a part-of-speech to each word in a text. Tagging is a disambiguation task - words are ambiguous, one word can have more than one POS, e.g. "book a flight" vs "hand me that book". The goal is to resolve these ambiguities. The accuracy of POS-tagging algorithms is very high: 97%+.

Most Frequent Class Baseline - an effective baseline method: assign each token to the class that occurs most often for it in the training set.

Markov chain - a model that tells us about the probabilities of sequences of random variables. A Markov chain makes a very strong assumption - to predict the future, all that matters is the current state. Formally a Markov chain is specified by: a set of *N* states, a transition probability matrix and an initial probability distribution.

The Hidden Markov Model - allows talking about both observed events (the words seen in the input) and hidden events (the part-of-speech tags). Formally an HMM is specified by: a set of *N* states, a transition probability matrix, observations, observation likelihoods / emission probabilities (the probability of an observation being generated from a state *q*) and an initial probability distribution. The HMM is a useful and powerful model, but it needs a number of augmentations to achieve high accuracy.

CRF (Conditional Random Field) - a log-linear model that assigns a probability to an entire output sequence. We can think of a CRF as a giant version of what multinomial logistic regression does for a single token.

Gazetteer - a list of place names, millions of entries for locations with detailed geographical and political information, e.g. https://www.geonames.org/

POS tags are evaluated by accuracy. NER is evaluated using recall, precision and F1. Named Entity Recognition is often based on rule-based approaches.

## Chapter 9: Deep Learning Architectures for Sequence Processing

Language is an inherently temporal phenomenon. This is hard to capture using standard machine learning models.

*Perplexity* - a measure of model quality; the perplexity of a model with respect to an unseen test set is the probability the model assigns to it, normalised by its length.

*RNN - Recurrent Neural Network* - any network that contains a cycle within its network connections, i.e. any network where the value of a unit is directly or indirectly dependent on its own earlier outputs as an input. Within RNNs there are constrained architectures that have proven to be extremely effective.

*Elman Networks / Simple Recurrent Networks* - a very useful architecture that also serves as the basis for more complex approaches like the LSTM (Long Short-Term Memory). An RNN can be illustrated as a feedforward network unrolled in time. A new set of weights, connecting the hidden layer from the previous time step to the current hidden layer, determines how the network makes use of past context when calculating the output for the current input.

RNN-based language models process sequences a word at a time, attempting to predict the next word in a sequence by using the current word and the previous hidden state as inputs.
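A minimal numpy sketch of the Elman recurrence described above (random toy embeddings; a real language model would add an output layer with a softmax over the vocabulary):

```python
import numpy as np

rng = np.random.default_rng(0)
d_in, d_h = 4, 3                      # embedding size, hidden size
W = rng.normal(size=(d_h, d_in))      # input -> hidden weights
U = rng.normal(size=(d_h, d_h))       # previous hidden -> hidden weights (the recurrence)
b = np.zeros(d_h)

def step(x_t: np.ndarray, h_prev: np.ndarray) -> np.ndarray:
    # h_t depends on the current input AND the previous hidden state -
    # this is how past context influences the current output.
    return np.tanh(W @ x_t + U @ h_prev + b)

h = np.zeros(d_h)
for x_t in rng.normal(size=(5, d_in)):  # a toy "sentence" of 5 word embeddings
    h = step(x_t, h)
print(h)
```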
RNNs can be used for many other tasks:

- sequence labeling - assign a label chosen from a small fixed set of labels to each element of a sequence (e.g. POS tagging or named entity recognition). The inputs to the RNN are word embeddings and the outputs are tag probabilities generated by a softmax layer.
- sequence classification - e.g. sentiment analysis, spam detection, message routing for customer support applications.

Stacked RNN - multiple networks where the output of one layer serves as the input to a subsequent layer. They very often outperform single-layer networks, mainly because the stacked layers are able to form different levels of abstraction. The optimal number of layers is application-dependent.

Bidirectional RNN - forward and backward networks combined. In these 2 independent networks the input is processed from the start to the end and from the end to the start. Also very effective for sequence classification.

It is difficult to train RNNs for tasks that require the network to make use of information distant from the current point of processing. RNNs cannot easily carry such information forward, because the information in the hidden states tends to be fairly local.

LSTM - Long Short-Term Memory - divides the context management problem into two sub-problems:

- removing information no longer needed from the context
- adding information likely to be needed for later decision-making

The LSTM is capable of mitigating the loss of distant information. However, LSTMs are still RNNs, so relevant information can still be lost.

Transformers - an approach to sequence processing that eliminates recurrent connections and returns to architectures reminiscent of fully connected networks. Transformers map sequences of input vectors to sequences of output vectors of the same length; they are made up of stacks of layers consisting of simple linear layers, feedforward networks and custom connections. Transformers use *self-attention layers* - these allow the network to directly extract and use information from arbitrarily large contexts without the need to pass it through intermediate recurrent connections as in RNNs. At the core of an attention-based approach is the ability to compare an item of interest to a collection of other items in a way that reveals their relevance in the current context (see the sketch at the end of this chapter).

It turns out language models can generate toxic language. Many models are trained on data from Reddit (a majority of young males - not representative). A language model can also leak information about its training data - meaning it can be attacked.
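A minimal numpy sketch of the self-attention comparison described in this chapter (stripped down: a single head, no learned query/key/value projections, no causal mask):

```python
import numpy as np

def self_attention(X: np.ndarray) -> np.ndarray:
    # Compare every position with every other position (dot products),
    # turn the scores into weights, and mix the inputs accordingly.
    d = X.shape[1]
    scores = X @ X.T / np.sqrt(d)                  # relevance of each item to every other
    weights = np.exp(scores)
    weights /= weights.sum(axis=1, keepdims=True)  # softmax over each row
    return weights @ X                             # each output is a weighted mix of inputs

X = np.random.default_rng(0).normal(size=(5, 8))   # 5 token vectors of dimension 8
print(self_attention(X).shape)                     # (5, 8) - same sequence length in and out
```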
## Chapter 10

Missing chapter.

## Chapter 11: Machine Translation and Encoder-Decoder Models

Machine translation - the use of computers to translate from one language to another. The most common use of machine translation is information access - when you want to, for example, translate some instructions on the web. It is also often used in CAT - Computer-Aided Translation, where the computer produces a draft translation and a human then fixes it in post-editing. Last but not least, it is useful for human communication needs.

The standard algorithm for MT is the encoder-decoder network (it can be implemented with RNNs or with Transformers). They are extremely successful at catching small differences between languages.

Some aspects of human language seem to be universal - true for every or almost every language; for example, every language has words for referring to people, eating or drinking. However, languages also differ in many ways, which causes translation divergences. German, French, English and Mandarin are all SVO (Subject-Verb-Object) languages. Hindi and Japanese are SOV languages. Irish and Arabic are VSO languages. VO languages generally have prepositions, OV languages generally have postpositions.

The Machine Translation and Word Sense Disambiguation problems are closely linked.

Encoder-decoder (sequence-to-sequence) networks are models capable of generating contextually appropriate, arbitrary-length output sequences. The encoder (LSTM, GRU, convolutional network, Transformer) takes an input sequence and creates a contextualised representation of it; this representation is then passed to the decoder (any kind of sequence architecture), which generates a task-specific output sequence.

Machine translation raises many of the same ethical issues that we have discussed previously. MT systems often assign gender according to cultural stereotypes. Some research found that MT systems perform worse when they are asked to translate sentences that describe people with non-stereotypical gender roles.

## Chapter 12: Constituency Grammars

Syntactic constituency is the idea that groups of words can behave as single units. The most widely used formal system for modeling constituent structure in English is the Context-Free Grammar, also called Phrase-Structure Grammar; the formalism is equivalent to Backus-Naur Form (BNF). A context-free grammar consists of a set of rules or productions, each of which expresses the ways that symbols of the language can be grouped and ordered together.

Treebank - a corpus in which every sentence is annotated with a parse tree.

## Chapter 13-16

Skipped for now.

## Chapter 17: Information Extraction

Information extraction - turns the unstructured information embedded in texts into structured data - e.g. a relational database - to enable further processing.

Relation extraction - finding and classifying semantic relations among the text entities. These are often binary relations - child-of, employment, part-whole. The task of NER is extremely useful here. Wikipedia also offers a large supply of relations.

RDF - Resource Description Framework - a triple of entity-relation-entity. DBPedia was derived from Wikipedia and contains over 2 billion RDF triples. Freebase - now part of Wikidata - has relations between people and their nationality or locations.

There are 5 main classes of algorithms for relation extraction:

- handwritten patterns - high precision and can be tailored to specific domains, however low recall and a lot of work
- supervised machine learning - for all entity pairs, determine if they are in a relation
- semi-supervised machine learning (bootstrapping and distant supervision) - bootstrapping proceeds by taking the entities in a seed pair and then finding sentences that contain both entities
- unsupervised

For unsupervised and semi-supervised approaches it is possible to calculate estimated metrics (like estimated precision).

Knowledge graphs - datasets of structured relational knowledge.

Event extraction - the task of identifying mentions of events in texts. In English most events correspond to verbs and most verbs introduce events (United Airlines SAID, prices INCREASED, ...). Some noun phrases can also denote events (the increase, the move, ...). With extracted events and extracted temporal expressions, the events from a text can be put on a timeline. Determining the ordering can be viewed as a binary relation detection and classification task.

Event coreference - needed to figure out which event mentions in a text refer to the same event.

Extracting time - temporal expressions are used to determine when the events in a text happened. Dates in text need to be normalised. Temporal expressions can be:
- relative: yesterday, next semester
- absolute: calendar dates
- durations

The temporal expression task consists of finding the start and the end of all the text spans that correspond to such temporal expressions. This task can use a rule-based approach.

Temporal Normalisation - the process of mapping a temporal expression to either a specific point in time or to a duration.

Template filling - the task of extracting descriptions of stereotypical or recurring events.

## Chapter 18: Word Senses and WordNet

Ambiguity - the same word can be used to mean different things. Words can be polysemous - have many meanings. A word sense is a discrete representation of one aspect of the meaning of a word. Meaning can be expressed as an embedding, for example an embedding that represents the meaning of a word in its textual context. An alternative to embeddings are glosses - written for people; a gloss is just a sentence, and a sentence can be embedded. Another way of defining a sense is through relationships ("right" is the opposite of "left").

Relations between senses:

- synonymy - when two senses of two different words are (almost) identical - couch / sofa, vomit / throw up
- antonymy - when two words have an opposite meaning - long / short, fast / slow
- hyponym / subordinate - when one word is more specific than the other word - car (hyponym) -> vehicle
- hypernym / superordinate - when one word is more general than the other word - vehicle (hypernym) -> car
- meronymy - when one word describes a part of the other word - wheel (meronym) -> car
- holonymy - the opposite of meronymy - car (holonym) -> wheel
- metonymy - the use of one aspect of a concept to refer to other aspects of the entity - Jane Austen wrote Emma (author) <-> I really love Jane Austen (works of the author)

WordNet - a large online thesaurus, a database that represents word senses. WordNet also represents relations between senses (is-a, part-whole). The relation between two senses is important in language understanding, for example antonymy - words with opposite meanings. English WordNet has 3 separate databases (one for nouns, one for verbs, and one for adjectives and adverbs).

Synset (Synonym Set) - the set of near-synonyms for a WordNet sense. Glosses are properties of a synset.

Word Sense Disambiguation - the task of determining which sense of a word is being used in a particular context. WSD algorithms take as input a word in context and output the correct word sense.

Lexical sample tasks - a small pre-selected set of target words and an inventory of senses. The all-words task (a harder problem) - the system is given an entire text and a lexicon with an inventory of senses for each entry, and has to disambiguate every word in the text.

The best WSD algorithm is a simple 1-nearest-neighbour algorithm using contextual word embeddings. There are also feature-based algorithms for WSD - POS tags, n-grams (3-grams most commonly), weighted averages of embeddings - passed to an SVM classifier.

The Lesk algorithm - the oldest and most powerful knowledge-based WSD method and a useful baseline. Lesk is a family of algorithms that choose the sense whose dictionary gloss or definition shares the most words with the target word's neighbourhood (see the sketch below).

BERT - uses contextual embeddings.

Word Sense Induction - an unsupervised approach: we don't use human-defined word senses, instead the set of senses of each word is created automatically from the instances of each word in the training set.
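A sketch of the simplified Lesk overlap idea described above, using NLTK's WordNet interface (assumes `nltk` is installed and the `wordnet` corpus has been downloaded; real implementations weight the words and drop stop words):

```python
from nltk.corpus import wordnet as wn  # requires: nltk.download("wordnet")

def simplified_lesk(word: str, context: str):
    context_words = set(context.lower().split())
    best, best_overlap = None, -1
    for sense in wn.synsets(word):
        # Signature = the words of the sense's gloss plus its example sentences.
        signature = set(sense.definition().lower().split())
        for example in sense.examples():
            signature |= set(example.lower().split())
        overlap = len(signature & context_words)
        if overlap > best_overlap:
            best, best_overlap = sense, overlap
    return best

sense = simplified_lesk("bank", "the boat ran aground on the muddy bank of the river")
print(sense, "-", sense.definition())
```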
## Chapter 19

Skipped for now.

## Chapter 20: Lexicons for Sentiment, Affect and Connotation

Connotation - the aspects of a word's meaning that are related to a writer or reader's emotions, sentiment, opinions or evaluations.

Emotion (by Scherer) - a relatively brief episode of response to the evaluation of an external or internal event as being of major significance.

Detecting emotions has the potential to improve a number of language processing tasks - detecting emotions in reviews, improving conversation systems, depression detection.

Basic emotions proposed by Ekman - surprise, happiness, anger, fear, disgust, sadness.

Basic emotions proposed by Plutchik - 4 opposing pairs: joy-sadness, anger-fear, trust-disgust, anticipation-surprise.

Most models include 2-3 dimensions:

- valence - the pleasantness of the stimulus
- arousal - the intensity of emotion provoked by the stimulus
- dominance - the degree of control exerted by the stimulus

The General Inquirer - the oldest lexicon: 1915 positive words and 2291 negative words. The NRC Valence, Arousal, and Dominance lexicon scores 20 000 words on valence, arousal and dominance. The NRC Word-Emotion Association Lexicon uses Plutchik's basic emotions to describe 14 000 words. There are many more lexicons.

Best-worst scaling - a method used in crowdsourcing: annotators are given N items and are asked which item is the best and which item is the worst.

Detecting a person's personality from their language can be useful for dialog systems. Many theories of human personality are based around a small number of dimensions:

- extroversion vs introversion - sociable, assertive vs aloof, reserved, shy
- emotional stability vs neuroticism - calm, unemotional vs insecure, anxious
- agreeableness vs disagreeableness - friendly, cooperative vs antagonistic, fault-finding
- conscientiousness vs unconscientiousness - self-disciplined, organised vs inefficient, careless
- openness to experience - intellectual, insightful vs shallow, unimaginative

Connotation frames - express richer relations of affective meaning that a predicate encodes about its arguments - "Country A violated the sovereignty of Country B".

## Chapter 21-22

Skipped for now.

## Chapter 23: Question Answering

Two major paradigms of question answering:

- information retrieval-based
- knowledge-based

Factoid questions - questions that can be answered with simple facts expressed in short texts, like: Where is the Louvre Museum located?

Information retrieval. The resulting IR system is often called a search engine. Ad hoc retrieval: a user poses a query to a retrieval system, which then returns an ordered set of documents from some collection. The basic IR system architecture uses the vector space model: queries and documents are mapped to vectors, then cosine similarity is used to rank the candidate documents answering the query. This is an example of the bag-of-words model. However, we don't use raw word counts in IR; instead we use TF-IDF.

TF-IDF - the term frequency tells us how frequent a word is; words that occur more often are likely to be informative about the document's content. However, terms that occur across all documents aren't useful. This is where inverse document frequency comes in handy.

Document scoring - we score document d by the cosine of its vector with the query vector q:

$$ score(q, d) = cos(q, d) = \dfrac{q \cdot d}{|q|\ |d|} $$

A more commonly used version of the score (because queries are usually short):

$$ score(q, d) = \sum_{t \in q} \dfrac{tf\textrm{-}idf(t, d)}{|d|} $$

A slightly more complex version of TF-IDF is called BM25.
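A minimal sketch of the short-query scoring formula above (toy two-document collection; `|d|` taken as the Euclidean norm of the document's tf-idf vector):

```python
import math

docs = {
    "d1": "the louvre museum is located in paris".split(),
    "d2": "the museum of modern art is in new york".split(),
}

def tf_idf(term: str, doc: list[str]) -> float:
    tf = math.log10(doc.count(term) + 1)
    df = sum(term in d for d in docs.values())
    return tf * math.log10(len(docs) / df) if df else 0.0

def score(query: str, doc: list[str]) -> float:
    # Short-query version: sum the tf-idf of each query term, normalised by |d|.
    doc_len = math.sqrt(sum(tf_idf(t, doc) ** 2 for t in set(doc)))
    return sum(tf_idf(t, doc) for t in query.split()) / doc_len

for name, doc in docs.items():
    print(name, score("where is the louvre museum located", doc))  # d1 should win
```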
In the past it was common to remove high-frequency words from both the query and the document. The list of such high-frequency words to be removed is called a stop list (the, a, to, ...). Worth knowing, but not commonly used these days because there are much better mechanisms.

Inverted index - given a query term, gives a list of documents that contain the term.

TF-IDF / BM25 have a conceptual flaw - they work only if there is exact overlap of words between the query and the document - the vocabulary mismatch problem. The solution is to use synonymy - instead of using word counts, use embeddings. Modern methods use encoders like BERT.

The goal of IR-based QA (open-domain QA) is to answer a user's question by finding short text segments from the web or some document collection. Datasets:

- SQuAD - Stanford Question Answering Dataset - contains passages from Wikipedia and associated questions
- HotpotQA - created by showing crowd workers multiple context documents and asking them to come up with questions that require reasoning about all the documents
- TriviaQA - questions written by trivia enthusiasts, question-answer-evidence triples
- The Natural Questions - real anonymised queries to the Google search engine; annotators were presented a query along with a Wikipedia page from the top 5 results
- TyDi QA - questions from typologically diverse languages

Entity linking - the task of associating a mention in text with the representation of some real-world entity in an ontology (e.g. Wikipedia).

Knowledge-based question answering - the idea of answering a question by mapping it to a query over a structured database.

RDF triples - tuples of 3 elements: subject, predicate and object, e.g. (Ada Lovelace, birth-year, 1815). This can be used to perform queries: "When was Ada Lovelace born?" -> birth-year(Ada Lovelace, ?). A second kind of system uses a semantic parser to map the question to a structured program that produces an answer.

Another alternative is to query a pretrained language model, forcing the model to answer a question solely from the information stored in its parameters. T5 is an encoder-decoder architecture; in pretraining it learns to fill in masked spans of text by generating the missing spans in the decoder. It is then fine-tuned on QA datasets, given the question without any additional context or passages.

Watson DeepQA - the system from IBM that won Jeopardy!. Main stages: Question Processing, Candidate Answer Generation, Candidate Answer Scoring, Answer Merging and Confidence Scoring.

MRR - mean reciprocal rank - a common evaluation metric for factoid question answering.

## Chapter 24: Chatbots & Dialogue Systems

Properties of Human Conversation:

- turns - a dialogue is a sequence of turns; turn structure has important implications for spoken dialogue - a system needs to know when to stop talking and also needs to know when the user is done speaking.
- speech acts:
    - constatives - committing the speaker to something's being the case (answering, claiming, denying, confirming, disagreeing)
    - directives - attempts by the speaker to get the addressee to do something (advising, asking, forbidding, inviting, ordering, requesting)
    - commissives - committing the speaker to some future course of action (promising, planning, vowing, betting, opposing)
    - acknowledgments - express the speaker's attitude regarding the hearer with respect to some social action (apologising, greeting, thanking, accepting an acknowledgment)
- grounding - acknowledging that the hearer has understood the speaker (like ACK in TCP); humans do this all the time, for example by saying OK
- sub-dialogues and dialogue structure:
    - questions set up an expectation for an answer, proposals are followed by acceptance / rejection, ...
    - these pairs aren't always followed immediately by their second pair part; they can be separated by a side sequence (or sub-dialogue) - a correction sub-dialogue, a clarification question or a presequence (Can you make train reservations? Yes I can. Please, do ...)
- initiative - sometimes a conversation is completely controlled by one participant; for humans it is more natural that initiative shifts from one person to another
- inference - the speaker provides some information, and other information needs to be derived from it (When in May do you want to travel? I have a meeting from the 12th to the 15th.)

Because of these characteristics of human conversation it is difficult to build dialogue systems that can carry on natural conversations.

Chatbots - the simplest form of dialogue systems. Chatbots fall into 3 categories:

- rule-based chatbots - for example ELIZA, based on psychological research, created in 1966, the most important chatbot. A few years later PARRY was created - this chatbot had a model of its own mental state (fear, anger, ...) - the first known system to pass the Turing test (1972): psychiatrists couldn't distinguish text transcripts of interviews with PARRY from transcripts of interviews with real paranoids (!!!)
- corpus-based chatbots - instead of using hand-built rules, mine human-human conversations. Requires enormous amounts of training data. Most methods use retrieval (grab a response from some corpus) or generation (a language model or encoder-decoder generates the response given the dialogue context)
- a hybrid of the two above

Task-based dialogue - a dialogue system has the goal of helping a user solve some task like making an airplane reservation or buying a product. GUS - an influential architecture from 1977 for travel planning. The control architecture for frame-based dialogue systems (frame - a kind of knowledge structure representing the kinds of intentions the system can extract from user sentences) is used in various modern systems like Siri, Google Assistant or Alexa. The system's goal is to fill the slots in the frame with the fillers the user intends, and then perform the relevant action for the user. To do this the system asks the questions associated with the frame's slots. This is a heavily rule-based approach (see the sketch below).

Slot-filling - performed together with domain and intent classification.

If a dialogue system misrecognizes or misunderstands an utterance, the user will generally correct the error by repeating or reformulating the utterance. Modern systems often ask the user to confirm or reject whether the input was understood correctly. Explicit confirmation eliminates the risk of mistakes, but it is awkward and increases the length of the conversation. The system might also ask clarification questions.
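A toy sketch of the frame-and-slots control loop described above (the frame, questions and regex patterns are all made up; real systems use trained classifiers for slot filling rather than regexes):

```python
import re

# A hypothetical GUS-style travel frame: each slot has a question the system
# can ask and a pattern for extracting a filler from the user's utterances.
FRAME = {
    "origin":      ("What city are you leaving from?", re.compile(r"from (\w+)")),
    "destination": ("Where are you going?",            re.compile(r"to (\w+)")),
    "date":        ("When would you like to travel?",  re.compile(r"on (\w+)")),
}

def fill_frame(utterances: list[str]) -> dict[str, str]:
    slots: dict[str, str] = {}
    for slot, (question, pattern) in FRAME.items():
        for utterance in utterances:
            if match := pattern.search(utterance):
                slots[slot] = match.group(1)
                break
        else:
            print("SYSTEM:", question)  # ask about any slot still missing
    return slots

print(fill_frame(["i would like a flight from Warsaw to Krakow"]))
# -> asks only about the date; origin and destination are already filled
```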
Dialogue systems might be evaluated using different metrics, e.g. engagingness, avoiding repetition, making sense. A commonly used high-level evaluation is acute-eval - an annotator looks at two conversations and chooses the one in which the dialogue system participant performed better.

Automatic metrics are generally not used for chatbots. However, there are some attempts to train a Turing-like evaluator classifier to distinguish human-generated responses from machine-generated responses.

The study of dialogue systems is closely linked with the field of Human-Computer Interaction. Ethical issues also need to be taken into consideration when designing such systems - a famous example is Microsoft's Tay chatbot (adversarial attacks). ML models amplify stereotypes and also raise privacy concerns.

## Chapter 25: Phonetics

## Chapter 26: Automatic Speech Recognition and Text-to-speech



================================================
FILE: books/peopleware.md
================================================

[go back](https://github.com/pkardas/learning)

# Peopleware: Productive Projects and Teams

Book by Tom DeMarco and Tim Lister

- [Chapter 1: Somewhere today, a project is failing](#chapter-1-somewhere-today-a-project-is-failing)
- [Chapter 2: Make a cheeseburger, sell a cheeseburger](#chapter-2-make-a-cheeseburger-sell-a-cheeseburger)
- [Chapter 3: Vienna waits for you](#chapter-3-vienna-waits-for-you)
- [Chapter 4: Quality - if time permits](#chapter-4-quality---if-time-permits)
- [Chapter 5: Parkinson's Law revisited](#chapter-5-parkinsons-law-revisited)
- [Chapter 6: Laetrile](#chapter-6-laetrile)
- [Chapter 7: The Furniture Police](#chapter-7-the-furniture-police)
- [Chapter 8: You never get anything done around here between 9 and 5](#chapter-8-you-never-get-anything-done-around-here-between-9-and-5)
- [Chapter 9: Saving money on space](#chapter-9-saving-money-on-space)
- [Chapter 10: Brain Time versus Body Time](#chapter-10-brain-time-versus-body-time)
- [Chapter 11: The Telephone](#chapter-11-the-telephone)
- [Chapter 12: Bring Back the Door](#chapter-12-bring-back-the-door)
- [Chapter 13: Taking Umbrella Steps](#chapter-13-taking-umbrella-steps)
- [Chapter 14: The Hornblower Factor](#chapter-14-the-hornblower-factor)
- [Chapter 15: Let's talk about Leadership](#chapter-15-lets-talk-about-leadership)
- [Chapter 16: Hiring a Juggler](#chapter-16-hiring-a-juggler)
- [Chapter 17: Playing well with others](#chapter-17-playing-well-with-others)
- [Chapter 18: Childhood's end](#chapter-18-childhoods-end)
- [Chapter 19: Happy to be here](#chapter-19-happy-to-be-here)
- [Chapter 20: Human Capital](#chapter-20-human-capital)
- [Chapter 21: The Whole is greater than the sum of the Parts](#chapter-21-the-whole-is-greater-than-the-sum-of-the-parts)
- [Chapter 22: The Black Team](#chapter-22-the-black-team)
- [Chapter 23: Teamicide](#chapter-23-teamicide)
- [Chapter 24: Teamicide Revisited](#chapter-24-teamicide-revisited)
- [Chapter 25: Competition](#chapter-25-competition)
- [Chapter 26: A spaghetti dinner](#chapter-26-a-spaghetti-dinner)
- [Chapter 27: Open Kimono](#chapter-27-open-kimono)
- [Chapter 28: Chemistry for Team Formation](#chapter-28-chemistry-for-team-formation)
- [Chapter 29: The self-healing system](#chapter-29-the-self-healing-system)
- [Chapter 30: Dancing with Risk](#chapter-30-dancing-with-risk)
- [Chapter 31: Meetings, Monologues, and Conversations](#chapter-31-meetings-monologues-and-conversations)
- [Chapter 32: The ultimate management sin is ...](#chapter-32-the-ultimate-management-sin-is-)
- [Chapter 33: E(vil) Mail](#chapter-33-evil-mail)
- [Chapter 34: Making change possible](#chapter-34-making-change-possible)
- [Chapter 35: Organizational learning](#chapter-35-organizational-learning)
- [Chapter 36: The making of community](#chapter-36-the-making-of-community)
- [Chapter 37: Chaos and Order](#chapter-37-chaos-and-order)
- [Chapter 38: Free Electrons](#chapter-38-free-electrons)
- [Chapter 39: Holgar Dansk](#chapter-39)

## Chapter 1: Somewhere today, a project is failing

"Politics" is the most frequently cited cause of failure. By "politics" people mean: communication problems, staffing problems, lack of motivation, and high turnover. The English language provides a much more precise term - sociology.

> The major problems of our work are not so much technological as sociological in nature.

We tend to focus on the technical rather than the human side of the work, because it is easier to do: installing a new hard drive vs figuring out why somebody is dissatisfied with the company. A manager should concentrate on sociology, not on technology. Human interactions are complicated and never very crisp and clean in their effects, but they matter more than any other aspect of the work.

## Chapter 2: Make a cheeseburger, sell a cheeseburger

The "make a cheeseburger, sell a cheeseburger" mentality can be fatal in your development area:

- Make the machine (the human machine) run as smoothly as possible
- Take a hard line about people goofing off on the job
- Treat workers as interchangeable pieces of the machine
- Optimize the steady state
- Standardize procedure, do everything by the book
- Eliminate experimentation - that's what the folks at headquarters are paid for

To manage thinking workers effectively, managers should take measures nearly opposite to those listed above:

- encourage people to make some errors
- ask people what dead-end roads they have been down, and make sure they understand that "none" is not the best answer
- you may be able to kick people to make them active, but not to make them creative, inventive and thoughtful; there is nothing more discouraging to any worker than the sense that his own motivation is inadequate and has to be "supplemented" by the boss
- every worker is unique; uniqueness is what makes project chemistry vital and effective
- the catalyst is important because the project is always in a state of flux; someone who can help a project to jell is worth two people who just do work
- managers pay too little attention to how well each team member fits into the effort as a whole
- workers need time for brainstorming, investigating new methods, figuring out how to avoid doing some subtasks, reading, training, and just goofing off

## Chapter 3: Vienna waits for you

Your people are aware of the one short life each person is allotted. There has to be something more important than the silly job they are working on.

Overtime - for every hour of overtime, there will be more or less an hour of undertime. The trade-off might work to management's advantage in the short term, but in the long term it will cancel out. Overtime is like sprinting: it makes some sense for the last 100m of the marathon for those with any energy left, but if you start sprinting in the first kilometer, you are wasting time.

Workaholism is an illness, but a common-cold-like one: everyone has a bout of it now and then. If a manager tries to exploit workaholics, they will eventually lose them.
The realization that one has sacrificed a more important value (family, love, home, youth) for a less important value (work) is devastating.

Typical things companies do to "improve productivity" make the work less enjoyable and less interesting:

- pressure people to put in more hours
- mechanize the process of product deployment
- compromise the quality of the product
- standardize procedures

Next time you hear someone talking about productivity, listen carefully to hear if the speaker uses the term "employee turnover". Chances are low.

> People under time pressure don't work better - they just work faster. In order to work faster, they may have to sacrifice the quality of the product and of their work experience.

## Chapter 4: Quality - if time permits

Man's character is dominated by a small number of basic instincts: survival, self-esteem, reproduction, territory, ... Even the slightest challenge to one of these built-in values can be upsetting. We all tend to tie our self-esteem to the quality of the product we produce. Any step you take that may jeopardize the quality of the product is likely to set the emotions of your staff directly against you. Workers kept under extreme time pressure will begin to sacrifice quality. They will hate what they are doing, but what other choice do they have?

> Some of my folks would tinker forever on a task, all in the name of _QUALITY_. But the market doesn't give a damn
> about that much quality - it is screaming for the product to be delivered yesterday and will accept it even in a
> quick-and-dirty state

**WRONG.** The builders' view of quality is very different - their self-esteem is strongly tied to the quality of the product, and they tend to impose quality standards of their own. _Quality, far beyond that required by the end user, is a means to higher productivity._

In some Japanese companies, the project team has an effective power of veto over delivery of what they believe to be a not-yet-ready product. No matter that the client would be willing to accept even a substandard product, the team can insist that delivery wait until its own standards are achieved.

## Chapter 5: Parkinson's Law revisited

Parkinson's Law:

> Work expands to fill the time allocated for it

Parkinson's Law gives managers the strongest possible conviction that the only way to get work done at all is to set an impossibly optimistic delivery date. But Parkinson's Law almost certainly doesn't apply to your people. Treating people as Parkinsonian workers doesn't work - it can only demean and demotivate them. Parkinson's Law didn't catch on because it was so true; it caught on because it was funny.

Programmers seem to be a bit more productive after they have done the estimate themselves, compared to cases in which the manager did it without even consulting them. According to the 1985 Jeffery-Lawrence study, projects on which the boss applied no schedule pressure whatsoever ("Just wake me up when you are done") had the highest productivity of all.

## Chapter 6: Laetrile

Laetrile - a colorless liquid pressed from apricot pits. It can be used for baking like any extract; in Mexico you can buy it for $50 to "cure" fatal cancer. Similarly, lots of managers fall into the trap of technical laetrile that purports to improve productivity.

The 7 false hopes of software management:

1. There is some new trick you have missed that could send productivity soaring
2. Other managers are getting gains of 100-200% or more
3. Technology is moving so swiftly that you are being passed by
4. Changing languages will give you huge gains
5. Because of the backlog, you need to double productivity immediately
6. You automate everything else: isn't it about time you automated away your software development staff?
7. Your people will work better if you put them under a lot of pressure

What management is:

> The manager's function is not to make people work, but to make it possible for people to work.

## Chapter 7: The Furniture Police

The work space given to intellect workers is usually noisy, interruptive, un-private and sterile. Some are prettier than others, but not much more functional. Police-mentality planners design workspaces the way they would design prisons - optimized for containment at minimal cost. As long as workers are crowded into noisy, sterile, disruptive space, it is not worth improving anything but the workspace.

## Chapter 8: You never get anything done around here between 9 and 5

To be productive, people may come in early or stay late or even try to escape entirely, by staying home for a day to get a critical piece of work done. Staying late, arriving early, or staying home to work in peace is a damning indictment of the office environment.

Two people from the same organization tend to perform alike. The best performers cluster in some organizations while the worst performers cluster in others. Many companies provide developers with a workplace so crowded, noisy, and interruptive as to fill their days with frustration. That alone could explain reduced efficiency as well as the tendency of good people to migrate elsewhere.

If you participate in or manage a team of people who need to use their brains during the workday, then the environment is your business.

## Chapter 9: Saving money on space

It is surprising how little the potential savings are compared to the potential risk. The entire cost of workspace for a developer is a small percentage of the salary paid to the developer - a 20:1 ratio. People need space and quiet in order to perform optimally. Noise is directly proportional to density, so halving the allotment of space per person can be expected to double the noise. Saving money on space may be costing you a fortune.

## Chapter 10: Brain Time versus Body Time

In the office: 30% of the time, people are noise sensitive, and the rest of the time, they are noise generators. Each time you are interrupted, you require an additional immersion period to get back into flow. During this immersion, you are not really doing work.

People have to be reassured that it is not their fault if they can only manage one or two uninterrupted hours a week - rather it is the organization's fault for not providing a flow-conducive environment. None of this data can go to the Payroll Department. The collection of uninterrupted-hour data can give you some meaningful metric evidence of just how good or bad your environment is.

```
E-Factor = Uninterrupted Hours / Body-Present Hours
```

## Chapter 11: The Telephone

When you are doing think-intensive work like design, interruptions are productivity killers. When you are doing sales and marketing support, you have to take every single call that comes in. Mixing flow and highly interruptive activities is a recipe for nothing but frustration. A "Leave me alone, I am working" ethic can emerge. People must learn that it is okay sometimes not to answer the phone, and their managers need to understand that as well. That is the character of knowledge workers' work: the quality of their time is important, not just its quantity.
## Chapter 12: Bring Back the Door

There are some prevalent symbols of success and failure in creating a sensible workplace. The most obvious symbol of success is the door. When there are sufficient doors, workers can control noise and interruptibility to suit their changing needs.

Don't expect the Establishment to roll over and play dead just because you begin to complain. There are at least 3 counterarguments that surface almost immediately:

- "People don't care about glitzy office space. They are too intelligent for that. And the ones who do care are just playing status games."
    - Appearance is stressed far too much in workplace design. What is more relevant is whether the workplace lets you work or inhibits you.
- "Maybe noise is a problem, but there are cheaper ways to deal with it than mucking around with the physical layout. We could just pipe in white noise or Muzak and cover up the disturbance."
    - You can either treat the symptom or treat the cause. Treating the cause means choosing isolation in the form of noise barriers - walls and doors - and these cost money. Treating the symptom is much cheaper: when you install Muzak or some other form of pink noise, you can save even more money by ignoring the problem.
- "Enclosed offices don't make for a vital environment. We want people to interact productively, and that is what they want, too. So walls and doors would be a step in the wrong direction."
    - Enclosed offices don't have to be one-person offices. A 2, 3, or 4-person office makes a lot more sense.

Management, at its best, should make sure there is enough space, enough quiet, and enough ways to ensure privacy so that people can create their own sensible work space.

## Chapter 13: Taking Umbrella Steps

> People cannot work effectively if their workspace is too enclosed or too exposed. A good workspace strikes the
> balance. You feel more comfortable in a workspace if there is a wall behind you. There should be no blank wall closer
> than 2.5m in front of you (eye relief). You should not be able to hear noises very different from the kind you make,
> from your workspace. Workspaces should allow you to face in different directions.

> Rooms without a view are like prisons for the people who have to stay in them ~ Christopher Alexander, _A Pattern Language_

## Chapter 14: The Hornblower Factor

Hornblower is the ultimate manager - his career advanced from midshipman to admiral through the same blend of cleverness, daring, political maneuvering and good luck. Managers are supposed to use their leadership skills to bring out untapped qualities in each subordinate - but this is not realistic. The manager doesn't have enough leverage to make a difference in a person's nature. So the people who work for you through whatever period will be more or less the same at the end as they were at the beginning. If they are not right for the job from the start, they never will be. Getting the right people in the first place is all-important.

Most hiring mistakes result from too much attention to appearance. Evolution has planted in each of us a certain uneasiness toward people who differ very much from the norm. The need for uniformity is a sign of insecurity on the part of management. Strong managers don't care when team members cut their hair or whether they wear ties. Their pride is tied only to their staff's accomplishments.

Companies sometimes impose standards of dress; these remove considerable discretion from the individual.
The effect is devastating - people can talk and think of nothing else, all useful work stops dead. The most valuable people begin to realize that they aren't appreciated for their contributions, but for haircuts and neckties.

The term _unprofessional_ is often used to characterize surprising and threatening behaviour. Anything that upsets the weak manager is almost by definition unprofessional - long hair on a man's head, comfortable shoes, dancing around the desk, laughing, ...

The second thermodynamic law of management: _Entropy is always increasing in the org._ That's why most elderly institutions are tighter and a lot less fun than sprightly young companies.

## Chapter 15: Let's talk about Leadership

One of the most dreadful "motivational" posters says: "The speed of the leader sets the rate of the pack." This is a work-extraction mechanism whose purpose is to increase quantity, not quality - work harder, stay longer, stop goofing off. Leadership is not about extracting anything from somebody - it is about service. While leaders sometimes set explicit directions, their main role is that of a catalyst, not a director.

Rebellious leadership is important in order to innovate - leaders should supply time to innovate (take a person away from doing billable work). Nobody knows enough to give permission to the key innovators to do what needs to be done. That's why leadership as a service almost always operates without official permission.

> If companies were more inclined to let leadership arise naturally, they wouldn't need to produce so much hot
> air talking about it.

## Chapter 16: Hiring a Juggler

If you are hiring a person to produce, you need to examine a sample of those products to see the quality of the candidate's work. Otherwise, the interview is just a talk. Candidates can show off a portfolio as part of the interview.

Aptitude tests are almost always oriented toward the tasks the person will perform immediately after being hired, and they are left-brain oriented. An aptitude test may give you people who perform better in the short term, but who are less likely to succeed later on. Use them, but not for hiring. The hiring process needs to focus on at least some sociological and human communication traits. Ask a candidate to prepare a 10-15 minute presentation on some aspect of past work (technology, management, a project) - you will be able to see the candidate's communication skills.

## Chapter 17: Playing well with others

The capacity of a team to absorb newness has its limits. Team jell takes time, and, during much of that time, the composition of the team can't be changing. If you need to use a reactive labor strategy, your team will probably never jell. In fact, the work group you manage almost certainly will not be a team at all.

## Chapter 18: Childhood's end

For the youngest employees, computers, smartphones, the Web, programming, hacking, social networking, and blogging are environment, not technology. Young people divide their attention, while their older colleagues tend to focus on one or possibly two tasks at once. Continuous partial attention is the opposite of flow. There is a difference between spending 2% of time on Facebook in a single block of time vs spending 2% of attention all day on Facebook. Articulating requirements to young workers is going to be essential to give them a chance to fit in.
## Chapter 19: Happy to be here

Typical turnover figures are in the range of 80% to 33% per year => average employee longevity between 15 and 36 months. The average person leaves after a little more than two years. It costs 1.5-2 months' salary to hire a new employee (agency or in-house HR). A new employee is quite useless on Day Zero (or even less than useless); after a few months the new person is doing some useful work. Within 5 months, he/she is at full working capacity. The total cost of replacing each person is the equivalent of 4.5-5 months of employee cost, or about 20% of the cost of keeping that employee for the full 2 years on the job. And that is only the visible cost of turnover.

In companies with high turnover, people tend toward a destructively short-term viewpoint, because they know they just aren't going to be there very long. In an organization with high turnover, nobody is willing to take the long view. If people only stick around for a year or two, the only way to conserve the best people is to promote them quickly. From the corporate perspective, late promotion is a sign of health.

The reasons that account for most departures:

- A just-passing-through mentality - no feelings of long-term involvement in the job
- A feeling of disposability - workers as interchangeable parts (since turnover is really high, nobody is indispensable)
- A sense that loyalty would be ludicrous - who would be loyal to an org that views its people as parts

People leave quickly -> no spending money on training -> no investment in the individual -> the individual thinks of moving on.

The best companies are consciously striving to be best. People tend to stay at such companies because there is a widespread sense that you are expected to stay. A common feature of companies with the lowest turnover is widespread retraining (you are forever bumping into managers and officers who started out as secretaries, payroll clerks, or in the mailroom).

## Chapter 20: Human Capital

Companies that manage their investment sensibly will prosper in the long run. Companies of knowledge workers have to realize that it is their investment in human capital that matters most. The good ones already do.

## Chapter 21: The Whole is greater than the sum of the Parts

Jelled Team - a group of people so strongly knit that the whole is greater than the sum of the parts. The production of such a team is greater than that of the same people working unjelled. Once a team begins to jell, the probability of success goes up dramatically. They don't need to be managed in the traditional sense, and they certainly don't need to be motivated. They have got momentum. Believing that workers will automatically accept organizational goals is a sign of naive managerial optimism.

> The purpose of a team is not goal attainment but goal alignment

Signs of a jelled team:

- low turnover
- strong sense of identity (colourful name)
- sense of eliteness (part of something unique; this attitude might be annoying to people outside the group)
- joint ownership

## Chapter 22: The Black Team

The story about the legendary, jelled team - The Black Team.

## Chapter 23: Teamicide

You can't control jelling - the process is too fragile to be controlled. The exact steps are hard to describe; the opposite is easier.
Teamicide techniques:

- Defensive management - let your people make mistakes; do not send a message that making errors is forbidden. "My people are too dumb to build systems without me" - people who feel untrusted have little inclination to bond together into a cooperative team.
- Bureaucracy - mindless paper pushing hurts team formation.
- Physical separation - group members may grow stronger bonds to non-group neighbours, just because they see more of them. Putting people together gives them the opportunity for the casual interaction that is so necessary for team formation.
- Fragmentation of people's time - bad for team formation and efficiency; no one can be part of multiple jelled teams.
- Quality reduction of the product - typical scenario: deliver a product in less time = lower quality. Self-esteem and enjoyment are undermined by the necessity of building a product of clearly lower quality than what the team is capable of.
- Phony deadlines - the date mentioned is impossible to meet, and everyone knows it; a team will not jell in such an environment.
- Clique control - there are no jelled teams at the managerial level; as you go higher and higher in the organization chart, the concept of jelled teams recedes further into oblivion.

## Chapter 24: Teamicide Revisited

Two additional kinds of teamicide:

- motivational posters - phony enough to make most people's skin crawl
- overtime - errors, burnout, accelerated turnover, and compensatory undertime; it disrupts the team

## Chapter 25: Competition

Coaching is an important factor in successful team interaction. It provides coordination and personal growth, and it feels good. We feel a huge debt to those who have coached us in the past. The act of coaching cannot take place if people don't feel safe. In a competitive atmosphere, you would be crazy to let anyone see you sitting down to be coached. You would be similarly crazy to coach someone else, as that person may eventually use your assistance to pass you by. Anything the manager does to increase the competition within a team has to be viewed as teamicidal.

## Chapter 26: A spaghetti dinner

Good managers provide frequent easy opportunities for the team to succeed together. The opportunities may be tiny pilot subprojects, or demonstrations, or simulations - anything that gets the team quickly into the habit of succeeding together.

## Chapter 27: Open Kimono

The Open Kimono attitude is the opposite of defensive management. You take no steps to defend yourself from the people you have put into positions of trust. A person you can't trust with any autonomy is of no use to you. If you have got decent people under you, there is probably nothing you can do to improve their chances of success more dramatically than to get yourself out of their hair occasionally. Visual supervision is for prisoners.

## Chapter 28: Chemistry for Team Formation

Some organizations are famous for their consistent good luck in getting well-knit teams to happen. It isn't luck - it's chemistry. These organizations are just plain healthy.
Signs of a healthy organization:

- people at ease
- people having a good time
- people enjoying interactions with their peers
- no defensiveness - the work is a joint product, everybody is proud of the quality
- managers devote their energy to building and maintaining healthy chemistry

Chemistry-building strategy:

- Make a cult of quality - a cult of quality is the strongest catalyst for team formation
- Provide lots of satisfying closure - people need reassurance from time to time that they are headed in the right direction
- Build a sense of eliteness - people require a sense of uniqueness to be at peace with themselves, and they need to be at peace with themselves to let the jelling process begin
- Allow and encourage heterogeneity - diverse teams are more fun to work in
- Preserve and protect successful teams
- Provide strategic but not tactical direction

Managers are usually not part of the teams that they manage. On the best teams, different individuals provide occasional leadership, taking charge in areas where they have particular strengths.

## Chapter 29: The self-healing system

A Methodology - a general theory of how a whole class of thought-intensive work ought to be conducted. _The people who write the Methodology are smart. The people who carry it out can be dumb._

There is a big difference between Methodology and methodology - a methodology is the basic approach one takes to get a job done. It doesn't reside in a fat book, but rather inside the heads of the people carrying out the work. Big-M Methodology is an attempt to centralize thinking. All meaningful decisions are made by the Methodology builders, not by the staff assigned to do the work.

> Voluminous documentation is part of the problem, not part of the solution. People should focus on getting things done,
> instead of building documents.

People might actually do exactly what the Methodology says, and the work would grind nearly to a halt.

## Chapter 30: Dancing with Risk

Our main problems are more likely to be sociological than technological in nature. Projects that have real value but little or no risk were all done ages ago. The ones that matter today are laden with risk. Risk management is not about making the risk go away, but about enabling sensible mitigation - planned and provisioned well ahead of time.

## Chapter 31: Meetings, Monologues, and Conversations

Some orgs are addicted to meetings; at the other extreme, some orgs refuse to use the "M" word at all. As orgs age, meeting time increases until there is time for nothing else. Even short stand-ups can be a drag on an organization's effectiveness if they lack purpose and focus. To cure a meeting-addicted org, start small: eliminate most ceremonial meetings in your area, spend time in one-on-one conversations, and limit attendance at working meetings. Encourage Open-Space networking to give people the chance to have unstructured interaction.

## Chapter 32: The ultimate management sin is ...

... wasting people's time. When participants of a meeting take turns interacting with one key figure, the expected rationale for assembling the whole group is missing - the boss might as well have interacted separately with each of the subordinates. Fragmented time is almost certainly teamicidal, and it is also guaranteed to waste the individual's time. The human capital invested in your workforce represents a ton of money.

## Chapter 33: E(vil) Mail

When you over-coordinate the people who work for you, they are too likely to under-coordinate their own efforts.
But self-coordination and mutual coordination among peers is the hallmark of graceful teamwork. Imagine how it would work if every pass could only happen if and when the coach gave the signal from the sideline. A decent coach understands that his/her job is to help people learn to self-coordinate.

> Life is short. If you need to know everything in order to do anything, you are not going to get much done.

## Chapter 34: Making change possible

> People hate change, and that is because people hate change.

People really hate change, they really, really do. When we start out to change, it is never certain that we will succeed. The uncertainty is more compelling than the possible gain.

> The fundamental response to change is not logical, but emotional

**You can never improve if you can't change at all.**

Change involves at least 4 stages: Old Status Quo -> Chaos -> Practice and Integration -> New Status Quo. Change happens upon introduction of a foreign element: a catalyst for change. Without a catalyst, there is no recognition of the desirability of change. Change won't even get started unless people feel safe - and people feel safe when they know they will not be demeaned for proposing a change. Change has a chance of succeeding only if failure is also okay.

## Chapter 35: Organizational learning

Learning is a critical improvement mechanism - an organization that doesn't learn cannot expect to prosper for very long. Experience gets turned into learning when an organization alters itself to take account of what experience has shown.

> Learning is limited by an organization's ability to keep its people

When turnover is high, learning is unlikely to stick or can't take place at all. In such an organization, attempts to change skills or to introduce redesigned procedures are an exercise in futility.

## Chapter 36: The making of community

What do great managers do best? The making of community. A need for community is built right into the human firmware. Community doesn't just happen on the job. It has to be made. The people who make it are the unsung heroes of our work experience.

An org that succeeds in building a satisfying community tends to keep its people. No one wants to leave. The investment made in human capital is retained, and upper management is willing to invest more. When the company invests more in its people, the people perform better and feel better about themselves and about their company. There is no formula for building community in the workplace. Some experimenting is needed.

## Chapter 37: Chaos and Order

There is something about human nature that makes us the implacable enemies of chaos. People who were attracted by the early disorder feel a nostalgic fondness for the days when everything wasn't so awfully mechanical. Some lost disorder can be reintroduced to breathe some energy into the work - a policy of constructive reintroduction of small amounts of disorder:

1. Pilot projects - set the fat book of standards aside and try some new, unproved technique - people get a boost in energy when they are doing something new and different
2. War games - war games help you evaluate your relative strengths and weaknesses and help the organization observe its global strengths and weaknesses - a big fuss should be made over any and all accomplishments
3. Brainstorming - an interactive session targeted on creative insight - focus on quantity of ideas, not quality; keep the proceedings loose, even silly; discourage negative comments
4. Provocative training experiences
5. Training, trips, conferences, celebrations, and retreats - everybody relishes a chance to get out of the office - when a team is forming, it makes good business sense to fight for travel money to get team members out of the office together - adventure adds small amounts of constructive disorder

## Chapter 38: Free Electrons

Free electrons - workers having a strong role in choosing their own orbits: positions with loosely stated responsibilities, so that the individual has a strong say in defining the work. Companies profit from such people. Some individuals need to be left alone to work out some matters, or at least be free to seek guidance if and when and from whomever he or she chooses. The mark of the best manager is an ability to single out the few key spirits who have the proper mix of perspective and maturity, and then turn them loose.

## Chapter 39: Holgar Dansk

A single person acting alone is not likely to effect any meaningful change. But there is no need to act alone. When something is terribly out of kilter, it takes very little to raise people's consciousness of it. Then it is no longer you. It is everyone. It may be a small voice saying: "This is unacceptable" - people know it is true. Once it has been said out loud, they can't ignore it any longer.

Sociology matters more than technology or even money. Work is supposed to be productive, satisfying fun. If it isn't, there is nothing else worth concentrating on. Choose your terrain carefully, assemble your facts, and speak up. You can make a difference.

================================================
FILE: books/pragmatic-programmer.md
================================================

[go back](https://github.com/pkardas/learning)

# The Pragmatic Programmer: journey to mastery, 20th Anniversary Edition

Book by David Thomas and Andrew Hunt

- [Chapter 1: A Pragmatic Philosophy](#chapter-1-a-pragmatic-philosophy)
- [Chapter 2: A Pragmatic Approach](#chapter-2-a-pragmatic-approach)
- [Chapter 3: The Basic Tools](#chapter-3-the-basic-tools)
- [Chapter 4: Pragmatic Paranoia](#chapter-4-pragmatic-paranoia)
- [Chapter 5: Bend, or Break](#chapter-5-bend-or-break)
- [Chapter 6: Concurrency](#chapter-6-concurrency)
- [Chapter 7: While you are coding](#chapter-7-while-you-are-coding)
- [Chapter 8: Before the Project](#chapter-8-before-the-project)
- [Chapter 9: Pragmatic Projects](#chapter-9-pragmatic-projects)
- [Postface](#postface)

## Chapter 1: A Pragmatic Philosophy

**You Have Agency.** It is your life. You own it. You run it. You create it. This industry gives you a remarkable set of opportunities. Be proactive, and take them.

The team needs to be able to trust you and rely on you, and you need to be comfortable relying on each of them as well. In a healthy environment based on trust, you can safely speak your mind, present your ideas, and rely on your team members, who can in turn rely on you.

**Provide options, don't make lame excuses.** Instead of excuses, provide options. Don't say it can't be done: explain what can be done to salvage the situation. When you find yourself saying "_I don't know_", be sure to follow it up with "_-- but I'll find out_". It is a great way to admit what you don't know, but then take responsibility like a pro.

_Entropy_ - a term from physics that refers to the amount of "disorder" in a system. The entropy in the universe tends toward a maximum. When disorder increases in software, we call it "software rot".
Some folks might call it by the more optimistic term "_technical debt_" (with the implied notion that they will pay it back someday; they probably will not).

**Don't live with broken windows.** Bad designs, wrong decisions, or poor code. Fix each one as soon as it is discovered. If there is not sufficient time to fix it properly, board it up. Take some action to prevent further damage and to show that you are on top of the situation. Don't let entropy win. If you find yourself working on a project with quite a few broken windows, it is all too easy to slip into the mindset of "_All the rest of this code is crap, I will just follow suit._" By the same token, if you find yourself on a project where the code is beautiful, well-designed, and elegant, you will likely take extra special care not to mess it up.

Idea: Help strengthen your team by surveying your project neighbourhood. Choose two or three broken windows and discuss with your colleagues what the problems are and what could be done to fix them.

**Be a catalyst for change.** You may be in a situation where you know exactly what needs doing and how to do it. People will form committees, budgets will need approval, and things will get complicated. Work out what you can reasonably ask for. Develop it well. Once you have got it, show people, and let them marvel. Sit back and wait for them to start asking you to add the functionality you originally wanted. Show them a glimpse of the future, and you will get them to rally around.

**Remember the Big Picture.** Constantly review what is happening around you, not just what you personally are doing. Projects slowly and inexorably get totally out of hand. Most software disasters start out too small to notice, and most project overruns happen a day at a time. It is often the accumulation of small things that breaks morale and teams. Situational awareness (is there anything out of context, anything that looks like it doesn't belong?) is a technique practiced by folks ranging from Boy and Girl Scouts to Navy SEALs. Get in the habit of really looking at and noticing your surroundings.

**Make quality a requirements issue.** Involve your users in determining the project's real quality requirements.

> An investment in knowledge always pays the best interest ~ Benjamin Franklin

**Invest regularly in your knowledge portfolio.** Your knowledge and experience are your most important day-to-day professional assets. Knowledge may become out of date; as the value of your knowledge declines, so does your value to your company or client.

1. Invest regularly - invest in knowledge regularly, even in small amounts.
2. Diversify - the more different things you know, the more valuable you are.
3. Manage risk - don't put all your technical eggs in one basket.
4. Buy low, sell high - learning an emerging technology before it becomes popular can be just as hard as finding an undervalued stock, but the payoff can be just as rewarding.
5. Review and rebalance - that hot technology you started investing in last month might be stone-cold by now.
Goals:

- learn at least one programming language per year - by learning several approaches, you can broaden your thinking
- read a technical book each month
- read nontechnical books too - don't forget the human side of the equation, as that requires an entirely different skill set
- take classes - look for interesting courses at a local or online college
- participate in local user groups and meetups - isolation can be deadly to your career, find out what people are working on outside of your company
- experiment with different environments - try Linux, Windows, Mac, a new IDE, ...
- stay current - read news and posts online on technology different from that of your current project

**Critically analyze what you read and hear.** You need to ensure that the knowledge in your portfolio is accurate and unswayed by either vendor or media hype.

_Critical Thinking Tutorial:_

1. Ask the "Five Whys" - ask why at least 5 times. Ask a question and get an answer. Dig deeper by asking "why".
2. Who does this benefit? - "follow the money" can be a very helpful path to analyze. The benefits to someone else or another organization may be aligned with your own, or not.
3. What is the context? - everything occurs in its own context. Just because something is good for someone doesn't mean it is good for you.
4. Why is this a problem? - is there an underlying model? How does the underlying model work?

**English is just another programming language.** Having the best ideas, the finest code, or the most pragmatic thinking is ultimately sterile unless you can communicate with other people.

**It is both what you say and the way you say it.** There is no point in having great ideas if you don't communicate them effectively. The more effective your communication, the more influential you become.

**Build documentation in, don't bolt it on.** It is easy to produce good-looking documentation from the comments in source code, and we recommend adding comments to modules and exported functions to give other developers a leg up when they come to use them. Restrict your non-API commenting to discussing why something is done, its purpose and its goal. The code already shows how it is done, so commenting on this is redundant - and a violation of the DRY principle.

## Chapter 2: A Pragmatic Approach

**Good design is easier to change than bad design.** A thing is well-designed if it adapts to the people who use it. Code should be Easy To Change. That's why SRP, decoupling, naming, ... are important - because of ETC.

**DRY - Don't Repeat Yourself.** Every piece of knowledge must have a single, unambiguous, authoritative representation within a system.

Most people believe that maintenance begins when an application is released, and that maintenance means fixing bugs and enhancing features. This is wrong. Programmers are constantly in maintenance mode. Maintenance is not a discrete activity, but a routine part of the entire development process. When we perform maintenance, we have to find and change the representation of things. It is easy to duplicate knowledge in the specifications, processes, and programs we develop, and when we do so, we invite a maintenance nightmare.

DRY is about the duplication of knowledge, of intent. It is about expressing the same thing in two different places, possibly in two totally different ways. Two pieces of code may be the same, but the knowledge they represent may be different - that is not duplication, that is a coincidence.
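A minimal Python sketch of that distinction (the two validators are hypothetical, not the book's example): the bodies are identical, but each encodes a different business rule, so merging them would couple two pieces of knowledge that can change independently.

```python
def validate_age(age: int) -> None:
    # Knowledge encoded here: what a legal age value is.
    if age < 0:
        raise ValueError("age must be non-negative")


def validate_quantity(quantity: int) -> None:
    # Knowledge encoded here: what a legal order quantity is.
    if quantity < 0:
        raise ValueError("quantity must be non-negative")
```

If the age rule later changes (say, to `0 <= age <= 130`), only `validate_age` needs to change - deduplicating the two up front would have been false DRY.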
> All services offered by a module should be visible through a uniform notation, which does not betray whether they are
> implemented through storage or through computation.

**Make it easy to reuse.** You should foster an environment where it is easier to find and reuse existing stuff than to write it yourself. If it isn't easy, people will not do it. And if you fail to reuse, you risk duplicating knowledge.

Two or more things are orthogonal if changes in one do not affect any of the others. In a well-designed system, the database code will be orthogonal to the user interface - you can change the interface without affecting the database, and swap databases without changing the interface. Non-orthogonal systems are more complex to change and control.

**Eliminate effects between unrelated things.** We want to design components that are self-contained - independent, and with a single, well-defined purpose. When components are well isolated from one another, you know that you can change one without having to worry about the rest. As long as you don't change that component's external interfaces, you can be confident that you will not cause problems that ripple through the entire system. Modular, component-based, layered systems - these are orthogonal systems.

- Keep your code decoupled - write shy modules: modules that don't reveal anything unnecessary to other modules and that don't rely on other modules' implementations. If you need to change an object's state, get the other object to do it for you.
- Avoid global data - in general, your code is easier to understand and maintain if you explicitly pass any required context into your modules.
- Avoid similar functions - duplicate code is a symptom of structural problems.

**There are no final decisions.** The mistake lies in assuming that any decision is cast in stone - and in not preparing for the contingencies that might arise. Think of decisions as being written in the sand at the beach. A big wave can come along and wipe them out at any time.

**Forgo following fads.** Choose architecture based on fundamentals, not fashion. No one knows what the future may hold.

**Use tracer bullets to find the target.** Look for the important requirements, the ones that define the system. Look for the areas where you have doubts, and where you see the biggest risks. Then prioritize your development so that these are the first areas you code. Benefits of tracer code:

- Users get to see something working early.
- Developers build a structure to work in.
- You have an integration platform.
- You have something to demonstrate.
- You have a better feel for progress.

Prototyping generates disposable code. Tracer code is lean but complete, and forms part of the skeleton of the final system. Think of prototyping as the reconnaissance and intelligence gathering that takes place before a single tracer bullet is fired.

Prototypes are designed to answer just a few questions, so they are much cheaper and faster to develop than applications that go into production. You can prototype: architecture, new functionality in an existing system, structure or contents of external data, third-party tools or components, performance issues, user interface design.

**Prototype to learn.** Prototyping is a learning experience. Its value lies not in the code produced, but in the lessons learned. That's really the point of prototyping. It is easy to be misled by the apparent completeness of a demonstrated prototype, and project sponsors or management may insist on deploying the prototype.
Remind them that you can build a great prototype of a new car out of balsa wood and duct tape, but you wouldn't try to drive it in rush-hour traffic. If you feel there is a strong possibility in your environment or culture that the purpose of prototype code may be misinterpreted, you may be better off with the tracer bullet approach.

**Program close to the problem domain.** Try to write code using the vocabulary of the application domain.

**Estimate to avoid surprises.** Estimate before you start. You will spot potential problems up front. A basic estimating trick: ask someone who's already done it. Before you get too committed to model building, cast around for someone who has been in a similar situation in the past. See how their problems got resolved.

Model building can be both creative and useful in the long term. Often, the process of building the model leads to discoveries of underlying patterns and processes that weren't apparent on the surface. On the other hand, model building also introduces inaccuracies into the estimating process.

_PERT - Program Evaluation Review Technique_ - an estimating methodology; every PERT task has an optimistic, a most likely, and a pessimistic estimate. Using a range of values like this is a great way to avoid one of the most common causes of estimation error - padding a number because you are unsure.

**Iterate the schedule with the code.** Make management understand that the team, their productivity, and the environment will determine the schedule. By formalizing this, and refining the schedule as part of each iteration, you will be giving them the most accurate scheduling estimates you can.

## Chapter 3: The Basic Tools

Tools amplify your talent. The better your tools, and the better you know how to use them, the more productive you can be.

**Keep knowledge in plain text.** Text will not become obsolete. Make plain text understandable to humans.

**Always use version control.** Make sure that everything is under version control: documentation, phone number lists, memos to vendors, makefiles, build and release procedures - everything.

**Fix the problem, not the blame.** It doesn't really matter whether the bug is your fault or someone else's.

**Don't panic.** The first rule of debugging. Don't waste a single neuron on the train of thought that begins "but that can't happen", because clearly it can, and has.

**Failing test before fixing code.** We want a bug that can be reproduced with a single command. It is a lot harder to fix a bug if you have to go through 15 steps to get to the point where the bug shows up.

**Read the damn error message.** Most exceptions tell both what failed and where it failed.

Binary search can be used for finding the release that introduced the error, or for determining the minimal subset of values that causes the program to fail.

**Select isn't broken.** It is possible that a bug exists in the OS, the compiler, or a third-party product - but this should not be your first thought. It is much more likely that the bug exists in the application code under development.

**Don't assume it - prove it.** Don't gloss over a routine or piece of code involved in the bug because you "know" it works. Prove it. Prove it in this context, with this data, with these boundary conditions.

## Chapter 4: Pragmatic Paranoia

**You can't write perfect software.** Perfect software doesn't exist. Pragmatic Programmers don't trust themselves. Knowing that no one writes perfect code, including themselves, they build in defenses against their own mistakes.
**Design with contracts.** Be strict in what you will accept before you begin, and promise as little as possible in return. Remember, if your contract indicates that you will accept anything and promise the world in return, you have got a lot of code to write.

**Crash early.** Don't catch or rescue all exceptions, re-raising them after writing some kind of message. Do not let error handling eclipse the code. Without exception-handling code, the code is less coupled. Crashing is often the best thing you can do. The Erlang and Elixir languages embrace this philosophy. When your code discovers that something that was supposed to be impossible just happened, your program is no longer viable. Anything it does from this point forward becomes suspect, so terminate it as soon as possible.

**Use assertions to prevent the impossible.** Whenever you find yourself thinking "but of course that could never happen", add code to check it. Assertions are also useful checks on an algorithm's operation. Assertions check for things that should never happen. LEAVE ASSERTIONS TURNED ON.

**Finish what you start.** The function or object that allocates a resource should be responsible for deallocating it.

**Take small steps - always.** Always take small, deliberate steps, checking for feedback and adjusting before proceeding. Consider that the rate of feedback is your speed limit. Never take on a step or a task that is "too big". The more you have to predict what the future will look like, the more risk you incur that you will be wrong. Instead of wasting effort designing for an uncertain future, you can always fall back on designing your code to be replaceable. Making code replaceable will also help with cohesion, coupling, and DRY, leading to a better design overall.

## Chapter 5: Bend, or Break

Decoupling shows how to keep separate concepts separate, decreasing coupling. Coupling is the enemy of change, because it links together things that must change in parallel. When you are designing bridges, you want them to hold their shape - you need them to be rigid. But when you are designing software that you will want to change, you want exactly the opposite - you want it to be flexible.

**Decoupled code is easier to change.**

**Tell, don't ask.** (The Law of Demeter) You shouldn't make decisions based on the internal state of an object and then update that object. Doing so totally destroys the benefits of encapsulation and, in doing so, spreads knowledge of the implementation throughout the code. A method defined in a class C should only call:

- other instance methods
- its parameters
- methods in objects it creates
- global variables

**Don't chain method calls.** (Something simpler than the Law of Demeter.) Try not to have more than one "." when you access something. The rule doesn't apply if the things you are chaining are really unlikely to change (e.g. libraries that come with the language).

**Avoid global data.** It is like adding an extra parameter to every method.

**If it is important enough to be global, wrap it in an API.** Any mutable external resource is global data (database, file system, service API, ...). Always wrap these resources behind code that you control.

Keeping your code shy - having it deal only with things it directly knows about - will help keep your applications decoupled, and that will make them more amenable to change.

Publish/Subscribe generalizes the observer pattern, at the same time solving its problems of coupling and performance.
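A minimal in-process sketch of the publish/subscribe idea (the class, topic, and handler names are illustrative assumptions, not the book's API):

```python
from collections import defaultdict
from typing import Any, Callable


class PubSub:
    """A tiny in-process publish/subscribe channel."""

    def __init__(self) -> None:
        self._handlers: dict[str, list[Callable[[Any], None]]] = defaultdict(list)

    def subscribe(self, topic: str, handler: Callable[[Any], None]) -> None:
        self._handlers[topic].append(handler)

    def publish(self, topic: str, payload: Any) -> None:
        # The publisher knows nothing about its subscribers.
        for handler in self._handlers[topic]:
            handler(payload)


bus = PubSub()
bus.subscribe("user.created", lambda email: print(f"send welcome mail to {email}"))
bus.publish("user.created", "alice@example.com")  # -> send welcome mail to alice@example.com
```

Because publisher and subscribers share only a topic name, either side can be replaced without touching the other - exactly the decoupling the chapter is after.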
Streams let us treat events as if they were a collection of data. It's as if we had a list of events, which gets longer as new events arrive. We can treat streams like any other collection (manipulate, filter, combine). A baseline for reactive event handling: reactivex.io

**Programming is about code, but programs are about data.** Start designing using transformations (Unix-like pipelines). Using pipelines means that you are automatically thinking in terms of transforming data.

**Don't hoard state, pass it around.** Functions greatly reduce coupling. A function can be used (and reused) anywhere its parameters match the output of some other function. There is still a degree of coupling, but it is more manageable than the OO style of command and control.

Thinking of code as a series of nested transformations can be a liberating approach to programming. It takes a while to get used to, but once you have developed the habit, you will find your code becomes cleaner, your functions shorter, and your designs flatter.

**Don't pay inheritance tax.** Inheritance is coupling. Not only is the child class coupled to the parent, the parent's parent, and so on, but the code that uses the child is also coupled to all the ancestors. Alternatives to inheritance:

- interfaces and protocols - these declarations create no code. We can use them to create types, and any class that implements the appropriate interface will be compatible with that type.
- delegation - has-a is better than is-a. If a parent has 20 methods and the subclass wants to make use of just 2 of them, its objects will still have the other 18 just lying around and callable.
- mixins and traits - use them to share functionality. The basic idea is simple: we want to be able to extend classes and objects with new functionality without using inheritance. So we create a set of these functions, give that set a name, and then somehow extend a class with them.

**Prefer interfaces to express polymorphism.** Interfaces and protocols give us polymorphism without inheritance.

**Parametrize your app using external configuration.** When code relies on values that may change after the application has gone live, keep those values external to the app. Keep environment- and customer-specific values outside the app (credentials, logging levels, IP addresses, validation parameters, external rates - e.g. tax rates, formatting details, license keys). While static configuration is common, the authors currently favor a different approach: keep configuration data external to the application, but rather than in a flat file or database, store it behind a service API.

## Chapter 6: Concurrency

Concurrency - when the execution of two or more pieces of code acts as if they run at the same time (context switching). Parallelism is when they do run at the same time (multiple cores).

Temporal coupling - coupling in time. Temporal coupling happens when your code imposes a sequence on things that is not required to solve the problem.

**Analyze workflow to improve concurrency.** Find out what can happen at the same time, and what must happen in a strict order. One way to do this is to capture the workflow using a notation such as the activity diagram.

**Shared state is incorrect state.** A semaphore is a thing that only one person can own at a time. You can create a semaphore and then use it to control access to some other resource.
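A small sketch of the semaphore idea using Python's standard library (the three-connection limit is a made-up example, not from the book):

```python
import threading

# At most 3 threads may "own" the scarce resource at any moment.
connection_slots = threading.Semaphore(3)


def worker(worker_id: int) -> None:
    with connection_slots:  # acquire on entry, release on exit
        print(f"worker {worker_id} is using one of the 3 connections")


threads = [threading.Thread(target=worker, args=(i,)) for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```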
**Random failures are often concurrency issues.** Whenever two or more instances of your code can access some resource at the same time, you are looking at a potential problem.

**Use actors for concurrency without shared state.** Actors execute concurrently and asynchronously, and share nothing. An actor is an independent virtual processor with its own local state. Each actor has a mailbox. When a message appears in the mailbox and the actor is idle, it kicks into life and processes the message. When it finishes processing, it processes another message from the mailbox, or goes back to sleep.

**Use blackboards to coordinate workflow.** The order of data arrival is irrelevant - when a fact is posted, it can trigger the appropriate rules. The output of any rule can post to the blackboard and cause the triggering of yet more applicable rules.

## Chapter 7: While You Are Coding

**Listen to your inner lizard.** When it feels like your code is pushing back, it is really your subconscious trying to tell you something is wrong. Learning to listen to your gut feeling when coding is an important skill to foster. But it applies to the bigger picture as well. Sometimes a design just feels wrong, or some requirement makes you feel uneasy. Stop and analyze these feelings. If you are in a supportive environment, express them out loud. Explore them.

**Don't program by coincidence.** Don't rely on luck and accidental success.

- Always be aware of what you are doing.
- Can you explain the code, in detail, to a more junior programmer? If not, perhaps you are relying on coincidences.
- Don't code in the dark. If you are not sure why it works, you will not know why it fails.
- Proceed from a plan.
- Don't depend on assumptions. If you can't tell whether something is reliable, assume the worst.
- Document your assumptions.
- Don't just test your code, test your assumptions as well. Don't guess, try it. Write an assertion to test your assumptions. If your assertion is right, you have improved the documentation in your code. If you discover your assumption is wrong, count yourself lucky.
- _Don't be a slave to history. Don't let existing code dictate future code. All code can be replaced if it is no longer appropriate._

**Estimate the order of your algorithms.** Estimate the resources that algorithms use - time, processor, memory, and so on. When you write anything containing loops or recursive calls, check the runtime and memory requirements. When a more detailed analysis is needed, use Big-O notation. Think of the _O_ as meaning _on the order of_. Big-O is never going to give you actual numbers for time or memory or whatever - it simply tells you how these values will change as the input changes. Common-sense estimation:

- simple loops - _O(n)_
- nested loops - _O(n^2)_
- binary chop - _O(log n)_
- divide and conquer - _O(n log n)_
- combinatorics - the running time may blow up, _O(n!)_

**Test your estimates.** The fastest algorithm is not always the best for the job. Given a small input set, a straightforward insertion sort will perform just as well as a quicksort, and will take less time to write and debug. Be wary of _premature optimisation_. It is always a good idea to make sure an algorithm really is a bottleneck before investing your precious time trying to improve it.

Refactoring: As a program evolves, it will become necessary to rethink earlier decisions and rework portions of the code. This process is perfectly natural. Code needs to evolve - it is not a static thing.
The most common metaphor for software development is building construction. But rather than construction, software is more like gardening - it is more organic than concrete. Refactoring is not intended to be a special, high-ceremony, once-in-a-while activity. Refactoring is a day-to-day activity, taking low-risk small steps. It is a targeted, precise approach to help keep the code easy to change. You need good, automated unit testing that validates the behavior of the code. Any number of things may cause code to qualify for refactoring:

- duplication
- non-orthogonal design - a change to one thing affects the other
- outdated knowledge
- usage - some features may turn out to be more important than originally thought
- performance
- the tests pass - when you have added a small amount of code and that extra test passes, you have a great opportunity to dive in and tidy up what you just wrote.

**Refactor early, refactor often.** Time pressure is often used as an excuse for not refactoring. Fail to refactor now, and there will be a far greater time investment to fix the problem down the road.

**Explain this principle to others by using a medical analogy: think of the code that needs refactoring as "a growth". Removing it requires invasive surgery. You can go in now, and take it out while it is still small. Or, you could wait while it grows and spreads - but removing it then will be both more expensive and more dangerous. Wait even longer, and you may lose the patient entirely.**

How to refactor without doing more harm than good:

1. Don't try to refactor and add functionality at the same time.
2. Make sure you have good tests before you begin refactoring. Run the tests as often as possible.
3. Take short, deliberate steps. Refactoring often involves making many localized changes that result in a larger-scale change.

Don't live with broken windows.

**Testing is not about finding bugs.** The major benefits of testing happen when you think about and write the tests, not when you run them.

**A test is the first user of your code.** Testing is vital feedback that guides your coding. _A function or method that is tightly coupled to other code is hard to test, because you have to set up all that environment._ Making your stuff testable also reduces its coupling.

**Build end-to-end, not top-down or bottom-up.** Build small pieces of end-to-end functionality, learning about the problem as you go. Like our hardware colleagues, we need to build testability into the software from the very beginning, and test each piece thoroughly before trying to wire the pieces together. Chip-level testing for hardware is roughly equivalent to unit testing in software. Write test cases that ensure a given unit honors its contract. We want to test that the module delivers the functionality it promises.

**Design to test.** Start thinking about testing before you write a line of code. Approaches:

- Test first - TDD - probably the best choice in most circumstances.
- Test during - a good fallback when TDD is not useful or convenient.
- Test never - the worst choice.

**Test your software, or your users will.** Make no mistake, testing is part of programming. It is not something left to other departments or staff. Testing, design, coding - it is all programming.

**Use property-based tests to validate your assumptions.** Property-based tests will try things you never thought to try, and exercise your code in ways it wasn't meant to be used. For Python, use the _Hypothesis_ framework.
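A minimal Hypothesis sketch (the properties checked here are an illustrative assumption, not the book's example): instead of hand-picking inputs, you state properties and let Hypothesis generate lists of integers, including edge cases you would never think of.

```python
from hypothesis import given, strategies as st


@given(st.lists(st.integers()))
def test_sorted_properties(xs):
    result = sorted(xs)
    # Property 1: sorting is idempotent.
    assert sorted(result) == result
    # Property 2: sorting preserves length.
    assert len(result) == len(xs)
```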
Hypothesis gives you a minilanguage for describing the data it should generate.

**Keep it simple and minimize attack surfaces.** Bear in mind these security principles:

1. Minimize Attack Surface Area
   1. Code complexity makes the attack surface larger, with more opportunities for unanticipated side effects. Think of complex code as making the surface area more porous and open to infection. Simple, smaller code is better.
   2. Never trust data from an external entity; always sanitize it before passing it on to a database, view rendering, or other processing.
   3. Unauthenticated services are an attack vector. Any user anywhere in the world can call unauthenticated services.
   4. Keep the number of authenticated users at an absolute minimum. Cull unused, old, or outdated users and services. If an account with development services is compromised, your entire product is compromised.
   5. Don't give too much information about an error in the response.
2. Principle of Least Privilege - every program and every privileged user of the system should operate using the least amount of privilege necessary to complete the job.
3. Don't leave personally identifiable information, financial data, passwords, or other credentials in plain text. Don't check in secrets, API keys, SSH keys, encryption passwords or other credentials alongside your code in version control.
4. Apply security patches quickly. The largest data breaches in history were caused by systems that were behind on their updates.

You don't want to do encryption yourself. Even the tiniest error can compromise everything. Rely on reliable things: take the more pragmatic approach, let someone else worry about it, and use a third-party authentication provider.

**Name well, rename when needed.** Things should be named according to the role they play in your code. Honor the local culture (snake_case vs CamelCase vs ...). Every project has its own vocabulary - jargon words that have a special meaning to the team. It is important that everyone on the team knows what these words mean. One way is to encourage a lot of communication; another way is to have a project glossary. When you see a name that no longer expresses the intent, or is misleading or confusing, fix it.

## Chapter 8: Before the Project

**No one knows exactly what they want.** Requirements rarely lie on the surface. Normally, they are buried deep beneath layers of assumptions, misconceptions, and politics.

**Programmers help people understand what they want.** Our job is to help people understand what they want.

**Requirements are learned in a feedback loop.** Your role is to interpret what the client says and to feed back to them the implications. This is both an intellectual process and a creative one. Your job is to help the client understand the consequences of their stated requirements.

**Work with the user to think like a user.** There is a simple technique for getting inside your clients' heads: become a client.

**Policy is metadata.** Don't hardcode policy into a system; instead, express it as metadata used by the system.

**Use a project glossary.** Create and maintain a project glossary - one place that defines all the specific terms and vocabulary used in a project. It is hard to succeed on a project if users and developers call the same thing by different names.

**Don't think outside the box - find the box.** When faced with an impossible problem, identify the real constraints. Ask yourself: Does it have to be done this way? Does it have to be done at all?
Sometimes you find yourself working on a problem that seems much harder than you thought it should be. You may think this particular problem is "impossible". This is an ideal time to do something else for a while. Sleep on it, go walk the dog. People who were distracted did better on a complex problem-solving task than people who put in conscious effort. If you are not willing to drop the problem for a while, the next best thing is probably finding someone to explain it to (rubber duck).

Conway's Law: "_Organizations which design systems are constrained to produce designs which are copies of the communication structures of these organizations._"

**Don't go into code alone.** Pair programming - the inherent peer pressure of a second person helps against moments of weakness and bad habits such as naming variables foo. You are less inclined to take a potentially embarrassing shortcut when someone is actively watching, which also results in higher-quality code. Mob programming is an extension of pair programming that involves more than just two developers. You can think of mob programming as tight collaboration with live coding.

**Agile is not a noun, agile is how you do things.** Agile is an adjective. Remember the values from the manifesto:

1. Individuals and interactions over processes and tools
2. Working software over comprehensive documentation
3. Customer collaboration over contract negotiation
4. Responding to change over following a plan

Agility is all about responding to change, responding to the unknowns you encounter after you set out. A recipe for working in an agile way:

1. Work out where you are.
2. Make the smallest meaningful step towards where you want to be.
3. Evaluate where you end up, and fix anything you broke (this requires a good design, because it is easier to fix a good design).

## Chapter 9: Pragmatic Projects

**Maintain small, stable teams.** A pragmatic team is small, under 10-12 or so members. Members come and go rarely. Everyone knows everyone well, trusts each other, and depends on each other.

Quality is a team issue. The most diligent developer placed on a team that just doesn't care will find it difficult to maintain the enthusiasm needed to fix niggling problems. Teams as a whole should not tolerate broken windows - those small imperfections that no one fixes.

**Schedule to make it happen.** If your team is serious about improvement and innovation, you need to schedule it. Trying to get things done "whenever there is a free moment" means they will never happen. Whatever sort of backlog or task list or flow you are working with, don't reserve it for feature development only. The team works on more than just new features:

- old systems maintenance
- process reflection and refinement - continuous improvement can only happen when you take the time to look around
- new tech experiments - try new stuff and analyze the results
- learning and skill improvements - brown bags, training sessions

**Organize fully functional teams.** There is a simple marketing trick that helps teams communicate as one - generate a brand. When you start a project, come up with a name for it, ideally something off-the-wall. Spend 30 minutes coming up with a zany logo, and use it - it gives your team an identity to build on, and the world something memorable to associate with your work.

Good communication is key to avoiding problems. You should be able to ask a question of team members and get a more-or-less instant reply.
If you have to wait a week for the team meeting to ask your question or share your status, that is an awful lot of friction.

**Do what works, not what is fashionable.** Ask yourself, why are you even using that particular development method/framework/whatever? Does it work well for you? Or was it adopted just because it was being used in the latest internet-fueled success story? You want to take the best pieces from any particular methodology and adapt them for use. No single method fits all, and current methods are far from complete, so you will need to look at more than just one popular method. That is a very different mindset from "but Scrum/Lean/Kanban/XP/agile does it this way...".

The goal isn't to do Scrum/do agile/do Lean or what-have-you. The goal is to be in a position to deliver working software that gives the users some new capability at a moment's notice. Not weeks, months, or years from now. If you are delivering in years, shorten the cycle to months. From months, cut it down to weeks. From a four-week sprint, try two. From a two-week sprint, try one. Then daily. Then, finally, on demand. Note that being able to deliver on demand does not mean you are forced to deliver every minute of every day. You deliver when the users need it, when it makes business sense to do so.

**Deliver when users need it.** In order to move to this style of continuous development, you need a rock-solid infrastructure. Once your infrastructure is in order, you need to decide how to organize the work. Beginners might want to start with Scrum for project management. More disciplined and experienced teams might look to Kanban and Lean techniques. But investigate first - try these approaches for yourself.

**Use version control to drive builds, tests and releases.** Build, test, and deployment are triggered via commits or pushes to version control, and built in a container in the cloud. Release to staging or production is specified by using a tag in your version control system.

**Test early, test often, test automatically.** A good project may well have more test code than production code. The time it takes to produce this test code is worth the effort. It ends up being much cheaper in the long run, and you actually stand a chance of producing a product with close to zero defects.

**Coding ain't done till all the tests run.** The automatic build runs all available tests. It is important to aim to "test for real" - the test environment should match the production environment closely. The build may cover several major types of software testing: unit testing, integration testing, validation and verification, and performance testing.

**Use Saboteurs to test your testing.** Because we can't write perfect software, we can't write perfect tests either. We need to test the tests. After you have written a test to detect a bug, cause the bug deliberately and make sure the test complains. If you are really serious about testing, take a separate branch, introduce bugs on purpose, and verify that the tests will catch them. At a higher level, you can use something like Netflix's Chaos Monkey.

**Test state coverage, not code coverage.** Even if you happen to hit every line of code, that is not the whole picture. What is important is the number of states that your program may have. States are not equivalent to lines of code. A great way to explore how your code handles unexpected states is to have a computer generate those states (property-based testing).
**Find bugs once.** Once a human tester finds a bug, it should be the last time a human tester finds that bug. If a bug slips through the net of existing tests, you need to add a new test to trap it next time.

**Don't use manual procedures.** Tracking down differences of any one component usually reveals a surprise. People aren't as repeatable as computers are. Nor should we expect them to be. Everything should depend on automation: the project build, deployment, ... Once you introduce manual steps, you have broken a very large window.

**Delight users, don't just deliver code.** If you want to delight your client, forge a relationship with them where you can actively help solve their problems. Be a _Problem Solver_ (not a Software Engineer/Developer). That is the essence of a Pragmatic Programmer.

**Sign your work.** If we are responsible for a design, or a piece of code, we do a job we can be proud of. Artisans of an earlier age were proud to sign their work. You should be, too. However, while you shouldn't jealously defend your code against interlopers, by the same token you should treat other people's code with respect. Mutual respect among the developers is critical to make this tip work. We want to see pride in ownership: "_I wrote this, and I stand behind my work_". Your signature should come to be recognized as an indicator of quality. People should see your name on a piece of code and expect it to be solid, well written, tested and documented. A really professional job. Written by a professional. A Pragmatic Programmer.

## Postface

We have a duty to ask ourselves two questions about every piece of code we deliver:

1. Have I protected the user?
2. Would I use this myself?

**First, do no harm.** Would I be happy to be a user of this software? Do I want my details shared? Do I want my movements to be given to retail outlets? Would I be happy to be driven by this autonomous vehicle? Am I comfortable doing this? If you are involved in the project, you are just as responsible as the sponsors.

**Don't enable scumbags.**

**It is your life. Share it. Celebrate it. Build it. AND HAVE FUN.** You are building the future. Your duty is to make a future that we would all want to inhabit. Recognize when you are doing something against this ideal, and have the courage to say no.

================================================
FILE: books/pytest/.coveragerc
================================================
[paths]
source = src/

================================================
FILE: books/pytest/Dockerfile
================================================
FROM python:3.10.2

WORKDIR /src
ENV PYTHONPATH "${PYTHONPATH}:/src"

COPY requirements.txt .
COPY setup.cfg .

RUN pip install -r requirements.txt

COPY src/ src/
COPY tests/ tests/

================================================
FILE: books/pytest/docker-compose.yml
================================================
version: "3.9"
services:
  book:
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./:/src

================================================
FILE: books/pytest/notes.md
================================================
[go back](https://github.com/pkardas/learning)

# Python Testing with Pytest: Simple, Rapid, Effective, and Scalable

Book by Brian Okken

Code here: [click](.)
- [Chapter 1: Getting Started with pytest](#chapter-1-getting-started-with-pytest)
- [Chapter 2: Writing Test Functions](#chapter-2-writing-test-functions)
- [Chapter 3: pytest Fixtures](#chapter-3-pytest-fixtures)
- [Chapter 4: Built-in fixtures](#chapter-4-built-in-fixtures)
- [Chapter 5: Parametrization](#chapter-5-parametrization)
- [Chapter 6: Markers](#chapter-6-markers)
- [Chapter 7: Strategy](#chapter-7-strategy)
- [Chapter 8: Configuration Files](#chapter-8-configuration-files)
- [Chapter 9: Coverage](#chapter-9-coverage)
- [Chapter 10: Mocking](#chapter-10-mocking)
- [Chapter 11: tox and Continuous Integration](#chapter-11-tox-and-continuous-integration)
- [Chapter 12: Testing Scripts and Applications](#chapter-12-testing-scripts-and-applications)
- [Chapter 13: Debugging Test Failures](#chapter-13-debugging-test-failures)
- [Chapter 14: Third-Party Plugins](#chapter-14-third-party-plugins)
- [Chapter 15: Building Plugins](#chapter-15-building-plugins)
- [Chapter 16: Advanced Parametrization](#chapter-16-advanced-parametrization)

## Chapter 1: Getting Started with pytest

Part of pytest execution is test discovery, where pytest looks for `.py` files starting with `test_` or ending with `_test`. Test methods and functions must start with `test_`, test classes should start with `Test`.

Flag `--tb=no` turns off tracebacks.

Test outcomes:

- PASSED (.)
- FAILED (F)
- SKIPPED (S) - you can tell pytest to skip a test by using `@pytest.mark.skip` or `@pytest.mark.skipif`
- XFAIL (x) - the test was not supposed to pass (`@pytest.mark.xfail`)
- XPASS (X) - the test was marked with xfail, but it ran and passed
- ERROR (E) - an exception happened during the execution

## Chapter 2: Writing Test Functions

Writing knowledge-building tests - when faced with a new data structure, it is often helpful to write some quick tests so that you can understand how the data structure works. The point of these tests is to check my understanding of how the structure works, and possibly to document that knowledge for someone else or even for a future me.

`pytest` includes a feature called "_assert rewriting_" that intercepts _assert_ calls and replaces them with something that can tell you more about why your assertions failed.

`pytest.fail()` underneath raises an exception. When calling this function or raising an exception directly, we don't get the wonderful "assert rewriting" provided by pytest.

Assertion helper function - used to wrap up a complicated assertion check. `__tracebackhide__ = True` has the effect that failing tests will not include this function in the traceback.

Flag `--tb=short` - a shorter traceback format.

Use `pytest.raises` to test expected exceptions. You can check error details by using `match`; `match` accepts a regular expression and matches it against the exception message. You can also use `as exc_info` (or any other variable name) to interrogate the raised exception (see the sketch below).

Arrange-Act-Assert and Given-When-Then patterns are about separating a test into stages. A common anti-pattern is an "Arrange-Assert-Act-Assert-Act-Assert-..." test. A test should focus on testing one behavior.

`pytest` allows you to group tests with classes. You can utilize class hierarchies for inherited methods. However, the book's author doesn't recommend test inheritance because it easily confuses readers. Use classes only for grouping.
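A minimal sketch of `pytest.raises` with `match` and `exc_info` (the function and message are hypothetical):

```python
import pytest


def find_card(card_id):
    raise LookupError(f"card id {card_id} not found")


def test_expected_exception():
    # match takes a regular expression, matched against the message
    with pytest.raises(LookupError, match=r"card id \d+ not found") as exc_info:
        find_card(17)
    # exc_info lets us interrogate the raised exception further
    assert "17" in str(exc_info.value)
```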
`pytest` allows you to run a subset of tests, examples:

- `pytest ch2/test_classes.py::TestEquality::test_equality`
- `pytest ch2/test_classes.py::TestEquality`
- `pytest ch2/test_classes.py`
- `pytest ch2/test_card.py::test_defaults`
- `pytest ch2/test_card.py`

The `-k` argument takes an expression and tells pytest to run tests that contain a substring matching the expression, examples:

- `pytest -v -k TestEquality`
- `pytest -v -k TestEq`
- `pytest -v -k equality`
- `pytest -v -k "equality and not equality_fail"` (_and_, _or_, _not_ and parentheses are allowed to create complex expressions)

## Chapter 3: pytest Fixtures

Fixtures are helper functions, run by pytest before (and sometimes after) the actual test functions. Code in the fixture can do whatever you want it to do. "Fixture" is also used to refer to the resource that is being set up by the fixture function.

`pytest` treats exceptions differently during fixtures compared to during a test function:

- FAIL - the failure is somewhere in the test function
- ERROR - the failure is somewhere in the fixture

Fixtures help a lot when dealing with databases.

Fixture functions run before the tests that use them. If there is a `yield` in the function, it stops there, passes control to the tests, and picks up on the next line after the tests are done. The code above `yield` is "setup" and the code after `yield` is "teardown". The code after `yield` is guaranteed to run regardless of what happens during the tests (see the sketch below).

Flag `--setup-show` shows us the order of operations of tests and fixtures, including the setup and teardown phases of the fixtures.

The scope dictates how often the setup and teardown get run when the fixture is used by multiple test functions:

- _function_ - (default scope) run once per test function. The setup is run before each test using the fixture. The teardown is run after each test using the fixture.
- _class_ - run once per test class, regardless of how many test methods are in the class.
- _module_ - run once per module, regardless of how many test functions/methods or other fixtures in the module use it.
- _package_ - run once per package, regardless of how many test functions/methods or other fixtures in the package use it.
- _session_ - run once per session, all test methods/functions using a fixture of session scope share one setup and teardown call.

The scope is set at the definition of a fixture, not at the place where it is used: `@pytest.fixture(scope=...)`. Fixtures can only depend on other fixtures of their same scope or wider.

`conftest.py` is considered by `pytest` a "local plugin" and gets read by pytest automatically. Use `conftest.py` to share fixtures among multiple test files. We can have `conftest.py` files at every level of our test directory. A test can use any fixture that is in the same test module as the test function, or in a `conftest.py` file in the same directory (or in a parent directory).

Use `--fixtures` to show a list of all available fixtures our tests can use. Use `--fixtures-per-test` to see what fixtures are used by each test and where the fixtures are defined.

Using fixtures with multiple stages can provide some incredible speed benefits and maintain test order independence. It is possible to set fixture scope dynamically, e.g. by passing a new flag as an argument.

Use `autouse=True` to run a fixture for every test automatically. The `autouse` feature is good to have around, but it is more of an exception than a rule. Opt for named fixtures unless you have a really great reason not to.
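A minimal sketch of a `yield` fixture with setup and teardown (the resource is hypothetical):

```python
import pytest


@pytest.fixture()
def connection():
    conn = {"open": True}  # setup: runs before the test
    yield conn             # the test runs here
    conn["open"] = False   # teardown: guaranteed to run after the test


def test_connection_is_open(connection):
    assert connection["open"]
```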
`pytest` allows you to rename fixtures with a `name` parameter to `@pytest.fixture`.

## Chapter 4: Built-in fixtures

`tmp_path` and `tmp_path_factory` - used to create temporary directories.

- `tmp_path` - function scope
- `tmp_path_factory` - session scope - you have to call `mktemp` to get a directory
- `tmpdir_factory` - similar to `tmp_path_factory`, but instead of `Path`, returns `py.path.local`

`capsys` - enables the capturing of writes to `stdout` and `stderr`.

- `capfd` - like `capsys`, but captures file descriptors 1 and 2 (stdout and stderr)
- `capsysbinary` - `capsys` captures text, `capsysbinary` captures binary
- `caplog` - captures output written with the logging package

A "monkey patch" is a dynamic modification of a class or module during runtime. "Monkey patching" is a convenient way to take over part of the runtime environment of the application code and replace it with entities that are more convenient for testing.

`monkeypatch` - used to modify objects, dictionaries, env variables. When the test ends, the original unpatched code is restored. It has the following functions:

- `setattr` - sets an attribute
- `delattr` - deletes an attribute
- `setitem` - sets a dictionary entry
- `delitem` - deletes a dictionary entry
- `setenv` - sets an env variable
- `delenv` - deletes an env variable
- `syspath_prepend` - prepends `path` to `sys.path`, which is Python's list of import locations
- `chdir` - changes the current working directory

Once you start using monkey patching:

- you will start to understand it
- you will start to avoid mocking and monkey patching whenever possible

DESIGN FOR TESTABILITY - a concept borrowed from hardware designers: adding functionality to software to make it easier to test.

More fixtures: https://docs.pytest.org/en/6.2.x/fixture.html or run `pytest --fixtures`.

## Chapter 5: Parametrization

Parametrized tests refer to adding parameters to our test functions and passing in multiple sets of arguments to the test to create new test cases. With fixture parametrization, we shift the parameters to a fixture; `pytest` will then call the fixture once for every set of values we provide. Fixture parametrization has the benefit of having a fixture run for each set of arguments. This is useful if you have setup or teardown code that needs to run for each test case - e.g. a different database connection, different file content, ...

`pytest_generate_tests` - a hook function. Allows you to modify the parametrization list at test collection time in interesting ways.

## Chapter 6: Markers

Markers are a way to tell pytest there is something special about a particular test. You can think of them like tags or labels. If some tests are slow, you can mark them with `@pytest.mark.slow` and have pytest skip those tests when you are in a hurry. You can pick a handful of tests out of a test suite and mark them with `@pytest.mark.smoke`.

Built-in markers:

- `@pytest.mark.filterwarnings(warning)` - adds a warning filter to the given test
- `@pytest.mark.skip(reason=None)` - skip the test with an optional reason
- `@pytest.mark.skipif(condition, ..., *, reason)` - skip the test if any of the conditions are true
- `@pytest.mark.xfail(condition, ..., *, reason, run=True, raises=None, strict=xfail_strict)` - we expect the test to fail. If we want to run all tests, even those that we know will fail, we can use this marker.
- `@pytest.mark.parametrize(argnames, argvalues, indirect, ids, scope)` - call a test function multiple times
- `@pytest.mark.usefixtures(fixturename1, fixturename2, ...)` - marks tests as needing all the specified fixtures

Custom markers - you need to add `pytest.ini` with the marker definition, some ideas for markers:

- `@pytest.mark.smoke` - run `pytest -v -m smoke` to run smoke tests only
- `@pytest.mark.exception` - run `pytest -v -m exception` to run exception-related tests only

Custom markers shine when we have more files involved. We can also add markers to entire files or classes. We can even put multiple markers on a single test. File-level marker:

```python
pytestmark = [pytest.mark.marker_one, pytest.mark.marker_two]
```

When filtering tests using markers, it is possible to combine markers and use a bit of logic, just like we did with the `-k` option, e.g. `pytest -v -m "custom and exception"`, `pytest -v -m "finish and not smoke"`.

`--strict-markers` - raises an error when a marker is not found (by default a warning is raised). The error is raised at collection time, not at run time, so it is reported earlier.

Markers can be used in conjunction with fixtures. Use `--markers` to list all available markers.

## Chapter 7: Strategy

_Testing enough to sleep at night_: the idea of testing enough so that you can sleep at night may have come from software systems where developers have to be on call to fix software if it stops working in the middle of the night. It has been extended to include sleeping soundly, knowing that your software is well tested.

Testing through the API tests most of the system and logic.

Before you create the test cases you want to test, evaluate what features to test. When you have a lot of functionality and features to test, you have to prioritize the order of developing tests. At least a rough idea of order helps. Prioritize using the following factors:

1. Recent - new features, new areas of code, recently modified, refactored.
2. Core - your product's unique selling propositions. The essential functions that must continue to work in order for the product to be useful.
3. Risk - areas of the application that pose more risk, such as areas important to customers but not used regularly by the development team, or parts that use third-party code you don't trust.
4. Problematic - functionality that frequently breaks or often gets defect reports against it.
5. Expertise - features or algorithms understood by a limited subset of people.

Creating test cases:

- start with a non-trivial, "happy path" test case
- then look at test cases that represent:
  - interesting sets of inputs
  - interesting starting states
  - interesting end states
  - all possible error states

## Chapter 8: Configuration Files

Non-test files that affect how _pytest_ runs:

- `pytest.ini` - the primary pytest configuration file that allows you to change pytest's default behavior. Its location also defines the pytest root directory.
- `conftest.py` - this file contains fixtures and hook functions. It can exist at the root directory or in any subdirectory. It is a good idea to stick to only one `conftest.py` file, so you can find fixture definitions easily.
- `__init__.py` - when put into test subdirectories, this file allows you to have identical test file names in multiple test directories. This means you can have `api/test_add.py` and `cli/test_add.py`, but only if you have `__init__.py` in both directories.
- `tox.ini`, `pyproject.toml`, `setup.cfg` - these files can take the place of `pytest.ini`

Example `pytest.ini`:

```
[pytest]  -- including `[pytest]` allows the pytest ini parsing to treat `pytest.ini` and `tox.ini` identically
addopts =  -- enables us to list the pytest flags we always want to run in this project
    --strict-markers  -- raise an error for any unregistered marker
    --strict-config  -- raise an error for any difficulty in parsing config files
    -ra  -- display extra test summary at the end of a test run
testpaths = tests  -- tells pytest where to look for tests
markers =  -- declare markers
    smoke: subset of tests
    exception: check for expected exceptions
```

Example `tox.ini`:

```
[tox]
; tox specific settings

[pytest]
addopts =
    --strict-markers
    --strict-config
    -ra
...
```

Example `pyproject.toml`:

```
[tool.pytest.ini_options]
addopts = [
    "--strict-markers",
    "--strict-config",
    "-ra",
]
testpaths = "tests"
markers = [
    "smoke: subset of tests",
    "exception: check for expected exceptions",
]
```

Example `setup.cfg`:

```
[tool:pytest]
addopts =
    --strict-markers
    --strict-config
    -ra
...
```

Even if you don't need any configuration settings, it is still a great idea to place an empty `pytest.ini` at the top of your project, because it pins down the pytest root directory and stops pytest from searching for configuration elsewhere.

## Chapter 9: Coverage

Tools that measure code coverage watch your code while a test suite is being run and keep track of which lines are hit and which are not. That measurement is called "line coverage": the number of lines run during testing divided by the total number of lines of code. Code coverage tools can also tell you if all paths are taken in control statements - "branch coverage".

Code coverage cannot tell you if your test suite is good - it can only tell you how much of the application code is getting hit by your test suite.

`coverage.py` is the preferred Python coverage tool; `pytest-cov` is a popular pytest plugin (it depends on `coverage.py`, so that will be installed as well). To run tests with `coverage.py`, you need to add the `--cov` flag. To add missing lines to the terminal report, add the `--cov-report=term-missing` flag. `coverage.py` is able to generate HTML reports, to help view coverage data in more detail: `docker-compose run --rm book pytest --cov=src --cov-report=html`.

`# pragma: no cover` - tells `coverage` to exclude either a single line or a block of code.

**Beware of Coverage-Driven Development!** The problem with adding tests just to hit 100% is that doing so will mask the fact that these lines aren't being used and therefore are not needed by the application. It also adds test code and coding time that is not necessary.

## Chapter 10: Mocking

The `mock` package is used to swap out pieces of the system to isolate bits of our application code from the rest of the system. Mock objects are sometimes called _test doubles_, _spies_, _fakes_ or _stubs_.

Typer provides a testing interface. With it, we don't have to use `subprocess.run`, which is good, because we can't mock stuff running in a separate process.

Mocks by default accept any access. If the real object allows `.start(index)`, we want our mock objects to allow `start(index)` as well. Mock objects are too flexible by default - they will also accept `star()` (a misspelled method), additional parameters, really anything.

_Mock drift_ - occurs when the interface you are mocking changes, and the mock in your test code doesn't.

Use `autospec=True` - without it, a mock will allow you to call any function, with any parameters, even if it doesn't make sense for the real thing being mocked.
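A minimal sketch of the same signature checking, done standalone via `unittest.mock.create_autospec` (the class here is hypothetical):

```python
from unittest import mock


class CardsDB:
    def start(self, card_id: int) -> None:
        ...


db = mock.create_autospec(CardsDB, instance=True)
db.start(7)  # OK - matches the real signature

try:
    db.start(7, "extra")  # rejected - too many arguments
except TypeError:
    pass

try:
    db.star(7)  # rejected - misspelled method does not exist on the spec
except AttributeError:
    pass
```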
Always use _autospec_ when you can.

**Mocking tests implementation, not behavior.** When we are using mocks in a test, we are no longer testing behavior, but testing implementation. Focusing tests on implementation is dangerous and time-consuming.

_Change detector tests_ - tests that break during valid refactoring. When tests fail whenever the code changes, they are change detector tests, and they are usually more trouble than they are worth.

Mocking is useful when you need to generate an exception or make sure your code calls a particular API method when it is supposed to, with the correct parameters.

There are several special-purpose mocking libraries:

- mocking databases: `pytest-postgresql`, `pytest-mongo`, `pytest-mysql`, `pytest-dynamodb`
- mocking HTTP servers: `pytest-httpserver`
- mocking requests: `responses`, `betamax`
- other: `pytest-rabbitmq`, `pytest-solr`, `pytest-elasticsearch`, `pytest-redis`

Adding functionality that makes testing easier is part of "design for testability" and can be used to allow testing at multiple levels or testing at a higher level.

## Chapter 11: tox and Continuous Integration

CI refers to the practice of merging all developers' code changes into a shared repository on a regular basis - often several times a day. Before the implementation of CI, teams used version control to keep track of code updates, and different developers would add a feature/fix on separate branches. Then the code was merged, built, and tested. The frequency of merges varied from "when your code is ready, merge it" to regularly scheduled merges (weekly, monthly). The merge was called _integration_ because the code was being integrated together. With this sort of version control, code conflicts happened often, and some merge errors were not found until very late.

CI tools build and run tests all on their own, usually triggered by a merge request. Because the build and test stages are automated, developers can integrate more frequently, even several times a day.

`tox` - a command-line tool that allows you to run a complete suite of tests in multiple environments. A great starting point when learning about CI. `tox`:

1. creates a virtual env in a `.tox` directory
2. pip installs some dependencies
3. builds your package
4. pip installs your package
5. runs your tests

`tox` can automate the testing process locally, but it also helps with cloud-based CI. You can integrate tox with GitHub Actions.

## Chapter 12: Testing Scripts and Applications

Definitions:

- script - a single file containing Python code that is intended to be run directly from Python
- importable script - a script in which no code is executed when it is imported. Code is executed only when it is run directly
- application - a package or script that has external dependencies

Testing a small script with `subprocess.run` works okay, but it does have drawbacks:

- we may want to test sections of larger scripts separately
- we may want to separate test code and scripts into different directories

The solution is to make the script importable: guard the entry point with `if __name__ == "__main__":` - this code is executed only when we call the script with `python script.py`.
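A minimal sketch of an importable script (mirroring the `ch_12/hello.py` example later in this repo):

```python
def main():
    print("Hello world")


if __name__ == "__main__":
    # Executed only via `python hello.py`, not on import,
    # so tests can import this module and call main() directly.
    main()
```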
## Chapter 13: Debugging Test Failures

pytest includes a few command-line flags that are useful for debugging:

- `--lf` / `--last-failed` - runs just the tests that failed last
- `--ff` / `--failed-first` - runs all the tests, starting with the ones that failed last
- `-x` / `--exitfirst` - stops the test session after the first failure
- `--maxfail=num` - stops the tests after `num` failures
- `--nf` / `--new-first` - runs all the tests, ordered by file modification time
- `--sw` / `--stepwise` - stops the tests at the first failure, next time starts from the last failing test
- `--sw-skip` / `--stepwise-skip` - same as `--sw`, but skips the first failure

Flags to control pytest output:

- `-v` / `--verbose` - shows all the test names, passing or failing
- `--tb=[auto/long/short/line/native/no]` - controls the traceback style
- `-l` / `--showlocals` - displays local variables alongside the stacktrace

Flags to start a command-line debugger:

- `--pdb` - starts an interactive debugging session at the point of failure
- `--trace` - starts the pdb source-code debugger immediately when running each test
- `--pdbcls` - uses alternatives to pdb

`pdb` - the Python Debugger - is part of the Python standard library. Add a `breakpoint()` call; when pytest hits this call, it will stop there and launch `pdb`. There are common commands recognized by `pdb` - the full list is in the documentation (or use PyCharm's debugger instead if you can).

## Chapter 14: Third-Party Plugins

The pytest code is designed to allow customisation and extensions, and there are hooks available to allow modifications and improvements through plugins.

Every time you put fixtures and/or hook functions into a project's `conftest.py` file, you create a local plugin - see the sketch below. Only some extra work is needed to turn these files into installable plugins. `pytest` plugins are installed with `pip`.
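For instance, a minimal local plugin is just a shared fixture in `conftest.py` (the fixture here is hypothetical):

```python
# conftest.py - a "local plugin", read by pytest automatically
import pytest


@pytest.fixture()
def empty_cards_db(tmp_path):
    # available to every test file in this directory, no import needed
    return {"path": tmp_path, "cards": []}
```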
Plugins that change the normal test run flow:

- `pytest-order` - specify the order using markers
- `pytest-randomly` - randomize the order, first by file, then by class, then by test
- `pytest-repeat` - makes it easy to repeat a single test, or multiple tests, a specific number of times
- `pytest-rerunfailures` - reruns failed tests (helpful for flaky tests)
- `pytest-xdist` - runs tests in parallel, either using multiple CPUs or multiple remote machines

Plugins that alter or enhance output:

- `pytest-instafail` - reports tracebacks and output from failed tests right after the failure
- `pytest-sugar` - shows green checkmarks instead of dots and has a nice progress bar
- `pytest-html` - allows for HTML report generation

Plugins for web development:

- `pytest-selenium` - additional fixtures to allow easy configuration of browser-based tests
- `pytest-splinter` - built on top of Selenium, allows Splinter to be used more easily from pytest
- `pytest-django`, `pytest-flask` - make testing Django/Flask apps easier

Plugins for fake data:

- `Faker` - generates fake data, provides the `faker` fixture
- `model-bakery` - generates Django models with fake data
- `pytest-factoryboy` - includes fixtures for Factory Boy
- `pytest-mimesis` - generates fake data similarly to Faker, but Mimesis is quite a bit faster

Plugins that extend pytest functionality:

- `pytest-cov` - runs coverage while testing
- `pytest-benchmark` - runs benchmark timing on code within tests
- `pytest-timeout` - doesn't let tests run too long
- `pytest-asyncio` - tests async functions
- `pytest-bdd` - BDD-style tests with pytest
- `pytest-freezegun` - freezes time so that any code that reads the time will get the same value during a test; you can also set a particular date or time
- `pytest-mock` - a thin wrapper around `unittest.mock`

Full list of plugins: https://docs.pytest.org/en/latest/reference/plugin_list.html

## Chapter 15: Building Plugins

Hook functions - function entry points that pytest provides to allow plugin developers to intercept pytest behaviour at certain points and make changes. There are multiple hook functions, for example:

- `pytest_configure()` - performs initial configuration. We can use it, for example, to pre-declare the `slow` marker.
- `pytest_addoption()` - registers options and settings, e.g. a new flag: _--slow_
- `pytest_collection_modifyitems()` - called after test collection, can be used to filter or re-order the test items, e.g. to find _slow_ tests

The Node Interface: https://docs.pytest.org/en/latest/reference/reference.html#node

You can transform a local `conftest.py` into an installable plugin. You can use `Flit` to get help with the `pyproject.toml` and `LICENSE`.

Plugins are code that needs to be tested just like any other code. `pytester` is a plugin shipped with `pytest`. `pytester` creates a temporary directory for each test that uses the `pytester` fixture; there are a bunch of functions to help populate this directory - https://docs.pytest.org/en/latest/reference/reference.html#pytester

## Chapter 16: Advanced Parametrization

When using complex parametrization values, `pytest` numbers the test cases like: `starting_card0, starting_card1, ...`.
It is possible to generate custom identifiers:

```py
card_list = [
    Card("foo", "todo"),
    Card("foo", "in prog"),
    Card("foo", "done"),
]

@pytest.mark.parametrize("starting_card", card_list, ids=str)
```

You can write a custom ID function:

```py
def cards_state(card):
    return card.state

@pytest.mark.parametrize("starting_card", card_list, ids=cards_state)
```

A lambda function works as well:

```py
@pytest.mark.parametrize("starting_card", card_list, ids=lambda c: c.state)
```

If you have one or two parameters requiring special treatment, use `pytest.param` to override the ID:

```py
card_list = [
    Card("foo", "todo"),
    pytest.param(Card("foo", "in prog"), id="special"),
    Card("foo", "done"),
]

@pytest.mark.parametrize("starting_card", card_list, ids=cards_state)
```

You can supply a list to `ids`, instead of a function:

```py
id_list = ["todo", "in prog", "done"]

@pytest.mark.parametrize("starting_card", card_list, ids=id_list)
```

but you have to be extra careful to keep the lists synchronized. Otherwise, the IDs are wrong.

It is possible to write our own function to generate parameter values:

```py
def text_variants():
    # This function can read data from a file/API/database/... as well.
    variants = {...: ...}
    for key, value in variants.items():
        yield pytest.param(value, id=key)

@pytest.mark.parametrize("variant", text_variants())
```

If you want to test all combinations, stacking parameters is the way to go:

```py
@pytest.mark.parametrize("state", states)
@pytest.mark.parametrize("owner", owners)
@pytest.mark.parametrize("summary", summaries)
def test_stacking(summary, owner, state):
    ...
```

this will act rather like cascading for loops, looping on the parameters from the bottom decorator to the top.

An _indirect parameter_ is one that gets passed to a fixture before it gets sent to the test function. Indirect parameters essentially let us parametrize a fixture, while keeping the parameter values with the test function. This allows different tests to use the same fixture with different parameter values.

```py
@pytest.fixture()
def user(request):
    role = request.param
    print(f"Logging in as {role}")
    yield role
    print(f"Logging out {role}")

@pytest.mark.parametrize("user", ["admin", "team_member", "visitor"], indirect=["user"])
def test_access_rights(user):
    ...
```

================================================
FILE: books/pytest/requirements.txt
================================================
tinydb
pytest
faker
tox
coverage
pytest-cov
typer
rich

================================================
FILE: books/pytest/setup.cfg
================================================
[tool:pytest]
python_paths = .
testpaths = tests

================================================
FILE: books/pytest/src/__init__.py
================================================
"""Top-level package for cards."""

__version__ = "1.0.0"

from .api import *  # noqa
from .cli import app  # noqa

================================================
FILE: books/pytest/src/api.py
================================================
"""
API for the cards project
"""
from dataclasses import asdict
from dataclasses import dataclass
from dataclasses import field

from src.db import DB

__all__ = [
    "Card",
    "CardsDB",
    "CardsException",
    "MissingSummary",
    "InvalidCardId",
]

__version__ = "1.0.0"


@dataclass
class Card:
    summary: str = None
    owner: str = None
    state: str = "todo"
    id: int = field(default=None, compare=False)

    @classmethod
    def from_dict(cls, d):
        return Card(**d)

    def to_dict(self):
        return asdict(self)


class CardsException(Exception):
    pass


class MissingSummary(CardsException):
    pass


class InvalidCardId(CardsException):
    pass


class CardsDB:
    def __init__(self, db_path):
        self._db_path = db_path
        self._db = DB(db_path, ".cards_db")

    def add_card(self, card: Card) -> int:
        """Add a card, return the id of card."""
        if not card.summary:
            raise MissingSummary
        if card.owner is None:
            card.owner = ""
        id = self._db.create(card.to_dict())
        self._db.update(id, {"id": id})
        return id

    def get_card(self, card_id: int) -> Card:
        """Return a card with a matching id."""
        db_item = self._db.read(card_id)
        if db_item is not None:
            return Card.from_dict(db_item)
        else:
            raise InvalidCardId(card_id)

    def list_cards(self, owner=None, state=None):
        """Return a list of cards."""
        all = self._db.read_all()
        if (owner is not None) and (state is not None):
            return [
                Card.from_dict(t)
                for t in all
                if (t["owner"] == owner and t["state"] == state)
            ]
        elif owner is not None:
            return [Card.from_dict(t) for t in all if t["owner"] == owner]
        elif state is not None:
            return [Card.from_dict(t) for t in all if t["state"] == state]
        else:
            return [Card.from_dict(t) for t in all]

    def count(self) -> int:
        """Return the number of cards in db."""
        return self._db.count()

    def update_card(self, card_id: int, card_mods: Card) -> None:
        """Update a card with modifications."""
        try:
            self._db.update(card_id, card_mods.to_dict())
        except KeyError as exc:
            raise InvalidCardId(card_id) from exc

    def start(self, card_id: int):
        """Set a card state to 'in prog'."""
        self.update_card(card_id, Card(state="in prog"))

    def finish(self, card_id: int):
        """Set a card state to 'done'."""
        self.update_card(card_id, Card(state="done"))

    def delete_card(self, card_id: int) -> None:
        """Remove a card from db with given card_id."""
        try:
            self._db.delete(card_id)
        except KeyError as exc:
            raise InvalidCardId(card_id) from exc

    def delete_all(self) -> None:
        """Remove all cards from db."""
        self._db.delete_all()

    def close(self):
        self._db.close()

    def path(self):
        return self._db_path

================================================
FILE: books/pytest/src/cli.py
================================================
"""Command Line Interface (CLI) for cards project."""
import os
import pathlib
from contextlib import contextmanager
from io import StringIO
from typing import List

import rich
import typer
from rich.table import Table

import src.api as cards

app = typer.Typer(name="cards", add_completion=False)


@app.command()
def version():
    """Return version of cards application"""
    print(cards.__version__)


@app.command()
def add(
    summary: List[str],
    owner: str = typer.Option(None, "-o", "--owner"),
):
    """Add a card to db."""
    summary = " ".join(summary) if summary else None
    with cards_db() as db:
        db.add_card(cards.Card(summary, owner, state="todo"))


@app.command()
def delete(card_id: int):
    """Remove card in db with given id."""
    with cards_db() as db:
        try:
            db.delete_card(card_id)
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command("list")
def list_cards(
    owner: str = typer.Option(None, "-o", "--owner"),
    state: str = typer.Option(None, "-s", "--state"),
):
    """
    List cards in db.
    """
    with cards_db() as db:
        the_cards = db.list_cards(owner=owner, state=state)
        table = Table(box=rich.box.SIMPLE)
        table.add_column("ID")
        table.add_column("state")
        table.add_column("owner")
        table.add_column("summary")
        for t in the_cards:
            owner = "" if t.owner is None else t.owner
            table.add_row(str(t.id), t.state, owner, t.summary)
        out = StringIO()
        rich.print(table, file=out)
        print(out.getvalue())


@app.command()
def update(
    card_id: int,
    owner: str = typer.Option(None, "-o", "--owner"),
    summary: List[str] = typer.Option(None, "-s", "--summary"),
):
    """Modify a card in db with given id with new info."""
    summary = " ".join(summary) if summary else None
    with cards_db() as db:
        try:
            db.update_card(card_id, cards.Card(summary, owner, state=None))
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command()
def start(card_id: int):
    """Set a card state to 'in prog'."""
    with cards_db() as db:
        try:
            db.start(card_id)
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command()
def finish(card_id: int):
    """Set a card state to 'done'."""
    with cards_db() as db:
        try:
            db.finish(card_id)
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command()
def config():
    """List the path to the Cards db."""
    with cards_db() as db:
        print(db.path())


@app.command()
def count():
    """Return number of cards in db."""
    with cards_db() as db:
        print(db.count())


@app.callback(invoke_without_command=True)
def main(ctx: typer.Context):
    """
    Cards is a small command line task tracking application.
""" if ctx.invoked_subcommand is None: list_cards(owner=None, state=None) def get_path(): db_path_env = os.getenv("CARDS_DB_DIR", "") if db_path_env: db_path = pathlib.Path(db_path_env) else: db_path = pathlib.Path.home() / "cards_db" return db_path @contextmanager def cards_db(): db_path = get_path() db = cards.CardsDB(db_path) yield db db.close() ================================================ FILE: books/pytest/src/db.py ================================================ """ DB for the cards project """ import tinydb class DB: def __init__(self, db_path, db_file_prefix): self._db = tinydb.TinyDB( db_path / f"{db_file_prefix}.json", create_dirs=True ) def create(self, item: dict) -> int: id = self._db.insert(item) return id def read(self, id: int): item = self._db.get(doc_id=id) return item def read_all(self): return self._db def update(self, id: int, mods) -> None: changes = {k: v for k, v in mods.items() if v is not None} self._db.update(changes, doc_ids=[id]) def delete(self, id: int) -> None: self._db.remove(doc_ids=[id]) def delete_all(self) -> None: self._db.truncate() def count(self) -> int: return len(self._db) def close(self): self._db.close() ================================================ FILE: books/pytest/tests/ch_02/test_card.py ================================================ import pytest from src import Card def test_field_access(): c = Card("something", "brian", "todo", 123) assert (c.summary, c.owner, c.state, c.id) == ("something", "brian", "todo", 123) def test_defaults(): c = Card() assert (c.summary, c.owner, c.state, c.id) == (None, None, "todo", None) def test_equality(): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 123) def test_equality_with_different_ids(): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 321) def test_inequality(): assert Card("something", "brian", "todo", 123) != Card("completely different", "okken", "todo", 123) def test_to_dict(): assert Card.from_dict({ "summary": "something", "owner": "brian", "state": "todo", "id": 123 }) == Card("something", "brian", "todo", 123) def test_from_dict(): assert Card("something", "brian", "todo", 123).to_dict() == { "summary": "something", "owner": "brian", "state": "todo", "id": 123 } ================================================ FILE: books/pytest/tests/ch_02/test_classes.py ================================================ from src import Card class TestEquality: def test_equality(self): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 123) def test_equality_with_different_ids(self): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 321) def test_inequality(self): assert Card("something", "brian", "todo", 123) != Card("completely different", "okken", "todo", 123) ================================================ FILE: books/pytest/tests/ch_02/test_exceptions.py ================================================ import pytest from src import CardsDB def test_no_path_raises(): with pytest.raises(TypeError): CardsDB() def test_raises_with_info(): with pytest.raises(TypeError, match="missing 1 .* positional argument"): CardsDB() ================================================ FILE: books/pytest/tests/ch_02/test_helper.py ================================================ import pytest from src import Card def assert_identical(c1: Card, c2: Card): # Do not include 'assert_identical' in traceback: __tracebackhide__ = True assert c1 == c2 if c1.id != c2.id: 
pytest.fail(f"id's don't match. {c1.id} != {c2.id}") def test_identical(): assert_identical(Card("foo", id=123), Card("foo", id=123)) @pytest.mark.skip() def test_identical_fail(): assert_identical(Card("foo", id=123), Card("foo", id=321)) ================================================ FILE: books/pytest/tests/ch_03/conftest.py ================================================ from pathlib import Path from tempfile import TemporaryDirectory import pytest from src import ( Card, CardsDB, ) @pytest.fixture(scope="session") def db(): with TemporaryDirectory() as db_dir: db_path = Path(db_dir) _db = CardsDB(db_path) yield _db _db.close() @pytest.fixture(scope="function") def cards_db(db): db.delete_all() return db @pytest.fixture(scope="session") def some_cards(): return [ Card("write book", "brian", "done"), Card("edit book", "katie", "done"), Card("write 2nd edition", "brian", "todo"), Card("edit 2nd edition", "katie", "todo"), ] ================================================ FILE: books/pytest/tests/ch_03/test_autouse.py ================================================ from time import ( localtime, sleep, strftime, time, ) import pytest @pytest.fixture(scope="function") def non_empty_db(cards_db, some_cards): for c in some_cards: cards_db.add_card(c) return cards_db @pytest.fixture(autouse=True, scope="session") def footer_session_scope(): yield now = time() print("---") print(f"finished : {strftime('%d %b %X', localtime(now))}") print("--------") @pytest.fixture(autouse=True) def footer_function_scope(): start = time() yield stop = time() print(f"test duration: {stop - start:0.3}") def test_1(): sleep(1) def test_2(): sleep(1.23) ================================================ FILE: books/pytest/tests/ch_03/test_count.py ================================================ from src import Card def test_empty(cards_db): assert cards_db.count() == 0 def test_two(cards_db): cards_db.add_card(Card("first")) cards_db.add_card(Card("second")) assert cards_db.count() == 2 def test_three(cards_db): cards_db.add_card(Card("first")) cards_db.add_card(Card("second")) cards_db.add_card(Card("three")) assert cards_db.count() == 3 ================================================ FILE: books/pytest/tests/ch_03/test_count_initial.py ================================================ from pathlib import Path from tempfile import TemporaryDirectory from src import CardsDB def test_empty(): with TemporaryDirectory() as db_dir: db_path = Path(db_dir) db = CardsDB(db_path) count = db.count() db.close() assert count == 0 ================================================ FILE: books/pytest/tests/ch_03/test_fixtures.py ================================================ import pytest @pytest.fixture() def some_data(): return 42 def test_some_data(some_data): assert some_data == 42 ================================================ FILE: books/pytest/tests/ch_03/test_rename_fixture.py ================================================ import pytest @pytest.fixture(name="ultimate_answer") def ultimate_answer_fixture(): return 42 def test_everything(ultimate_answer): assert ultimate_answer == 42 ================================================ FILE: books/pytest/tests/ch_03/test_some.py ================================================ import pytest @pytest.fixture(scope="function") def non_empty_db(cards_db, some_cards): for c in some_cards: cards_db.add_card(c) return cards_db def test_add_some(cards_db, some_cards): expected_count = len(some_cards) for c in some_cards: cards_db.add_card(c) assert cards_db.count() == 
def test_non_empty(non_empty_db):
    assert non_empty_db.count() > 0

================================================
FILE: books/pytest/tests/ch_04/conftest.py
================================================
import pytest

from src import CardsDB


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()

================================================
FILE: books/pytest/tests/ch_04/test_config.py
================================================
from typer.testing import CliRunner

import src as cards


def run_cards(*params):
    runner = CliRunner()
    result = runner.invoke(cards.app, params)
    return result.output.rstrip()


def test_run_cards():
    assert run_cards("version") == cards.__version__


def test_patch_get_path(monkeypatch, tmp_path):
    def fake_get_path():
        return tmp_path

    monkeypatch.setattr(cards.cli, "get_path", fake_get_path)
    assert run_cards("config") == str(tmp_path)


def test_patch_home(monkeypatch, tmp_path):
    full_cards_dir = tmp_path / "cards_db"

    def fake_home():
        return tmp_path

    monkeypatch.setattr(cards.cli.pathlib.Path, "home", fake_home)
    assert run_cards("config") == str(full_cards_dir)


def test_patch_env_var(monkeypatch, tmp_path):
    monkeypatch.setenv("CARDS_DB_DIR", str(tmp_path))
    assert run_cards("config") == str(tmp_path)

================================================
FILE: books/pytest/tests/ch_04/test_tmp.py
================================================
def test_tmp_path(tmp_path):
    file = tmp_path / "file.txt"
    file.write_text("Hello")
    assert file.read_text() == "Hello"


def test_tmp_path_factory(tmp_path_factory):
    path = tmp_path_factory.mktemp("sub")
    file = path / "file.txt"
    file.write_text("Hello")
    assert file.read_text() == "Hello"

================================================
FILE: books/pytest/tests/ch_04/test_version.py
================================================
from typer.testing import CliRunner

import src as cards


def test_version(capsys):
    cards.cli.version()
    output = capsys.readouterr().out.rstrip()
    assert output == cards.__version__


def test_version_v2():
    runner = CliRunner()
    result = runner.invoke(cards.app, ["version"])
    output = result.output.rstrip()
    assert output == cards.__version__

================================================
FILE: books/pytest/tests/ch_05/test_parametrize.py
================================================
import pytest

from src import (
    Card,
    CardsDB,
)


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()


@pytest.fixture(scope="function")
def cards_db(db):
    db.delete_all()
    return db


@pytest.mark.parametrize("initial_state", ["done", "in prog", "todo"])
def test_finish(cards_db, initial_state):
    c = Card("write a book", state=initial_state)
    index = cards_db.add_card(c)
    cards_db.finish(index)
    c = cards_db.get_card(index)
    assert c.state == "done"


@pytest.fixture(params=["done", "in prog", "todo"])
def start_state(request):
    return request.param


def test_finish_v2(cards_db, start_state):
    c = Card("write a book", state=start_state)
    index = cards_db.add_card(c)
    cards_db.finish(index)
    c = cards_db.get_card(index)
    assert c.state == "done"


def pytest_generate_tests(metafunc):
    if "start_state_2" in metafunc.fixturenames:
        metafunc.parametrize("start_state_2", ["done", "in prog", "todo"])


def test_finish_v3(cards_db, start_state_2):
    c = Card("write a book", state=start_state_2)
    index = cards_db.add_card(c)
    cards_db.finish(index)
    c = cards_db.get_card(index)
    assert c.state == "done"

================================================
FILE: books/pytest/tests/ch_06/pytest.ini
================================================
[pytest]
markers =
    smoke: subset of tests
    exception: check for expected exceptions
    custom: run only ch_06/custom
    num_cards: number of cards to prefill for cards_db fixture
addopts = --strict-markers

================================================
FILE: books/pytest/tests/ch_06/test_builtin.py
================================================
from pathlib import Path
from tempfile import TemporaryDirectory

import pytest
from packaging.version import parse

from src import (
    Card,
    CardsDB,
    api,
)


@pytest.mark.skip(reason="card doesn't support comparison yet")
def test_less_than_skip():
    assert Card("a task") < Card("b task")


@pytest.mark.skipif(
    parse(api.__version__).major < 2,
    reason="Card comparison not supported in 1.x",
)
def test_less_than_skipif():
    assert Card("a task") < Card("b task")


@pytest.mark.xfail(
    parse(api.__version__).major < 2,
    reason="Card comparison not supported in 1.x",
)
def test_less_than_xfail():
    assert Card("a task") < Card("b task")


@pytest.mark.xfail(reason="XPASS demo")
def test_xpass():
    assert Card("a task") == Card("a task")


@pytest.mark.xfail(reason="strict demo", strict=True)
@pytest.mark.skip
def test_xpass_strict():
    assert Card("a task") == Card("a task")

================================================
FILE: books/pytest/tests/ch_06/test_custom.py
================================================
import pytest

from src import (
    Card,
    CardsDB,
    InvalidCardId,
)

pytestmark = [pytest.mark.custom]


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()


@pytest.fixture(scope="function")
def cards_db(db):
    db.delete_all()
    return db


@pytest.mark.smoke
def test_start(cards_db):
    i = cards_db.add_card(Card("foo", state="todo"))
    cards_db.start(i)
    c = cards_db.get_card(i)
    assert c.state == "in prog"


@pytest.mark.exception
def test_start_non_existent(cards_db):
    with pytest.raises(InvalidCardId):
        cards_db.start(123)

================================================
FILE: books/pytest/tests/ch_06/text_combination.py
================================================
import pytest

from src import (
    Card,
    CardsDB,
)


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()


@pytest.fixture(scope="function")
def cards_db(db, request, faker):
    db.delete_all()
    faker.seed_instance(101)
    m = request.node.get_closest_marker("num_cards")
    if m and len(m.args) > 0:
        num_cards = m.args[0]
        for _ in range(num_cards):
            db.add_card(Card(summary=faker.sentence(), owner=faker.first_name()))
    return db


@pytest.mark.num_cards
def test_zero(cards_db):
    assert cards_db.count() == 0


@pytest.mark.num_cards(3)
def test_three(cards_db):
    assert cards_db.count() == 3

================================================
FILE: books/pytest/tests/ch_12/hello.py
================================================
def main():
    print("Hello world")


if __name__ == '__main__':
    main()

================================================
FILE: books/pytest/tests/ch_12/test_hello.py
================================================
from tests.ch_12 import hello


def test_hello(capsys):
    hello.main()
    output = capsys.readouterr().out
    assert output == "Hello world\n"

================================================
FILE: books/pytest/tests/ch_15/conftest.py
================================================
import pytest


def pytest_configure(config):
    config.addinivalue_line("markers", "slow: mark test as slow to run")


def pytest_addoption(parser):
    parser.addoption("--slow", action="store_true", help="include tests marked slow")


def pytest_collection_modifyitems(config, items):
    if not config.getoption("--slow"):
        skip_slow = pytest.mark.skip(reason="need --slow option to run")
        for item in items:
            if item.get_closest_marker("slow"):
                item.add_marker(skip_slow)

================================================
FILE: books/pytest/tests/ch_15/pytest.ini
================================================
[pytest]
markers =
    slow: mark test as slow to run

================================================
FILE: books/pytest/tests/ch_15/test_slow.py
================================================
import pytest


def test_normal():
    pass


@pytest.mark.slow
def test_slow():
    pass

================================================
FILE: books/python-architecture-patterns/Dockerfile
================================================
FROM python:3.10.2

WORKDIR /src
ENV PYTHONPATH "${PYTHONPATH}:/src"

COPY requirements.txt .
COPY setup.cfg .

RUN pip install -r requirements.txt

COPY src/ src/
COPY tests/ tests/

================================================
FILE: books/python-architecture-patterns/Makefile
================================================
test-flake8:
	docker-compose run --rm api flake8 .

test-mypy:
	docker-compose run --rm api mypy .

test-pytest:
	docker-compose run --rm api pytest .

================================================
FILE: books/python-architecture-patterns/docker-compose.yml
================================================
version: "3.9"
services:
  redis_pubsub:
    build:
      context: .
      dockerfile: Dockerfile
    image: allocation-image
    depends_on:
      - postgres
      - redis
      - mailhog
    environment:
      - DB_HOST=postgres
      - DB_PASSWORD=abc123
      - REDIS_HOST=redis
      - EMAIL_HOST=mailhog
      - PYTHONDONTWRITEBYTECODE=1
    volumes:
      - ./:/src
    entrypoint:
      - python
      - src/redis_consumer.py
  api:
    image: allocation-image
    build:
      context: .
      dockerfile: Dockerfile
    depends_on:
      - redis_pubsub
    volumes:
      - ./:/src
    environment:
      - DB_HOST=postgres
      - DB_PASSWORD=abc123
      - API_HOST=api
      - REDIS_HOST=redis
      - EMAIL_HOST=mailhog
      - PYTHONUNBUFFERED=1
      - PYTHONDONTWRITEBYTECODE=1
    command: uvicorn src.app:api --host 0.0.0.0 --port 80 --reload
    ports:
      - "5005:80"
  postgres:
    image: postgres:14.2
    environment:
      - POSTGRES_USER=allocation
      - POSTGRES_PASSWORD=abc123
    ports:
      - "54321:5432"
  redis:
    image: redis:alpine
    ports:
      - "63791:6379"
  mailhog:
    image: mailhog/mailhog
    ports:
      - "11025:1025"
      - "18025:8025"

================================================
FILE: books/python-architecture-patterns/notes.md
================================================
[go back](https://github.com/pkardas/learning)

# Architecture Patterns with Python: Enabling Test-Driven Development, Domain-Driven Design, and Event-Driven Microservices

Book by Harry Percival and Bob Gregory

Code here: [click](.)
- [Introduction](#introduction)
- [Chapter 1: Domain Modeling](#chapter-1-domain-modeling)
- [Chapter 2: Repository Pattern](#chapter-2-repository-pattern)
- [Chapter 3: On Coupling and Abstractions](#chapter-3-on-coupling-and-abstractions)
- [Chapter 4: FlaskAPI and Service Layer](#chapter-4-flaskapi-and-service-layer)
- [Chapter 5: TDD in High Gear and Low Gear](#chapter-5-tdd-in-high-gear-and-low-gear)
- [Chapter 6: Unit of Work Pattern](#chapter-6-unit-of-work-pattern)
- [Chapter 7: Aggregates and Consistency Boundaries](#chapter-7-aggregates-and-consistency-boundaries)
- [Chapter 8: Events and the Message Bus](#chapter-8-events-and-the-message-bus)
- [Chapter 9: Going to Town the Message Bus](#chapter-9-going-to-town-the-message-bus)
- [Chapter 10: Commands and Command Handler](#chapter-10-commands-and-command-handler)
- [Chapter 11: Event-Driven Architecture: Using Events to Integrate Microservices](#chapter-11-event-driven-architecture-using-events-to-integrate-microservices)
- [Chapter 12: Command-Query Responsibility Segregation (CQRS)](#chapter-12-command-query-responsibility-segregation-cqrs)
- [Chapter 13: Dependency Injection (and Bootstrapping)](#chapter-13-dependency-injection-and-bootstrapping)
- [Epilogue](#epilogue)
- [Appendix](#appendix)

## Introduction

Software systems tend toward chaos. When we first start building a new system, we have grand ideas that our code will be clean and well-ordered, but over time we find that it gathers cruft and edge cases and ends up a confusing morass of manager classes and util modules. Fortunately, the techniques to avoid creating a big ball of mud aren't complex.

Encapsulation covers two closely related ideas: simplifying behavior and hiding data. We encapsulate behavior by identifying a task that needs to be done in our code and giving that task a well-defined object or function. We call that object or function an abstraction. Encapsulating behavior by using abstractions is a powerful tool for making code more expressive, more testable, and easier to maintain.

Encapsulation and abstraction help us by hiding details and protecting the consistency of our data, but we also need to pay attention to the interactions between our objects and functions. When one function, module or object uses another, we say that the one depends on the other. Those dependencies form a kind of network or graph. For example: Presentation Layer -> Business Logic -> Database Layer. Layered architecture is the most common pattern for building business software.

The Dependency Inversion Principle:

1. High-level modules should not depend on low-level modules. Both should depend on abstractions.
2. Abstractions should not depend on details. Instead, details should depend on abstractions.

High-level modules are the code that your organization really cares about. The high-level modules of a software system are the functions, classes, and packages that deal with our real-world concepts. By contrast, low-level modules are the code that your organization doesn't care about. If payroll runs on time, your business is unlikely to care whether that is a cron job or a transient function running on Kubernetes.

> All problems in computer science can be solved by adding another level of indirection ~ David Wheeler

We don't want business logic changes to slow down because they are closely coupled to low-level infrastructure details. Adding an abstraction between them allows the two to change independently of each other (see the sketch below).
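A minimal sketch of the DIP in Python (all names hypothetical): the high-level policy and the low-level detail both depend on an abstraction:

```python
import abc


class NotificationSender(abc.ABC):
    """The abstraction both sides depend on."""

    @abc.abstractmethod
    def send(self, destination: str, message: str) -> None:
        ...


class EmailSender(NotificationSender):
    """Low-level detail - depends on the abstraction, not the other way around."""

    def send(self, destination: str, message: str) -> None:
        print(f"email to {destination}: {message}")


def notify_out_of_stock(sender: NotificationSender, sku: str) -> None:
    # high-level code knows nothing about SMTP, only the abstraction
    sender.send("stock@example.com", f"Out of stock: {sku}")


notify_out_of_stock(EmailSender(), "RED-CHAIR")
```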
## Chapter 1: Domain Modeling

The _domain_ is a fancy word for saying _the problem you are trying to solve_. A _model_ is a map of a process or phenomenon that captures a useful property.

In a nutshell, DDD says that the most important thing about software is that it provides a useful model of a problem. If we get that model right, our software delivers value and makes new things possible.

When we hear our business stakeholders using unfamiliar words, or using terms in a specific way, we should listen to understand the deeper meaning and encode their hard-won experience into our software.

Choose memorable identifiers for our objects so that the examples are easier to talk about.

Whenever we have a business concept that has data but no identity, we often choose to represent it using the Value Object pattern. A value object is any domain object that is uniquely identified by the data it holds; we usually make such objects immutable. Named tuples and frozen data classes are a great tool for this.

Entities, unlike values, have identity equality. We can change their values, and they are still recognizably the same thing. Batches, in our example, are entities. We can allocate lines to a batch, or change the date that we expect it to arrive, and it will still be the same entity. We usually make this explicit in code by implementing equality operators on entities.

For value objects, the hash should be based on all attributes, and we should ensure that the objects are immutable. For entities, the simplest option is to say that the hash is None, meaning that the object is not hashable and cannot, for example, be used in a set. If for some reason you decide to use set or dict operations with entities, the hash should be based on the attributes that define the entity's unique identity over time.

Exceptions can express domain concepts too.

## Chapter 2: Repository Pattern

Repository Pattern - a simplifying abstraction over data storage, allowing us to decouple our model layer from the data layer. This simplifying abstraction makes our system more testable by hiding the complexities of the database. It hides the boring details of data access by pretending that all of our data is in memory. This pattern is very common in DDD.

Layered architecture is a common approach to structuring a system that has a UI, some logic, and a database. Onion architecture - the model is on the inside, and dependencies flow inward to it.

The ORM gives us persistence ignorance - the fancy model doesn't need to know anything about how data is loaded or persisted. Using an ORM is already an example of the DIP. Instead of depending on hardcoded SQL, we depend on an abstraction - the ORM.

The simplest repository has just two methods:

- add - to put a new item in the repository
- get - to return a previously added item.

One of the biggest benefits of the Repository pattern is the possibility to build a fake repository.

> Building fakes for your abstractions is an excellent way to get design feedback: if it's hard to fake, the abstraction
> is probably too complicated.

A simple CRUD wrapper around a database doesn't need a domain model or a repository.

Repository Pattern Recap:

- _Apply dependency inversion to your ORM_ - The domain model should be free of infrastructure concerns, so your ORM should import your model, and not the other way around.
- _The Repository pattern is a simple abstraction around permanent storage_ - The repository gives you the illusion of a collection of in-memory objects. It makes it easy to create a FakeRepository for testing and to swap fundamental details of your infrastructure without disrupting your application code.
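A minimal sketch of that two-method interface plus an in-memory fake (the `Product` stand-in is illustrative, not the book's full model):

```python
from dataclasses import dataclass
from typing import Optional, Protocol


@dataclass
class Product:
    sku: str


class AbstractRepository(Protocol):
    def add(self, product: Product) -> None: ...
    def get(self, sku: str) -> Optional[Product]: ...


class FakeRepository:
    """Working in-memory implementation, designed for use only in tests."""

    def __init__(self) -> None:
        self._products: dict[str, Product] = {}

    def add(self, product: Product) -> None:
        self._products[product.sku] = product

    def get(self, sku: str) -> Optional[Product]:
        return self._products.get(sku)


repo = FakeRepository()
repo.add(Product(sku="RED-CHAIR"))
assert repo.get("RED-CHAIR") == Product(sku="RED-CHAIR")
```

Because the interface is so small, the fake stays trivial, which is exactly the design feedback the quote above is pointing at.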
## Chapter 3: On Coupling and Abstractions

When we are unable to change component A for fear of breaking component B, we say that the components have become coupled. Globally, coupling increases the risk and the cost of changing our code, sometimes to the point where we feel unable to make any changes at all. We can reduce the degree of coupling within a system by abstracting away the details.

According to the authors, it is better to use fake resources instead of mocks:

- Mocks are used to verify how something gets used; they have methods like `assert_called_once_with`. They are associated with London-school TDD.
- Fakes are working implementations of the thing they are replacing, but they are designed for use only in tests. They wouldn't work in real life. You can use them to make assertions about the end state of a system rather than the behaviours along the way, so they are associated with classic-style TDD.

TDD is a design practice first and a testing practice second. The tests act as a record of our design choices and serve to explain the system to us when we return to the code after a long absence. Tests that use too many mocks get overwhelmed with setup code that hides the story we care about.

Links:

- [YOW! Conference 2017 - Steve Freeman - Test Driven Development: That’s Not What We Meant](https://www.youtube.com/watch?v=B48Exq57Zg8)
- [Edwin Jung - Mocking and Patching Pitfalls](https://www.youtube.com/watch?v=Ldlz4V-UCFw)

## Chapter 4: FlaskAPI and Service Layer

Service Layer - extract logic from the endpoint, because it might be doing too much: validating input, handling errors, committing. Our high-level module, the service layer, depends on the repository abstraction. And the details of the implementation for our specific choice of persistent storage also depend on the same abstraction.

The responsibilities of the ~~Flask~~ FastAPI app are just standard web stuff - per-request session management, parsing information out of POST parameters, response status codes and JSON. All the orchestration logic is in the use case/service layer, and the domain logic stays in the domain.

Application service - its job is to handle requests from the outside world and to orchestrate an operation. It drives the application by following a bunch of simple steps:

- Get some data from the database
- Update the domain model
- Persist any changes

This is the kind of boring work that has to happen for every operation in your system, and keeping it separate from business logic helps to keep things tidy.

Domain service - this is the name for a piece of logic that belongs in the domain model but doesn't sit naturally inside a stateful entity or value object.

## Chapter 5: TDD in High Gear and Low Gear

Once you implement domain modeling and the service layer, you really can get to a stage where unit tests outnumber integration and end-to-end tests by an order of magnitude.

Tests are supposed to help us change our system fearlessly, but often we see teams writing too many tests against their domain model. This causes problems when they come to change their codebase and find that they need to update tens or even hundreds of unit tests.

The service layer forms an API for our system that we can drive in multiple ways. Testing against this API reduces the amount of code that we need to change when we refactor our domain model.
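For reference, a service-layer function is pure orchestration; a minimal sketch under assumed names (the book's version differs in detail, e.g. it builds an order line for the domain model):

```python
class InvalidSku(Exception):
    pass


def allocate(order_id: str, sku: str, qty: int, repo, session) -> str:
    """Application service: fetch data, delegate to the domain model, persist."""
    product = repo.get(sku)  # get some data from the database
    if product is None:
        raise InvalidSku(f"Invalid SKU: {sku}")
    batch_ref = product.allocate(order_id, qty)  # update the domain model
    session.commit()  # persist any changes
    return batch_ref
```

Because it takes primitives and abstract collaborators, a test can drive it with a fake repository and a fake session; no web framework or real database required.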
If we restrict ourselves to testing only against the service layer, we will not have any tests that directly interact with "private" methods or attributes on our model objects, which leaves us freer to refactor them.

Most of the time, when we are adding a new feature or fixing a bug, we don't need to make extensive changes to the domain model. In these cases, we prefer to write tests against the service layer because of the lower coupling and higher coverage. When starting a new project or when hitting a particularly gnarly problem, we will drop back down to writing tests against the domain model, so we get better feedback and executable documentation of our intent.

Metaphor of shifting gears - when starting a journey, the bicycle needs to be in a low gear, so it can overcome inertia. Once we are off and running, we can go faster and more efficiently by changing into a high gear. But if we suddenly encounter a steep hill or are forced to slow down by a hazard, we again drop to a low gear until we can pick up speed again.

Rules of Thumb for Different Types of Test:

1. Aim for one end-to-end test per feature - the objective is to demonstrate that the feature works, and that all the moving parts are glued together.
2. Write the bulk of your tests against the service layer - these tests offer a good trade-off between coverage, runtime, and efficiency.
3. Maintain a small core of tests written against your domain model - these tests have highly focused coverage and are more brittle, but they provide the best feedback. Don't be afraid to delete these tests if the functionality is later covered by tests at the service layer.
4. Error handling counts as a feature - ideally, your application will be structured such that all errors that bubble up to your entrypoints are handled in the same way. This means you need to test only the happy path for each feature, and to reserve one end-to-end test for all unhappy paths.

Express your service layer in terms of primitives rather than domain objects.

## Chapter 6: Unit of Work Pattern

If the Repository pattern is our abstraction over persistent storage, the Unit of Work pattern is our abstraction over the idea of atomic operations. It will allow us to decouple our service layer from the data layer. The Unit of Work acts as a single entrypoint to our persistent storage, and it keeps track of what objects were loaded and of the latest state. The Unit of Work and Repository classes are collaborators.

> Don't mock what you don't own

A rule of thumb that forces us to build these simple abstractions over messy subsystems. This encourages us to think carefully about our designs.

It is better to require an explicit commit, so we can choose when to flush state. The default behaviour is to not change anything, which makes the software safe by default. There is one code path that leads to changes in the system: total success and an explicit commit. Any other code path, any exception, any early exit from the UoW's scope leads to a safe state.

You should always feel free to throw away tests if you think they are not going to add value longer term.

SQLAlchemy already uses a Unit of Work in the shape of the Session object (it tracks changes to entities, and when the session is flushed, all the changes are persisted together). Then, why bother? The Session API is very rich; our Unit of Work can simplify the session to its essential core: start, commit or throw away. Besides, our Unit of Work can access the Repository object.
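A minimal sketch of that "safe by default, explicit commit" shape as a context manager (a fake in the spirit of the book's tests; a real implementation would wrap a database session):

```python
class FakeUnitOfWork:
    """Safe by default: nothing is persisted unless commit() is called."""

    def __init__(self) -> None:
        self.committed = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        if not self.committed:
            self.rollback()  # any exception or early exit lands here

    def commit(self) -> None:
        self.committed = True

    def rollback(self) -> None:
        pass  # a real UoW would discard the session's pending changes


with FakeUnitOfWork() as uow:
    # ... load aggregates through the UoW's repositories and mutate them ...
    uow.commit()  # the single code path that changes the system
```

Note that `__exit__` does not swallow exceptions, so a failing block both rolls back and still raises.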
Unit of Work Pattern Recap:

- _The Unit of Work pattern is an abstraction around data integrity_ - It helps to enforce the consistency of our domain model, and improves performance, by letting us perform a single flush operation at the end of an operation.
- _It works closely with the Repository and Service Layer patterns_ - The Unit of Work pattern completes our abstractions over data access by representing atomic updates. Each of our service-layer use cases runs in a single unit of work that succeeds or fails as a block.
- _This is a lovely case for a context manager_ - Context managers are an idiomatic way of defining scope in Python. We can use a context manager to automatically roll back our work at the end of a request, which means the system is safe by default.
- _SQLAlchemy already implements this pattern_ - We introduce an even simpler abstraction over the SQLAlchemy Session object in order to "narrow" the interface between the ORM and our code. This helps to keep us loosely coupled.

## Chapter 7: Aggregates and Consistency Boundaries

A constraint is a rule that restricts the possible states our model can get into, while an invariant is defined a little more precisely as a condition that is always true.

The Aggregate pattern - a design pattern from the DDD community that helps us to solve concurrency issues. An aggregate is just a domain object that contains other domain objects and lets us treat the whole collection as a single unit.

> An aggregate is a cluster of associated objects that we treat as a unit for the purpose of data changes.

We have to choose the right granularity for our aggregate. Candidates: Shipment, Cart, Stock, Product.

Bounded contexts were invented as a reaction against attempts to capture entire businesses in a single model. Attributes needed in one context are irrelevant in another. Concepts with the same name can have entirely different meanings in different contexts. It is better to have several models, draw boundaries around each context, and handle the translation between different contexts explicitly. This concept translates very well to the world of microservices, where each microservice is free to have its own concept of "customer" and its own rules for translating that to and from other microservices it integrates with.

Aggregates should be the only way to get to our model. The Aggregate pattern is designed to help manage some technical constraints around consistency and performance.

Version numbers are just one way to implement optimistic locking (sketched after the recap below). Optimistic - our default assumption is that everything will be fine when two users want to make changes to the database. We think it is unlikely that they will conflict with each other. We let them go ahead and just make sure we have a way to notice if there is a problem. The usual way to handle a failure is to retry the operation from the beginning.

Pessimistic - works under the assumption that two users are going to cause conflicts, and we want to prevent conflicts in all cases, so we lock everything just to be safe. In our example, that would mean locking the whole `batches` table or using `SELECT FOR UPDATE`. With pessimistic locking, you don't need to think about handling failures because the database will prevent them.

Aggregates and Consistency Boundaries Recap:

- _Aggregates are your entrypoints into the domain model_ - By restricting the number of ways that things can be changed, we make the system easier to reason about.
- _Aggregates are in charge of consistency boundaries_ - An aggregate's job is to manage our business rules about invariants as they apply to a group of related objects. It is the aggregate's job to check that the objects within its remit are consistent with each other and with our rules, and to reject changes that would break the rules.
- _Aggregates and concurrency issues go together_ - When thinking about implementing these consistency checks, we end up thinking about transactions and locks. Choosing the right aggregate is about performance as well as the conceptual organization of your domain.
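The version-number flavour of optimistic locking, sketched (the table and column names are illustrative, and `session` is assumed to be a SQLAlchemy-style session; the book's own code leans on the database's REPEATABLE READ isolation level instead):

```python
class ConcurrencyError(Exception):
    pass


# Compare-and-swap on the version column: the UPDATE matches zero rows
# if another transaction bumped the version after we read it.
UPDATE_SQL = """
UPDATE products
SET version_number = :new_version
WHERE sku = :sku AND version_number = :old_version
"""


def commit_allocation(session, sku: str, read_version: int) -> None:
    result = session.execute(
        UPDATE_SQL,
        dict(sku=sku, old_version=read_version, new_version=read_version + 1),
    )
    if result.rowcount == 0:  # somebody else won the race
        raise ConcurrencyError("could not update; retry from the beginning")
```

Either way, the failure mode is the same: notice the conflict, give up cleanly, and retry the whole operation.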
## Chapter 8: Events and the Message Bus

Reporting, permissions and workflows touching zillions of objects can make a mess of our codebase.

> Rule of thumb: if you can't describe what your function does without using words like "then" or "and", you might be
> violating the SRP.

A message bus gives us a nice way to separate responsibilities when we have to take multiple actions in response to a request.

Domain Events and the Message Bus Recap:

- _Events can help with the single responsibility principle_ - Code gets tangled up when we mix multiple concerns in one place. Events can help us to keep things tidy by separating primary use cases from secondary ones. We also use events for communicating between aggregates so that we don't need to run long-running transactions that lock against multiple tables.
- _A message bus routes messages to handlers_ - You can think of a message bus as a dict that maps from events to their consumers. It doesn't "know" anything about the meaning of events; it is just a piece of dumb infrastructure for getting messages around the system.
- _Option 1: Service layer raises events and passes them to the message bus_ - The simplest way to start using events in your system is to raise them from handlers by calling `bus.handle(event)` after you commit your unit of work.
- _Option 2: Domain model raises events, service layer passes them to the message bus_ - The logic about when to raise an event really should live with the model, so we can improve our system's design and testability by raising events from the domain model. It is easy for our handlers to collect events off the model objects after commit and pass them to the bus.
- _Option 3: UoW collects events from aggregates and passes them to the message bus_ - Adding `bus.handle(aggregate.events)` to every handler is annoying, so we can tidy up by making our unit of work responsible for raising events that were raised by loaded objects. This is the most complex design and might rely on ORM magic, but it is clean and easy to use once set up.

## Chapter 9: Going to Town on the Message Bus

If we rethink our API calls as capturing events, the service-layer functions can be event handlers too, and we no longer need to make a distinction between internal and external event handlers.

Multiple database transactions can cause integrity issues: something could happen that means the first transaction completes but the second one does not.

Events are simple dataclasses that define the data structures for inputs and internal messages within our system. This is quite powerful from a DDD standpoint, since events often translate very well into business language.

Handlers are the way we react to events. They can call down to our model or call out to external services. We can define multiple handlers for a single event if we want to. Handlers can also raise other events. This allows us to be very granular about what a handler does and really stick to the SRP.
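Putting that "dict from events to consumers" picture into code, a minimal sketch (the event type and handler are illustrative):

```python
from dataclasses import dataclass
from typing import Callable, Dict, List, Type


class Event:
    pass


@dataclass
class OutOfStock(Event):
    sku: str


def send_out_of_stock_notification(event: OutOfStock) -> None:
    print(f"out of stock: {event.sku}")


# The bus "is" this dict: it routes messages and knows nothing about their meaning.
HANDLERS: Dict[Type[Event], List[Callable]] = {
    OutOfStock: [send_out_of_stock_notification],
}


def handle(event: Event) -> None:
    for handler in HANDLERS[type(event)]:
        handler(event)


handle(OutOfStock(sku="RED-CHAIR"))
```

Adding a second reaction to the same event is then just appending another handler to the list; no existing code changes.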
## Chapter 10: Commands and Command Handler

Commands are a type of message - instructions sent by one part of a system to another. We usually represent commands with dumb data structures and can handle them in much the same way as events.

Commands are sent by one actor to another specific actor with the expectation that a particular thing will happen as a result. When we post a form to an API handler, we are sending a command. We name commands with imperative mood verb phrases like "allocate stock" or "delay shipment".

Events are broadcast by an actor to all interested listeners. We often use events to spread the knowledge about successful commands. We name events with past-tense verb phrases like "order allocated to stock" or "shipment delayed".

How do we mitigate the problems caused by lost messages? The system might be left in an inconsistent state. In our allocation service we have already taken steps to prevent that from happening: we have carefully identified aggregates that act as consistency boundaries, and we have introduced a UoW that manages the atomic success or failure of an update to an aggregate.

When a user wants to make the system do something, we represent their request as a command. That command should modify a single aggregate and either succeed or fail in totality. Any other bookkeeping, cleanup and notification we need to do can happen via an event. We don't require the event handlers to succeed in order for the command to be successful. We raise events about an aggregate after we persist our state to the database. It is OK for events to fail independently from the commands that raised them.

Tenacity is a Python library that implements common patterns for retrying.

## Chapter 11: Event-Driven Architecture: Using Events to Integrate Microservices

Often, the first instinct when migrating an existing application to microservices is to split the system into _nouns_. This style of architecture, where we create a microservice per database table and treat our HTTP APIs as CRUD interfaces to anemic models, is the most common initial way for people to approach service-oriented design. It works fine for systems that are very simple, but it can quickly degrade into a distributed ball of mud.

When two things have to be changed together, we say that they are coupled. We can never completely avoid coupling, except by having our software not talk to any other software. What we want is to avoid inappropriate coupling.

How do we get appropriate coupling? We should think in terms of verbs, not nouns. Our domain model is about modeling a business process. It is not static data about a thing; it is a model of a verb. Instead of thinking about a system for orders and a system for batches, we think about a system for allocating and ordering.

Microservices should be consistency boundaries. That means we don't need to rely on synchronous calls. Each service accepts commands from the outside world and raises events to record the result. Other services can listen to those events to trigger the next steps in the workflow.

Things can fail independently, so it is easier to handle degraded behavior - we can still take orders if the allocation service is having a bad day. Secondly, we are reducing the strength of coupling between our systems. If we need to change the order of operations or to introduce new steps in the process, we can do that locally.

Events can come from the outside, but they can also be published externally.

> Event notification is nice because it implies a low level of coupling, and is pretty simple to set up. It can become
> problematic, however, if there really is a logical flow that runs over various event notifications. It can be hard to
> see such flow as it is not explicit in any program text. This can make it hard to debug and modify. ~ Martin Fowler.
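Publishing an event externally can be as thin as a pub/sub call; a minimal sketch in the spirit of this repo's `redis_publisher` (the host, port, channel and payload are illustrative, and a reachable Redis is assumed):

```python
import json

import redis  # third-party client, as used elsewhere in this repo

r = redis.Redis(host="localhost", port=6379)


def publish(channel: str, event: dict) -> None:
    # Raise an event to record a result; interested services subscribe and react.
    r.publish(channel, json.dumps(event))


publish("line_allocated", {"order_id": "o1", "sku": "RED-CHAIR", "batch_ref": "b1"})
```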
## Chapter 12: Command-Query Responsibility Segregation (CQRS)

Reads (queries) and writes (commands) are different, so they should be treated differently. Most users are not going to buy your product; they are just viewers. We can make reads eventually consistent in order to make them perform better.

All distributed systems are inconsistent. As soon as you have a web server and two customers, you have the potential for stale data. No matter what we do, we are always going to find that our software systems are inconsistent with reality, so we will always need business processes to cope with these edge cases. It is OK to trade performance for consistency on the read side, because stale data is essentially unavoidable.

- Reads: simple, highly cacheable, can be stale.
- Writes: complex business logic, uncacheable, must be transactionally consistent.

Post/Redirect/Get pattern - in this technique, a web endpoint accepts an HTTP POST and responds with a redirect to see the result. For example, we might accept a POST to /batches to create a new batch and redirect the user to /batches/123 to see their newly created batch. This approach fixes the problems that arise when users refresh the results page in their browser: refreshing can lead to users double-submitting data and thus buying two sofas when they needed only one. This technique is a simple example of CQS. In CQS we follow one simple rule: functions should either modify state or answer questions. We can apply the same design by returning 201 Created or 202 Accepted, with a Location header containing the URI of our new resources.

An ORM can expose us to performance problems. The SELECT N+1 problem is a common performance problem with ORMs: when retrieving a list of objects, your ORM will often perform an initial query to get all the IDs of the objects it needs, and then issue individual queries for each object to retrieve their attributes. This is especially likely if there are any foreign-key relationships on your objects.

Even with well-tuned indexes, a relational database uses a lot of CPU to perform joins. The fastest queries will always be `SELECT * FROM table WHERE condition`. More than raw speed, this approach buys us scale: read-only stores can be horizontally scaled out. The read model can even be implemented using Redis. As the domain model becomes richer and more complex, a simplified read model becomes compelling.

## Chapter 13: Dependency Injection (and Bootstrapping)

Mocks tightly couple us to the implementation. By choosing to monkeypatch `email.send_mail`, we are tied to doing `import email`, and if we ever want to do `from email import send_mail`, we will have to change all our mocks.

Declaring explicit dependencies is, strictly speaking, unnecessary, and using them makes our application code marginally more complex. But in return we get tests that are easier to write and manage.

> Explicit is better than implicit.

Putting all the responsibility for passing dependencies to the right handler onto the message bus feels like a violation of the SRP. Instead, we will reach for a pattern called Composition Root (a bootstrap script), and we will do a bit of "manual DI" (dependency injection without a framework).
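A minimal sketch of manual DI through a composition root (the handler and fake are illustrative; this repo's `bootstrap.py` does the same thing generically with `inspect.signature`):

```python
from functools import partial


class FakeNotifications:
    def __init__(self) -> None:
        self.sent: list[tuple[str, str]] = []

    def send(self, destination: str, message: str) -> None:
        self.sent.append((destination, message))


def send_out_of_stock_notification(event: dict, notifications) -> None:
    notifications.send("stock@made.com", f"Out of stock for {event['sku']}")


def bootstrap(notifications):
    # Composition root: build each handler once, with its dependencies baked in.
    return {
        "out_of_stock": partial(send_out_of_stock_notification, notifications=notifications),
    }


notifications = FakeNotifications()
handlers = bootstrap(notifications)
handlers["out_of_stock"]({"sku": "RED-CHAIR"})
assert notifications.sent == [("stock@made.com", "Out of stock for RED-CHAIR")]
```

Swapping the fake for a real email adapter happens in exactly one place: the call to `bootstrap`.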
Setting up dependency injection is just one of many typical setup activities that you need to do when starting your app. Putting this all together into a bootstrap script is often a good idea. The bootstrap script is also a good place to provide sensible default configuration for your adapters, and a single place to override those adapters with fakes for your tests.

## Epilogue

Making complex changes to a system is often an easier sell if you link it to feature work. Perhaps you are launching a new product or opening your service to new markets? This is the right time to spend engineering resources on fixing the foundations. With a six-month project to deliver, it is easier to make the argument for three weeks of cleanup work.

The Strangler Fig pattern involves creating a new system around the edges of an old system, while keeping the old one running. Bits of old functionality are gradually intercepted and replaced, until the old system is left doing nothing at all and can be switched off.

Focus on a specific problem and ask yourself how you can put the relevant ideas to use, perhaps in an initially limited and imperfect fashion.

Reliable messaging is hard: Redis pub/sub is not reliable and should not be used as a general-purpose messaging tool. We explicitly choose small, focused transactions that can fail independently.

## Appendix

- Entity - A domain object whose attributes may change but that has a recognizable identity over time.
- Value object - An immutable domain object whose attributes entirely define it. It is fungible with other identical objects.
- Aggregate - A cluster of associated objects that we treat as a unit for the purpose of data changes. Defines and enforces a consistency boundary.
- Event - Represents something that happened.
- Command - Represents a job the system should perform.
- Unit of work - An abstraction around data integrity. Each unit of work represents an atomic update. Makes the repository available. Tracks new events on retrieved aggregates.
- Repository - An abstraction around persistent storage. Each aggregate has its own repository.

Docker: mounting our source and test code as `volumes` means we don't need to rebuild our containers every time we make a code change.

Postel's Law (robustness principle):

> Be liberal in what you accept, and conservative in what you emit

Tolerant Reader pattern: validate as little as possible. Read only the fields you need, and don't overspecify their contents. This will help your system stay robust when other systems change over time. Resist the temptation to share message definitions between systems: instead, make it easy to define the data you depend on. A tolerant reader is sketched below.

If you are in charge of an API that is open to the public on the big bad internet, there might be good reasons to be more conservative about what inputs you allow. If validation is needed, do it at the edge of the system in order to avoid polluting the domain model. Bear in mind that invalid data wandering through your system is a time bomb; the deeper it gets, the more damage it can do.
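A minimal tolerant-reader sketch (the message type and field names are illustrative): deserialize only the fields this system depends on and ignore everything else, so senders can evolve their messages without breaking us.

```python
import json
from dataclasses import dataclass


@dataclass
class BatchQuantityChanged:
    batch_ref: str
    qty: int


def read_message(raw: str) -> BatchQuantityChanged:
    data = json.loads(raw)
    # Read only the fields we depend on; unknown fields are ignored, not rejected.
    return BatchQuantityChanged(batch_ref=data["batch_ref"], qty=int(data["qty"]))


msg = read_message('{"batch_ref": "b1", "qty": 5, "field_we_do_not_know": true}')
assert msg == BatchQuantityChanged(batch_ref="b1", qty=5)
```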
================================================ FILE: books/python-architecture-patterns/requirements.txt ================================================ pytest==6.2.5 mypy==0.931 flake8==4.0.1 SQLAlchemy==1.4.31 fastapi==0.73.0 sqlmodel==0.0.6 requests==2.27.1 psycopg2==2.9.3 uvicorn==0.17.4 redis==4.1.4 types-redis==4.1.17 tenacity==8.0.1 ================================================ FILE: books/python-architecture-patterns/setup.cfg ================================================ [tool:pytest] python_paths = . testpaths = tests norecursedirs = .* addopts = -sl filterwarnings = ignore::DeprecationWarning ignore::PendingDeprecationWarning [mypy] python_version = 3.10 ignore_missing_imports = True strict_optional = False [mypy-app.cache] ignore_errors = True [flake8] max-line-length = 180 max-complexity = 10 format = pylint show-source = True statistics = True ================================================ FILE: books/python-architecture-patterns/src/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/adapters/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/adapters/notifications.py ================================================ from abc import ( ABC, abstractmethod, ) import smtplib from src import config DEFAULT_HOST = config.get_email_host_and_port()["host"] DEFAULT_PORT = config.get_email_host_and_port()["port"] class AbstractNotifications(ABC): @abstractmethod def send(self, destination, message): raise NotImplementedError class EmailNotifications(AbstractNotifications): def __init__(self, smtp_host=DEFAULT_HOST, port=DEFAULT_PORT): self.server = smtplib.SMTP(smtp_host, port=port) self.server.noop() def send(self, destination, message): self.server.sendmail( from_addr="allocations@example.com", to_addrs=[destination], msg=f"Subject: allocation service notification\n{message}", ) ================================================ FILE: books/python-architecture-patterns/src/adapters/orm.py ================================================ from sqlmodel import ( Field, SQLModel, ) class AllocationsView(SQLModel, table=True): id: int = Field(primary_key=True) order_id: str sku: str batch_ref: str def create_db_and_tables(engine): SQLModel.metadata.create_all(engine) def clean_db_and_tables(engine): SQLModel.metadata.drop_all(engine) ================================================ FILE: books/python-architecture-patterns/src/adapters/redis_publisher.py ================================================ from redis.client import Redis from src import config from src.domain.events import Event r = Redis(**config.get_redis_host_and_port()) def publish(channel: str, event: Event): r.publish(channel, event.json()) ================================================ FILE: books/python-architecture-patterns/src/adapters/repository.py ================================================ from typing import ( Optional, Protocol, Set, ) from sqlmodel import ( Session, select, ) from src.domain.model import ( Batch, Product, ) class AbstractRepository(Protocol): def add(self, product: Product): ... def get(self, sku: str) -> Optional[Product]: ... def get_by_batch_ref(self, ref: str) -> Optional[Product]: ... 
class Repository(AbstractRepository): def __init__(self, session: Session): self.session = session def add(self, product: Product): self.session.add(product) self.session.commit() def get(self, sku: str) -> Optional[Product]: return self.session.exec(select(Product).where(Product.sku == sku)).first() def get_by_batch_ref(self, ref: str) -> Optional[Product]: return self.session.exec(select(Product).join(Batch).where(Batch.reference == ref)).first() class TrackingRepository(AbstractRepository): seen: Set[Product] def __init__(self, repo: AbstractRepository): super().__init__() self.seen = set() self._repo = repo def add(self, product: Product): self._repo.add(product) self.seen.add(product) def get(self, sku: str) -> Optional[Product]: product = self._repo.get(sku) if product: self.seen.add(product) return product def get_by_batch_ref(self, ref: str) -> Optional[Product]: if product := self._repo.get_by_batch_ref(ref): self.seen.add(product) return product ================================================ FILE: books/python-architecture-patterns/src/app.py ================================================ from fastapi import ( FastAPI, Response, status, ) from src import views from src.bootstrap import bootstrap from src.domain import commands from src.domain.model import ( Batch, OrderLine, OutOfStock, ) from src.service_layer.handlers import InvalidSku bus = bootstrap() api = FastAPI() @api.post("/allocate") async def allocate_endpoint(order_line: OrderLine, response: Response): try: bus.handle(commands.Allocate(order_id=order_line.order_id, sku=order_line.sku, qty=order_line.qty)) except (OutOfStock, InvalidSku) as e: response.status_code = status.HTTP_400_BAD_REQUEST return {"message": str(e)} return {"message": "ok"} @api.post("/add_batch") async def add_batch_endpoint(batch: Batch): bus.handle(commands.CreateBatch(ref=batch.reference, sku=batch.sku, qty=batch.purchased_quantity, eta=batch.eta)) return {"message": "ok"} @api.post("/allocate/{order_id}") async def allocate_view_endpoint(order_id: str, response: Response): if result := views.allocations(order_id, bus.uow): return result response.status_code = status.HTTP_400_BAD_REQUEST return response ================================================ FILE: books/python-architecture-patterns/src/bootstrap.py ================================================ import inspect from typing import Callable from sqlalchemy.engine import Engine from sqlmodel import create_engine from src import config from src.adapters import redis_publisher from src.adapters.notifications import ( AbstractNotifications, EmailNotifications, ) from src.adapters.orm import create_db_and_tables from src.service_layer.message_bus import ( COMMAND_HANDLERS, EVENT_HANDLERS, MessageBus, ) from src.service_layer.unit_of_work import ( AbstractUnitOfWork, UnitOfWork, ) def bootstrap(start_orm: bool = True, engine: Engine = create_engine(config.get_postgres_uri()), uow: AbstractUnitOfWork = UnitOfWork(), notifications: AbstractNotifications = EmailNotifications(), publish: Callable = redis_publisher.publish): if start_orm: create_db_and_tables(engine) dependencies = {"uow": uow, "notifications": notifications, "publish": publish} injected_event_handlers = { event_type: [ inject_dependencies(handler, dependencies) for handler in event_handlers ] for event_type, event_handlers in EVENT_HANDLERS.items() } injected_command_handlers = { command_type: inject_dependencies(handler, dependencies) for command_type, handler in COMMAND_HANDLERS.items() } return MessageBus(uow=uow, 
event_handlers=injected_event_handlers, command_handlers=injected_command_handlers) def inject_dependencies(handler, dependencies): params = inspect.signature(handler).parameters deps = { name: dependency for name, dependency in dependencies.items() if name in params } return lambda message: handler(message, **deps) ================================================ FILE: books/python-architecture-patterns/src/config.py ================================================ import os def get_postgres_uri(): host = os.environ.get("DB_HOST", "localhost") port = 54321 if host == "localhost" else 5432 password = os.environ.get("DB_PASSWORD", "abc123") user, db_name = "allocation", "allocation" return f"postgresql://{user}:{password}@{host}:{port}/{db_name}" def get_api_url(): host = os.environ.get("API_HOST", "localhost") port = 80 return f"http://{host}:{port}" def get_redis_host_and_port(): host = os.environ.get("REDIS_HOST", "localhost") port = 63791 if host == "localhost" else 6379 return dict(host=host, port=port) def get_email_host_and_port(): host = os.environ.get("EMAIL_HOST", "localhost") port = 11025 if host == "localhost" else 1025 http_port = 18025 if host == "localhost" else 8025 return dict(host=host, port=port, http_port=http_port) ================================================ FILE: books/python-architecture-patterns/src/domain/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/domain/commands.py ================================================ from dataclasses import dataclass from datetime import date from typing import Optional class Command: pass @dataclass class Allocate(Command): order_id: str sku: str qty: int @dataclass class CreateBatch(Command): ref: str sku: str qty: int eta: Optional[date] = None @dataclass class ChangeBatchQuantity(Command): ref: str qty: int ================================================ FILE: books/python-architecture-patterns/src/domain/events.py ================================================ from pydantic import BaseModel class Event(BaseModel): pass class OutOfStock(Event): sku: str class Allocated(Event): order_id: str sku: str qty: int batch_ref: str class Deallocated(Event): order_id: str sku: str qty: int class BatchQuantityChanged(Event): batch_ref: str qty: int ================================================ FILE: books/python-architecture-patterns/src/domain/model.py ================================================ from datetime import date from typing import ( Iterable, List, Optional, Union, cast, ) from pydantic import PrivateAttr from pydantic.fields import ModelPrivateAttr from sqlmodel import ( Field, Relationship, SQLModel, ) from src.domain import ( commands, events, ) Message = Union[commands.Command, events.Event] class OutOfStock(Exception): pass class OrderLine(SQLModel, table=True): order_id: str sku: str qty: int # DB-specific fields: id: Optional[int] = Field(default=None, primary_key=True) batch_id: Optional[int] = Field(default=None, foreign_key="batch.id") batch: Optional["Batch"] = Relationship(back_populates="allocations") class Batch(SQLModel, table=True): reference: str sku: str purchased_quantity: int eta: Optional[date] allocations: List["OrderLine"] = Relationship(back_populates="batch") # DB-specific fields: id: Optional[int] = Field(default=None, primary_key=True) product_id: Optional[int] = Field(default=None, foreign_key="product.id") product: Optional["Product"] = Relationship(back_populates="batches") def 
__eq__(self, other): if not isinstance(other, Batch): return False return other.reference == self.reference def __hash__(self): return hash(self.reference) def __gt__(self, other): if self.eta is None: return False if other.eta is None: return True return self.eta > other.eta def allocate(self, order_line: OrderLine) -> None: if not self.can_allocate(order_line): return if order_line in self.allocations: return self.allocations.append(order_line) def deallocate(self, order_line: OrderLine) -> None: if order_line not in self.allocations: return self.allocations.remove(order_line) def deallocate_one(self): return self.allocations.pop() @property def allocated_quantity(self) -> int: return sum(line.qty for line in self.allocations) @property def available_quantity(self) -> int: return self.purchased_quantity - self.allocated_quantity def can_allocate(self, order_line: OrderLine) -> bool: return self.sku == order_line.sku and self.available_quantity >= order_line.qty class Product(SQLModel, table=True): sku: str batches: List["Batch"] = Relationship(back_populates="product") # DB-specific fields: id: Optional[int] = Field(default=None, primary_key=True) version_number: int = 0 # DB excluded fields: _messages: ModelPrivateAttr = PrivateAttr(default=[]) def __hash__(self): return hash(self.sku) @property def messages(self) -> List[Message]: return self._messages.default def allocate(self, order_line: OrderLine) -> Optional[str]: try: batch = next(b for b in sorted(cast(Iterable, self.batches)) if b.can_allocate(order_line)) except StopIteration: self.messages.append(events.OutOfStock(sku=order_line.sku)) return None batch.allocate(order_line) self.version_number += 1 self.messages.append(events.Allocated( order_id=order_line.order_id, sku=order_line.sku, qty=order_line.qty, batch_ref=batch.reference )) return batch.reference def change_batch_quantity(self, ref: str, qty: int): batch = next(b for b in self.batches if b.reference == ref) batch.purchased_quantity = qty while batch.available_quantity < 0: line = batch.deallocate_one() self.messages.append(commands.Allocate(order_id=line.order_id, sku=line.sku, qty=line.qty)) ================================================ FILE: books/python-architecture-patterns/src/redis_consumer.py ================================================ import json from typing import Dict from redis.client import Redis from src import config from src.bootstrap import bootstrap from src.domain import ( commands, events, ) from src.service_layer.message_bus import MessageBus r = Redis(**config.get_redis_host_and_port()) def main(): bus = bootstrap() pubsub = r.pubsub(ignore_subscribe_messages=True) pubsub.subscribe("change_batch_quantity") for m in pubsub.listen(): _handle_change_batch_quantity(m, bus) def _handle_change_batch_quantity(message: Dict, bus: MessageBus): event = events.BatchQuantityChanged(**json.loads(message["data"])) cmd = commands.ChangeBatchQuantity(ref=event.batch_ref, qty=event.qty) bus.handle(message=cmd) if __name__ == "__main__": main() ================================================ FILE: books/python-architecture-patterns/src/service_layer/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/service_layer/handlers.py ================================================ from src.adapters import redis_publisher from src.adapters.notifications import AbstractNotifications from src.domain import ( commands, events, ) from src.domain.model import ( 
Batch, OrderLine, Product, ) from src.service_layer.unit_of_work import ( AbstractUnitOfWork, UnitOfWork, ) class InvalidSku(Exception): pass def allocate(command: commands.Allocate, uow: AbstractUnitOfWork) -> str: order_line = OrderLine(order_id=command.order_id, sku=command.sku, qty=command.qty) with uow: product = uow.products.get(sku=command.sku) if not product: raise InvalidSku(f"Invalid SKU: {command.sku}") batch_ref = product.allocate(order_line) uow.commit() return batch_ref def add_batch(command: commands.CreateBatch, uow: AbstractUnitOfWork): with uow: product = uow.products.get(command.sku) if not product: product = Product(sku=command.sku, batches=[]) uow.products.add(product) product.batches.append(Batch(reference=command.ref, sku=command.sku, purchased_quantity=command.qty, eta=command.eta)) uow.commit() def change_batch_quantity(command: commands.ChangeBatchQuantity, uow: AbstractUnitOfWork): with uow: product = uow.products.get_by_batch_ref(command.ref) product.change_batch_quantity(ref=command.ref, qty=command.qty) uow.commit() def send_out_of_stock_notification(event: events.OutOfStock, notifications: AbstractNotifications): notifications.send("stock@made.com", f"Out of stock for {event.sku}") def publish_allocated_event(event: events.Allocated, uow: AbstractUnitOfWork): redis_publisher.publish("line_allocated", event) def add_allocation_to_read_model(event: events.Allocated, uow: UnitOfWork): with uow: uow.session.execute( """ INSERT INTO allocationsview (order_id, sku, batch_ref) VALUES (:order_id, :sku, :batch_ref) """, dict(order_id=event.order_id, sku=event.sku, batch_ref=event.batch_ref), ) uow.commit() def remove_allocation_from_read_model(event: events.Deallocated, uow: UnitOfWork): with uow: uow.session.execute( """ DELETE FROM allocationsview WHERE order_id = :order_id AND sku = :sku """, dict(order_id=event.order_id, sku=event.sku), ) uow.commit() def reallocate(event: events.Deallocated, uow: AbstractUnitOfWork, ): with uow: product = uow.products.get(sku=event.sku) product.messages.append(commands.Allocate(**event.dict())) uow.commit() ================================================ FILE: books/python-architecture-patterns/src/service_layer/message_bus.py ================================================ import logging from typing import ( Callable, Dict, List, Type, Union, ) from src.domain import ( commands, events, ) from src.service_layer import handlers from src.service_layer.unit_of_work import AbstractUnitOfWork logger = logging.getLogger(__name__) Message = Union[commands.Command, events.Event] EVENT_HANDLERS: Dict[Type[events.Event], List[Callable]] = { events.OutOfStock: [handlers.send_out_of_stock_notification], events.Allocated: [handlers.publish_allocated_event, handlers.add_allocation_to_read_model], events.Deallocated: [handlers.remove_allocation_from_read_model, handlers.reallocate] } COMMAND_HANDLERS: Dict[Type[commands.Command], Callable] = { commands.CreateBatch: handlers.add_batch, commands.ChangeBatchQuantity: handlers.change_batch_quantity, commands.Allocate: handlers.allocate, } class MessageBus: def __init__(self, uow: AbstractUnitOfWork, event_handlers: Dict[Type[events.Event], List[Callable]], command_handlers: Dict[Type[commands.Command], Callable]): self.uow = uow self.event_handlers = event_handlers self.command_handlers = command_handlers self.queue: List[Message] = [] def handle(self, message: Message): self.queue = [message] while self.queue: message = self.queue.pop(0) if isinstance(message, events.Event): 
self._handle_event(message) elif isinstance(message, commands.Command): self._handle_command(message) else: raise Exception(f"{message} was not an Event or Command") def _handle_event(self, event: events.Event): for handler in self.event_handlers[type(event)]: try: logger.debug(f"Handling event {event} with handler {handler}") handler(event) self.queue.extend(self.uow.collect_new_messages()) except Exception as e: logger.exception(f"Exception handling event {event}: {e}") continue def _handle_command(self, command: commands.Command): try: handler = self.command_handlers[type(command)] handler(command) self.queue.extend(self.uow.collect_new_messages()) except Exception: logger.exception("Exception handling command %s", command) raise ================================================ FILE: books/python-architecture-patterns/src/service_layer/unit_of_work.py ================================================ from __future__ import annotations from abc import ( ABC, abstractmethod, ) from typing import Optional from sqlmodel import ( Session, create_engine, ) from src.adapters.repository import ( Repository, TrackingRepository, ) from src.config import get_postgres_uri class AbstractUnitOfWork(ABC): products: TrackingRepository def __enter__(self) -> AbstractUnitOfWork: return self def __exit__(self, *args): self.rollback() def commit(self): self._commit() def collect_new_messages(self): for product in self.products.seen: while product.messages: yield product.messages.pop(0) @abstractmethod def rollback(self): raise NotImplementedError @abstractmethod def _commit(self): raise NotImplementedError def default_session(): return Session(create_engine(get_postgres_uri(), isolation_level="REPEATABLE READ")) class UnitOfWork(AbstractUnitOfWork): def __init__(self, session: Optional[Session] = None): # 'default_session()' can not be in the '__init__' because it would be evaluated only once: self.session = session if session else default_session() def __enter__(self): self.products = TrackingRepository(repo=Repository(self.session)) return super().__enter__() def __exit__(self, *args): super().__exit__(*args) self.session.close() def rollback(self): self.session.rollback() def _commit(self): self.session.commit() ================================================ FILE: books/python-architecture-patterns/src/views.py ================================================ from typing import ( Dict, List, ) from src.service_layer.unit_of_work import UnitOfWork def allocations(order_id: str, uow: UnitOfWork) -> List[Dict]: with uow: results = uow.session.execute( "SELECT sku, batch_ref FROM allocationsview WHERE order_id = :order_id", dict(order_id=order_id), ) return [dict(r) for r in results] ================================================ FILE: books/python-architecture-patterns/tests/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/conftest.py ================================================ import pytest import redis from sqlmodel import ( Session, create_engine, ) from starlette.testclient import TestClient from tenacity import ( retry, stop_after_delay, ) from src import config from src.adapters.orm import ( clean_db_and_tables, create_db_and_tables, ) from src.app import api @pytest.fixture def in_memory_db(): engine = create_engine("sqlite:///:memory:") clean_db_and_tables(engine) create_db_and_tables(engine) return engine @pytest.fixture def session(in_memory_db): create_db_and_tables(in_memory_db) 
yield Session(in_memory_db) clean_db_and_tables(in_memory_db) @retry(stop=stop_after_delay(10)) def wait_for_postgres_to_come_up(engine): engine.connect() @retry(stop=stop_after_delay(10)) def wait_for_redis_to_come_up(): r = redis.Redis(**config.get_redis_host_and_port()) return r.ping() @pytest.fixture(scope="session") def postgres_db(): engine = create_engine(config.get_postgres_uri()) wait_for_postgres_to_come_up(engine) clean_db_and_tables(engine) create_db_and_tables(engine) return engine @pytest.fixture def postgres_session(postgres_db): create_db_and_tables(postgres_db) yield Session(postgres_db) clean_db_and_tables(postgres_db) @pytest.fixture def client(): return TestClient(api) ================================================ FILE: books/python-architecture-patterns/tests/e2e/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/e2e/api_client.py ================================================ import json from src.domain.model import ( Batch, OrderLine, ) def post_to_allocate(client, order_id, sku, qty): return client.post("/allocate", json=json.loads(OrderLine(order_id=order_id, sku=sku, qty=qty).json())) def get_allocation(client, order_id): return client.post(f"/allocate/{order_id}") def post_to_add_batch(client, ref, sku, qty, eta): return client.post("/add_batch", json=json.loads(Batch(reference=ref, sku=sku, purchased_quantity=qty, eta=eta).json())) ================================================ FILE: books/python-architecture-patterns/tests/e2e/redis_client.py ================================================ import json import redis from src import config r = redis.Redis(**config.get_redis_host_and_port()) def subscribe_to(channel): pubsub = r.pubsub() pubsub.subscribe(channel) confirmation = pubsub.get_message(timeout=3) assert confirmation["type"] == "subscribe" return pubsub def publish_message(channel, message): r.publish(channel, json.dumps(message)) ================================================ FILE: books/python-architecture-patterns/tests/e2e/test_app.py ================================================ from datetime import date from uuid import uuid4 from tests.e2e.api_client import ( get_allocation, post_to_add_batch, post_to_allocate, ) def random_suffix(): return uuid4().hex[:6] def random_sku(name=''): return f"sku-{name}-{random_suffix()}" def random_batch_ref(name=''): return f"batch-{name}-{random_suffix()}" def random_order_id(name=''): return f"order-{name}-{random_suffix()}" def test_happy_path_returns_200_and_allocated_batch(client): sku, other_sku = random_sku(), random_sku("other") order_id = random_order_id() early_batch, later_batch, other_batch = random_batch_ref('1'), random_batch_ref('2'), random_batch_ref('3') post_to_add_batch(client, later_batch, sku, 100, date(2011, 1, 2)) post_to_add_batch(client, early_batch, sku, 100, date(2011, 1, 1)) post_to_add_batch(client, other_batch, other_sku, 100, None) response = post_to_allocate(client=client, order_id=order_id, sku=sku, qty=3) assert response.status_code == 200, response.status_code response = get_allocation(client=client, order_id=order_id) assert response.status_code == 200 assert response.json() == [{"sku": sku, "batch_ref": early_batch}] def test_unhappy_path_returns_400_and_error_message(client): unknown_order_id, unknown_sku = random_order_id(), random_sku() response = post_to_allocate(client=client, order_id=random_order_id(), sku=unknown_sku, qty=20) assert response.status_code 
== 400 assert response.json()["message"] == f"Invalid SKU: {unknown_sku}" response = get_allocation(client=client, order_id=unknown_order_id) assert response.status_code == 400 ================================================ FILE: books/python-architecture-patterns/tests/e2e/test_external_events.py ================================================ import json from datetime import date import pytest from tenacity import ( Retrying, stop_after_delay, ) from tests.e2e import redis_client from tests.e2e.api_client import ( post_to_add_batch, post_to_allocate, ) from tests.e2e.test_app import ( random_batch_ref, random_order_id, random_sku, ) def test_change_batch_quantity_leading_to_allocation(client): order_id, sku = random_order_id(), random_sku() earlier_batch, later_batch = random_batch_ref("old"), random_batch_ref("new") post_to_add_batch(client=client, ref=earlier_batch, sku=sku, qty=10, eta=date(2021, 1, 1)) post_to_add_batch(client=client, ref=later_batch, sku=sku, qty=10, eta=date(2021, 1, 2)) response = post_to_allocate(client=client, order_id=order_id, sku=sku, qty=10) assert response.status_code == 200 subscription = redis_client.subscribe_to("line_allocated") redis_client.publish_message("change_batch_quantity", {"batch_ref": earlier_batch, "qty": 5}) # it may take some for message to arrive: for attempt in Retrying(stop=stop_after_delay(3), reraise=True): with attempt: message = subscription.get_message(timeout=1) if not message: continue data = json.loads(message["data"]) assert data["order_id"] == order_id assert data["batch_ref"] == later_batch if not message: pytest.fail("Message not fetched") ================================================ FILE: books/python-architecture-patterns/tests/integration/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/integration/test_uow.py ================================================ from threading import Thread from time import sleep from typing import List import pytest from sqlalchemy.orm import selectinload from sqlmodel import ( Session, select, ) from src.domain.model import ( Batch, OrderLine, Product, ) from src.service_layer.unit_of_work import UnitOfWork from tests.e2e.test_app import random_batch_ref sku = "GENERIC-SOFA" def insert_batch(session, batch_id): session.add(Product(sku=sku, batches=[Batch(reference=batch_id, sku=sku, purchased_quantity=100, eta=None)])) def get_allocated_batch_ref(session, order_id, sku): batches = session.exec(select(Batch).where(Batch.sku == sku).options(selectinload(Batch.allocations))).all() batch = next(batch for batch in batches for allocation in batch.allocations if allocation.order_id == order_id) return batch.reference def test_uow_retrieve_batch_and_allocate_to_it(session): insert_batch(session, "batch1") session.commit() with UnitOfWork(session) as uow: product = uow.products.get(sku=sku) line = OrderLine(order_id="o1", sku=sku, qty=10) product.allocate(order_line=line) uow.commit() assert get_allocated_batch_ref(session, "o1", "GENERIC-SOFA") == "batch1" def test_rolls_back_uncommitted_work_by_default(in_memory_db): old_session, new_session = Session(in_memory_db), Session(in_memory_db) with UnitOfWork(): insert_batch(old_session, "batch1") assert list(new_session.exec(select(Batch)).all()) == [] def test_rolls_back_on_error(in_memory_db): old_session, new_session = Session(in_memory_db), Session(in_memory_db) class MyException(Exception): pass with pytest.raises(MyException): with 
UnitOfWork(old_session): insert_batch(old_session, "batch1") raise MyException() assert list(new_session.exec(select(Batch)).all()) == [] def try_to_allocate(order_id: str, exceptions: List[Exception]): line = OrderLine(order_id=order_id, sku=sku, qty=10) try: with UnitOfWork() as uow: product = uow.products.get(sku) product.allocate(line) sleep(0.2) uow.commit() except Exception as e: exceptions.append(e) def test_concurrent_updates_to_version_number_are_not_allowed(postgres_db): session = Session(postgres_db) insert_batch(session, random_batch_ref()) session.commit() exceptions = [] t1, t2 = Thread(target=try_to_allocate, args=("order_id_1", exceptions)), Thread(target=try_to_allocate, args=("order_id_2", exceptions)) t1.start(), t2.start(), t1.join(), t2.join() product = session.exec(select(Product).where(Product.sku == sku)).one() assert product.version_number == 1 assert "could not serialize access due to concurrent update" in str(exceptions[0]) ================================================ FILE: books/python-architecture-patterns/tests/integration/test_views.py ================================================ from datetime import date from unittest.mock import Mock import pytest from sqlmodel import Session from src import views from src.adapters.orm import clean_db_and_tables from src.bootstrap import bootstrap from src.domain import commands from src.service_layer.unit_of_work import UnitOfWork today = date.today() @pytest.fixture def sqlite_bus(in_memory_db): bus = bootstrap( start_orm=True, uow=UnitOfWork(Session(in_memory_db)), notifications=Mock(), publish=lambda *args: None, ) yield bus clean_db_and_tables(in_memory_db) def test_allocations_view(sqlite_bus): sqlite_bus.handle(commands.CreateBatch("sku1batch", "sku1", 50, None)) sqlite_bus.handle(commands.CreateBatch("sku2batch", "sku2", 50, today)) sqlite_bus.handle(commands.Allocate("order1", "sku1", 20)) sqlite_bus.handle(commands.Allocate("order1", "sku2", 20)) sqlite_bus.handle(commands.CreateBatch("sku1batch-later", "sku1", 50, today)) sqlite_bus.handle(commands.Allocate("other_order", "sku1", 30)) sqlite_bus.handle(commands.Allocate("other_order", "sku2", 10)) assert views.allocations("order1", sqlite_bus.uow) == [ {"sku": "sku1", "batch_ref": "sku1batch"}, {"sku": "sku2", "batch_ref": "sku2batch"}, ] def test_deallocation(sqlite_bus): sqlite_bus.handle(commands.CreateBatch("b1", "sku1", 50, None)) sqlite_bus.handle(commands.CreateBatch("b2", "sku1", 50, today)) sqlite_bus.handle(commands.Allocate("o1", "sku1", 40)) sqlite_bus.handle(commands.ChangeBatchQuantity("b1", 10)) assert views.allocations("o1", sqlite_bus.uow) == [ {"batch_ref": "b1", "sku": "sku1"}, {"batch_ref": "b2", "sku": "sku1"} ] ================================================ FILE: books/python-architecture-patterns/tests/unit/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/unit/test_batches.py ================================================ from datetime import date from src.domain.model import ( Batch, OrderLine, ) def batch_and_line(sku, batch_quantity, line_quantity): return Batch(reference="batch-001", sku=sku, purchased_quantity=batch_quantity, eta=date.today()), OrderLine(order_id="order-123", sku=sku, qty=line_quantity) def test_allocating_to_batch_reduces_available_quantity(): batch, line = batch_and_line("SMALL-TABLE", 20, 2) batch.allocate(line) assert batch.available_quantity == 18 def 
def test_can_allocate_if_available_greater_than_required():
    large_batch, small_line = batch_and_line("ELEGANT-LAMP", 20, 2)
    assert large_batch.can_allocate(small_line)


def test_cannot_allocate_if_available_smaller_than_required():
    small_batch, large_line = batch_and_line("ELEGANT-LAMP", 2, 20)
    assert not small_batch.can_allocate(large_line)


def test_can_allocate_if_available_equal_to_required():
    small_batch, large_line = batch_and_line("ELEGANT-LAMP", 2, 2)
    assert small_batch.can_allocate(large_line)


def test_cannot_allocate_if_skus_dont_match():
    batch = Batch(reference="batch-001", sku="UNCOMFORTABLE-CHAIN", purchased_quantity=100, eta=None)
    different_sku_line = OrderLine(order_id="order-123", sku="EXPENSIVE-TOASTER", qty=10)
    assert not batch.can_allocate(different_sku_line)


def test_can_only_deallocate_allocated_lines():
    batch, unallocated_line = batch_and_line("DECORATIVE-TRINKET", 20, 2)
    batch.deallocate(unallocated_line)
    assert batch.available_quantity == 20


def test_allocation_is_idempotent():
    batch, line = batch_and_line("ANGULAR-DESK", 20, 2)
    batch.allocate(line)
    batch.allocate(line)
    assert batch.available_quantity == 18


================================================
FILE: books/python-architecture-patterns/tests/unit/test_handlers.py
================================================
from __future__ import annotations

from collections import defaultdict
from datetime import date
from typing import (
    Dict,
    List,
    Optional,
)

import pytest

from src.adapters.notifications import AbstractNotifications
from src.adapters.repository import (
    AbstractRepository,
    TrackingRepository,
)
from src.bootstrap import bootstrap
from src.domain import commands
from src.domain.model import Product
from src.service_layer.handlers import InvalidSku
from src.service_layer.unit_of_work import AbstractUnitOfWork


class FakeRepository(AbstractRepository):
    def __init__(self, products):
        super().__init__()
        self._products = set(products)

    def add(self, product: Product):
        self._products.add(product)

    def get(self, sku: str) -> Optional[Product]:
        return next((product for product in self._products if product.sku == sku), None)

    def get_by_batch_ref(self, ref: str) -> Optional[Product]:
        return next((product for product in self._products for batch in product.batches if batch.reference == ref), None)


class FakeUnitOfWork(AbstractUnitOfWork):
    def __init__(self):
        self.products = TrackingRepository(repo=FakeRepository([]))
        self.committed = False

    def rollback(self):
        pass

    def _commit(self):
        self.committed = True


class FakeNotifications(AbstractNotifications):
    def __init__(self):
        self.sent: Dict[str, List[str]] = defaultdict(list)

    def send(self, destination, message):
        self.sent[destination].append(message)


def bootstrap_test_app():
    return bootstrap(
        start_orm=False,
        uow=FakeUnitOfWork(),
        notifications=FakeNotifications(),
        publish=lambda *args: None,
    )


class TestAddBatch:
    def test_for_new_product(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="CRUNCHY-ARMCHAIN", qty=100))
        assert bus.uow.products.get("CRUNCHY-ARMCHAIN") is not None
        assert bus.uow.committed

    def test_for_existing_product(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="GARISH-RUG", qty=100))
        bus.handle(commands.CreateBatch(ref="b2", sku="GARISH-RUG", qty=99))
        assert "b2" in [b.reference for b in bus.uow.products.get("GARISH-RUG").batches]


class TestAllocate:
    def test_errors_for_invalid_sku(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="AREALSKU", qty=100))
        with pytest.raises(InvalidSku, match="Invalid SKU: NONEXISTENTSKU"):
            bus.handle(commands.Allocate(order_id="o1", sku="NONEXISTENTSKU", qty=10))

    def test_commits(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="OMINOUS-MIRROR", qty=100))
        bus.handle(commands.Allocate(order_id="o1", sku="OMINOUS-MIRROR", qty=10))
        assert bus.uow.committed

    def test_sends_email_on_out_of_stock_error(self):
        fake_notifications = FakeNotifications()
        bus = bootstrap(
            start_orm=False,
            uow=FakeUnitOfWork(),
            notifications=fake_notifications,
            publish=lambda *args: None,
        )
        bus.handle(commands.CreateBatch(ref="b1", sku="POPULAR-CURTAINS", qty=9))
        bus.handle(commands.Allocate(order_id="o1", sku="POPULAR-CURTAINS", qty=10))
        assert fake_notifications.sent["stock@made.com"] == ["Out of stock for POPULAR-CURTAINS"]


class TestChangeBatchQuantity:
    def test_changes_available_quantity(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="batch1", sku="ADORABLE-SETTEE", qty=100))
        [batch] = bus.uow.products.get("ADORABLE-SETTEE").batches
        assert batch.available_quantity == 100

        bus.handle(commands.ChangeBatchQuantity(ref="batch1", qty=50))
        assert batch.available_quantity == 50

    def test_reallocates_if_necessary(self):
        bus = bootstrap_test_app()
        event_history = [
            commands.CreateBatch(ref="batch1", sku="INDIFFERENT-TABLE", qty=50),
            commands.CreateBatch(ref="batch2", sku="INDIFFERENT-TABLE", qty=50, eta=date.today()),
            commands.Allocate(order_id="order1", sku="INDIFFERENT-TABLE", qty=20),
            commands.Allocate(order_id="order2", sku="INDIFFERENT-TABLE", qty=20),
        ]
        for e in event_history:
            bus.handle(e)
        [batch_1, batch_2] = bus.uow.products.get("INDIFFERENT-TABLE").batches
        assert batch_1.available_quantity == 10
        assert batch_2.available_quantity == 50

        bus.handle(commands.ChangeBatchQuantity(ref="batch1", qty=25))
        assert batch_1.available_quantity == 5
        assert batch_2.available_quantity == 30


================================================
FILE: books/python-architecture-patterns/tests/unit/test_product.py
================================================
from datetime import date

from src.domain import events
from src.domain.model import (
    Batch,
    OrderLine,
    Product,
)


def test_prefers_current_stock_batches_to_shipments():
    in_stock_batch = Batch(reference="in-stock-batch", sku="RETRO-CLOCK", purchased_quantity=100, eta=None)
    shipment_batch = Batch(reference="shipment-batch", sku="RETRO-CLOCK", purchased_quantity=100, eta=date(2022, 1, 7))
    line = OrderLine(order_id="oref", sku="RETRO-CLOCK", qty=10)
    product = Product(sku="RETRO-CLOCK", batches=[in_stock_batch, shipment_batch])

    product.allocate(line)

    assert in_stock_batch.available_quantity == 90
    assert shipment_batch.available_quantity == 100


def test_prefers_earlier_batches():
    earliest = Batch(reference="speedy-batch", sku="MINIMALIST-SPOON", purchased_quantity=100, eta=date(2022, 1, 7))
    medium = Batch(reference="normal-batch", sku="MINIMALIST-SPOON", purchased_quantity=100, eta=date(2022, 1, 8))
    latest = Batch(reference="slow-batch", sku="MINIMALIST-SPOON", purchased_quantity=100, eta=date(2022, 1, 9))
    line = OrderLine(order_id="oref", sku="MINIMALIST-SPOON", qty=10)
    product = Product(sku="MINIMALIST-SPOON", batches=[medium, earliest, latest])

    product.allocate(line)

    assert earliest.available_quantity == 90
    assert medium.available_quantity == 100
    assert latest.available_quantity == 100


def test_returns_allocated_batch_ref():
    in_stock_batch = Batch(reference="in-stock-batch-ref", sku="HIGHBROW-POSTER", purchased_quantity=100, eta=None)
Batch(reference="shipment-batch-ref", sku="HIGHBROW-POSTER", purchased_quantity=100, eta=date(2022, 1, 7)) line = OrderLine(order_id="oref", sku="HIGHBROW-POSTER", qty=10) product = Product(sku="HIGHBROW-POSTER", batches=[in_stock_batch, shipment_batch]) allocation = product.allocate(line) assert allocation == in_stock_batch.reference def test_records_out_of_stock_event_if_cannot_allocate(): batch = Batch(reference="batch", sku="SMALL-FORM", purchased_quantity=10, eta=date(2022, 1, 7)) product = Product(sku="SMALL-FORK", batches=[batch]) product.allocate(OrderLine(order_id="oref", sku="SMALL-FORM", qty=10)) allocation = product.allocate(OrderLine(order_id="oref", sku="SMALL-FORM", qty=1)) assert product.messages[-1] == events.OutOfStock(sku="SMALL-FORM") assert allocation is None ================================================ FILE: books/refactoring.md ================================================ [go back](https://github.com/pkardas/learning) # Refactoring: Improving the Design of Existing Code Book by Martin Fowler (Second Edition) - [Chapter 1: Refactoring: A First Example](#chapter-1-refactoring-a-first-example) - [Chapter 2: Principles in Refactoring](#chapter-2-principles-in-refactoring) - [Chapter 3: Bad Smells in Code](#chapter-3-bad-smells-in-code) - [Chapter 4: Building Tests](#chapter-4-building-tests) - [Chapter 5: Introducing the Catalog](#chapter-5-introducing-the-catalog) - [Chapter 6: A First Set of Refactorings](#chapter-6-a-first-set-of-refactorings) - [Chapter 7: Encapsulation](#chapter-7-encapsulation) - [Chapter 8: Moving Features](#chapter-8-moving-features) - [Chapter 9: Organising Data](#chapter-9-organising-data) - [Chapter 10: Simplifying Conditional Logic](#chapter-10-simplifying-conditional-logic) - [Chapter 11: Refactoring APIs](#chapter-11-refactoring-apis) - [Chapter 12: Dealing with Inheritance](#chapter-12-dealing-with-inheritance) ## Chapter 1: Refactoring: A First Example A poorly designed system is hard to change - because it is hard to figure out what to change and hoe these changes will interact with existing code. > When you have to add a feature to a program but the code is not structured in a convenient way, first refactor the > program to make it easy to add the feature, then add the feature. Before making any changes, start with self-checking tests (assertions checked by testing framework). Tests can be considered as bug detectors, they should catch any change that introduces bugs. Refactoring changes the programs in small steps, so if you make a mistake, it is easy to find where the bug is. Author suggests committing after each successful refactoring, so it is easier get back to a working state, then he squashes changes into more significant commits before pushing changes to the remote repository. When refactoring a long functions, mentally try to identify points that separate different parts of the overall behaviour (decomposition). Extracting a function is a common refactoring technique. > Any fool can write code that a computer can understand. Good programmers write code that humans can understand. Other techniques discussed also later: Replace Temp with Query, Inline Variable, Change Function Declaration, Split Loop, Slide Statements. Think of the best name at the moment and rename it later. Breaking large functions into smaller, only adds value if the names are good. > Programmers are poor judges of how code actually performs. Many of our intuitions are broken by clever compilers, > modern caching techniques, .... 
> Any fool can write code that a computer can understand. Good programmers write code that humans can understand.

Other techniques, discussed also later: Replace Temp with Query, Inline Variable, Change Function Declaration, Split Loop, Slide Statements.

Think of the best name you can at the moment and rename it later. Breaking large functions into smaller ones only adds value if the names are good.

> Programmers are poor judges of how code actually performs. Many of our intuitions are broken by clever compilers, modern caching techniques, ... The performance of software usually depends on just a few parts of the code, and changes anywhere else don't make an appreciable difference.

Anyhow, if refactoring introduces performance slow-downs, finish refactoring first and then do performance tuning.

Mutable data quickly becomes something rotten.

> Always leave the code base healthier than when you found it. It will never be perfect, but it should be better.

> A true test of good code is how easy it is to change it.

Code should be obvious. When doing refactoring, take small steps - each step should leave the code in a working state that compiles and passes its tests.

## Chapter 2: Principles in Refactoring

Refactoring (noun) - a change made to the internal structure of software to make it easier to understand and cheaper to modify without changing its observable behaviour.

Refactoring (verb) - to restructure software by applying a series of refactorings without changing its observable behaviour.

When doing refactoring, code should not spend much time in a broken state, which means you can stop at any moment even if you haven't finished. If someone says their code was broken for a couple of days while they were refactoring, you can be pretty sure they were not refactoring.

Two Hats - when developing new functionality, do not change existing code; when refactoring, do not add new functionality. Swap hats: refactor, add functionality, refactor, ...

Why should we refactor?

- software design improvement - changes are made to achieve short-term goals, and because of that code loses its structure; regular refactoring helps keep the code in shape. An important aspect of refactoring is eliminating duplicated code.
- makes software easier to understand - think about future developers, decrease the time needed to make a change. You don't have to remember every aspect of the code - make it easy to understand and decrease the load on your brain.
- helps in finding bugs - clarifies the structure and certain assumptions.
- helps programming faster - adding new features might be difficult in a system full of patches and patches for patches; a clear structure allows adding new capabilities faster. Good design allows you to quickly find the place where a change needs to be made. Also, if code is clear, it is less likely that you will introduce a bug. The code base should be a platform for building new features for its domain.

> The Rule of Three - The first time you do something, you just do it. The second time you do something similar, you wince at the duplication, but you do the duplicate thing anyway. The third time you do something similar, you refactor.

When should we refactor?

- preparatory refactoring - building a foundation for a new feature.
  - > It is like you want to go 100 km east but instead of traipsing through the woods, you drive 20 km north to the highway, and then you are going 3x the speed you could have if you just went straight there.
- comprehension refactoring - making code easier to understand. Move the understanding of a subject from your head into the code itself.
- litter-pickup refactoring - make small changes around the place you are currently viewing - the Boy Scout Rule.
- planned and opportunistic refactoring - refactoring should happen while doing other things; planned refactorings are usually required in teams that neglected refactoring.
- long-term refactoring - a refactoring may take weeks, because of a new library or pulling some section of code out into a component that can be shared between teams - even in such cases refactoring should be performed in small steps.
- refactoring in a code review - code reviews help spread knowledge through a development team. Code may look clear to me but not to my team. Code reviews give the opportunity for more people to suggest useful ideas.

Sometimes it is easier to rewrite than refactor. The decision to refactor or rewrite requires good judgement and experience.

However, there are a couple of problems associated with refactoring:

- some people see refactoring as something that slows down development (which is not really true) - this should be explained; the economic benefits of refactoring should always be the driving factor: we refactor because it makes us faster at adding features and fixing bugs.
- merge conflicts may be painful, especially in a team of multiple full-time developers; the suggested approach is to use CI - Continuous Integration - each team member integrates with the mainline at least once per day.
- to perform refactoring correctly you need to have good tests - code needs to be self-testing; without self-testing code, refactoring carries a high risk of introducing bugs.
- refactoring legacy code is hard, but it is a fantastic tool to help understand a legacy system. Legacy code is often missing tests, and adding tests for legacy code is difficult because it wasn't designed with testing in mind.
- some time ago database refactoring was considered a problem area; currently we have migrations, which make database refactoring possible.

Refactoring changed how people think about architecture (previously: completed before any development, now: changed iteratively). YAGNI does not mean you need to neglect all architectural thinking.

In order to be fully agile, a team has to be capable and enthusiastic refactorers. The first foundation for refactoring is self-testing code, the second is CI. Good programmers know that they rarely write clean code the first time around.

IDEs use the syntax tree to analyse and refactor code (e.g. changing a variable name happens on the syntax tree level, not on the text level) - this makes IDEs more powerful than text editors.

## Chapter 3: Bad Smells in Code

When should you start refactoring? It is a matter of intuition. However, there are some indicators.

MYSTERIOUS NAME - code needs to be mundane and clear; a good name can save hours of puzzled incomprehension in the future.

DUPLICATED CODE - if you see the same code structure in more than one place, your program will be better if you find a way to unify them. Duplication means every time you read these copies you need to read them carefully and look for differences.

LONG FUNCTION - the programs that live best and longest are those with short functions. Whenever you feel you need to comment something - decompose. Even a single line is worth extracting if it needs an explanation. Conditionals and loops are also signs for extraction.

LONG PARAMETER LIST - long lists of parameters are confusing - pass an object, use a query on an existing object or combine functions into an object.

GLOBAL DATA - the problem with global data is that it can be modified from any place in the code base, and this leads to bugs. Global data: global variables, class variables, singletons. Global data is especially nasty when it is mutable.

MUTABLE DATA - (from functional programming) data should never change; updating a data structure should return a new copy of the structure, leaving the old data pristine (see the sketch below).

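A minimal Python illustration of that style (the `Money` class is invented for the example); `dataclasses.replace` builds a new object instead of mutating the old one:

```
from dataclasses import dataclass, replace


@dataclass(frozen=True)  # frozen=True makes instances immutable
class Money:
    amount: int
    currency: str


price = Money(amount=100, currency="PLN")
discounted = replace(price, amount=80)  # a new Money - `price` stays pristine

assert price.amount == 100
assert discounted.amount == 80
```
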
DIVERGENT CHANGE - making changes should be easy; if you need to, for example, edit 4 functions every time you add a new financial instrument, something is off.

SHOTGUN SURGERY - every time you make a change, you have to make a lot of little edits to a lot of different classes; when changes are all over the place, they are hard to find, and it is easy to miss an important one. In such a case all the fields should be put in a single module.

FEATURE ENVY - for example: a function in one module spends more time communicating with functions or data inside another module than it does within its own module - the function clearly wants to be with the data, so move the function to get it there. Put things together that change together.

DATA CLUMPS - some items enjoy hanging around together; the same three or four data items appear together in lots of places - you can group them together.

PRIMITIVE OBSESSION - many programmers are reluctant to create their own fundamental types which are useful for their domain.

REPEATED SWITCHES - basically the same problem as in DUPLICATED CODE.

LOOPS - loops are less relevant in programming today because of the presence of map and filter mechanisms.

LAZY ELEMENT - sometimes you may want to replace a function with inline code or collapse an object hierarchy.

SPECULATIVE GENERALITY - all the special cases to handle situations that are not going to happen soon (YAGNI).

TEMPORARY FIELD - a class with a field which is set only in certain circumstances - difficult to understand.

MESSAGE CHAINS - the client asks an object for another object, which the client then asks for yet another object - this might cause a train wreck; navigating such code is difficult.

MIDDLE MAN - internal details of the object should be hidden from the rest of the world.

INSIDER TRADING - modules should be separated to keep their whispering down; if 2 modules have common interests, create a third module for this communication.

LARGE CLASS - when a class has too many fields, it is a sign that it is doing too much; this means duplicated code, chaos and death.

ALTERNATIVE CLASSES WITH DIFFERENT INTERFACES - if you are allowing substitution, classes have to have the same interface.

DATA CLASS - classes with fields, setters and getters - nothing else. Such classes are often being manipulated in far too much detail by other classes. You can try to move that behaviour into the data class.

REFUSED BEQUEST - wrong hierarchy, subclasses don't want or need what they are given.

COMMENTS - when you feel the need to write a comment, first try to refactor the code so that any comment becomes superfluous.

## Chapter 4: Building Tests

Proper refactoring can not be done without proper tests. A suite of tests is a powerful bug detector that decapitates the time it takes to find bugs.

TDD allows concentrating on the interface rather than the implementation, which is a good thing.

Always make sure a test will fail when it should (try to break your code, to see if the test fails as well). Testing should be risk-driven - you don't need to test every getter.

When you get a bug report, start by writing a unit test that exposes the bug (a sketch below). The best measure for a good enough test suite is subjective: how confident are you that if someone introduces a defect into your code, some test will fail?

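For instance, a pytest-style sketch (the discount function and the bug are invented) - the test is written straight from the bug report and must fail until the defect is fixed:

```
def apply_discount(total: float, rate: float) -> float:
    # imagine the reported bug: the discount was applied twice somewhere in here
    return total * (1 - rate)


def test_discount_is_applied_exactly_once():
    # written before touching the implementation - it pins the bug down
    assert apply_discount(100.0, 0.1) == 90.0
```
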
## Chapter 5: Introducing the Catalog

The rest of the book is a catalog of refactorings. Each *Refactoring* has: name, sketch, motivation, mechanics and examples.

## Chapter 6: A First Set of Refactorings

EXTRACT FUNCTION - write small functions.

INLINE FUNCTION - inverse of *Extract Function*; sometimes the function body is as clear as the name. Helpful when you need to group functions - first you join them and then extract functions.

EXTRACT VARIABLE - inverse of *Inline Variable*; expressions can become very complex and hard to read - in such situations local variables may help break the expression down into something more manageable.

INLINE VARIABLE - inverse of *Extract Variable*; sometimes the name doesn't communicate more than the expression itself.

CHANGE FUNCTION DECLARATION - if you see a function with the wrong name, change it as soon as you understand what a better name would be, so next time you are looking at the code you don't have to figure out what is going on. Often a good way of improving a name is to write a comment describing the function's purpose - then turn that comment into a name (applies to variables as well). Adding / removing parameters can be done by introducing an intermediate wrapping function.

ENCAPSULATE VARIABLE - encapsulate access to the variable using functions; instead of accessing data directly, do this through a single access point - a function. Keeping data encapsulated is less important for immutable data.

RENAME VARIABLE - variables can do a lot to explain what the programmer is up to (if they are named well).

INTRODUCE PARAMETER OBJECT - often a group of data items travel together, appearing in function after function. Such a group is a data clump - it can easily be replaced with a data structure. Example:

```
def amountInvoiced(start: date, end: date)

def amountInvoiced(date_range: Range)
```

Grouping data into a structure is valuable because it makes the relationship between the data items explicit and reduces the size of parameter lists. Grouping helps to identify new structures.

COMBINE FUNCTIONS INTO CLASS - when a group of functions operate closely together on a common body of data, there is an opportunity to form a class.

> Uniform access principle - All services offered by a module should be available through a uniform notation, which does not betray whether they are implemented through storage or through computation. With this, the client of the class can't tell whether the *value* is a field or a derived value.

COMBINE FUNCTIONS INTO TRANSFORM - instead of aggregating functions into classes, you can build functions that enrich existing objects. A transformation is about producing essentially the same thing with some additional information.

SPLIT PHASE - whenever you encounter code that does two things, look for a way to split it into separate modules. If some processing has 2 stages, make the difference explicit by turning them into 2 separate modules.

## Chapter 7: Encapsulation

ENCAPSULATE RECORD - instead of using plain dictionaries, encapsulate them into an object. With an object, you can hide what is stored and provide methods for all the values. The user does not have to care which value is calculated and which is stored. **Dictionaries are useful** in many programming situations **but they are not explicit about their fields**. Refactor implicit structures into explicit ones.

ENCAPSULATE COLLECTION - a good idea is to ensure that the getter for the collection can not accidentally change it. One way to prevent modification of a collection is to use some form of read-only proxy to the collection. Such a proxy can allow all reads but block any write to the collection. The most popular approach is to provide a getting method for the collection, but make it return a copy of the underlying collection (see the sketch below). Replacing `customer.orders.size` with `customer.num_of_orders` is not recommended, because it adds a lot of extra code and cripples the easy composability of collection operations.

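A minimal sketch of the copy-returning getter (the `Customer` class is invented):

```
class Customer:
    def __init__(self):
        self._orders: list = []

    def add_order(self, order) -> None:
        self._orders.append(order)

    @property
    def orders(self) -> list:
        # hand out a copy - callers can iterate and compose freely,
        # but cannot mutate the customer's internal list
        return list(self._orders)


customer = Customer()
customer.add_order("order-1")
customer.orders.append("sneaky-order")  # mutates only the copy
assert customer.orders == ["order-1"]
```
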
If the team has the habit of not modifying collections outside the original module, it might be enough. It is worth being moderately paranoid about collections - rather copy them unnecessarily than debug errors due to unexpected modifications. For example, instead of sorting in place, return a new copy.

REPLACE PRIMITIVE WITH OBJECT - simple facts can be represented by simple data items such as numbers or strings; as development proceeds, those simple items aren't so simple anymore. This is one of the most important refactorings. Starting with simply wrapping the value in an object, you can extend the class with additional behaviours.

REPLACE TEMP WITH QUERY - using temporary variables allows referring to the value while explaining its meaning and avoiding repeating the code that calculates it. But while using a variable is handy, it can often be worthwhile to go a step further and use a function instead, mostly when the variable needs to be calculated multiple times across the class.

EXTRACT CLASS - split classes containing too much logic into separate classes. Good signs for doing so:

- a subset of the data and a subset of the methods seem to go together
- data that usually change together or are particularly dependent on each other

Useful test: ask the question: if you removed a piece of data or a method, what other fields and methods would become nonsense?

INLINE CLASS - inverse of *Extract Class*. Generally useful as an intermediate step when performing refactoring, e.g. you put all attributes in one class, just to split them later.

HIDE DELEGATE - Example: `person.department.manager` should be replaced with `person.manager` (an additional getter hiding the delegate, sketched below). Why? If the delegate changes its interface, the change would otherwise have to be propagated across all parts of the system.

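A minimal sketch (invented classes):

```
class Department:
    def __init__(self, manager: str):
        self.manager = manager


class Person:
    def __init__(self, department: Department):
        self._department = department

    @property
    def manager(self) -> str:
        # clients ask the person, not the person's department
        return self._department.manager


boss = Person(Department(manager="Alice")).manager  # instead of person.department.manager
```
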
REMOVE MIDDLE MAN - inverse of *Hide Delegate*. Sometimes the forwarding introduced by Hide Delegate becomes irritating, and it is easier to call the delegate directly (a violation of the Law of Demeter, for which the author suggests a better name: the Occasionally Useful Suggestion of Demeter).

SUBSTITUTE ALGORITHM - there are usually several ways to do the same thing, and the same goes for algorithms. When you learn more about the problem, you can realise there is an easier way to do it.

## Chapter 8: Moving Features

Another important part of refactoring is moving elements between contexts.

MOVE FUNCTION - one of the most straightforward reasons to move a function is when it references elements in other contexts more than the one it currently resides in. Deciding to move a function is rarely an easy decision. Examine the current and candidate contexts for that function.

MOVE FIELD - programming involves writing a lot of code that implements behaviour - but the strength of a program is really founded on its data structures. If I have a good set of data structures that match the problem, then my behaviour code is simple and straightforward. Moving fields usually happens in the context of a broader set of changes.

MOVE STATEMENTS INTO FUNCTION - removing duplication is one of the best rules of thumb of healthy code. Look to combine repeating code into a function. That way, any future modification to the repeating code can be done in one place and used by all the callers.

MOVE STATEMENTS TO CALLERS - inverse of *Move Statements into Function*. The motivation for this refactoring is that we rarely get the boundaries right. Sometimes common behaviour used in several places needs to vary in some of its calls - that is when you move the varying behaviour out of the function to its callers.

REPLACE INLINE CODE WITH FUNCTION CALL - functions allow packaging bits of behaviour. This is useful for understanding - a named function can explain the purpose of the code rather than its mechanics. It is also useful for deduplication.

SLIDE STATEMENTS - code is easier to understand when things that are related to each other appear together. If several lines of code access the same data structure, it is best for them to be together rather than intermingled with code accessing other data structures. You can also declare a variable just before you first use it.

SPLIT LOOP - you have often seen loops that are doing two different things at once, just because they can do that with one pass through the loop. But if you are doing two different things in the same loop, then whenever you need to modify the loop you have to understand both things. By splitting the loop, you ensure you only need to understand the behaviour you need to modify. Many programmers are uncomfortable with this refactoring, as it forces you to execute the loop twice. REMINDER: once you have your code clear, you can optimise it, and if the loop traversal is a bottleneck, it is easy to slam the loops back together. But the actual iteration through even a large list is rarely a bottleneck, and splitting the loops often enables other, more powerful optimisations.

REPLACE LOOP WITH PIPELINE - language environments provide better constructs than loops - the collection pipeline (`input.filter(...).map(...)`). Logic is much easier to follow if it is expressed as a pipeline. It can be read from top to bottom to see how objects flow through the pipeline.

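In Python, the `filter`/`map` pipeline is usually spelled as a comprehension or generator expression; a tiny sketch with invented data:

```
readings = [12.1, -1.0, 15.3, -2.5, 9.8]

# loop version: filtering and mapping interleaved with bookkeeping
total = 0.0
for r in readings:
    if r >= 0:
        total += r * 1.8 + 32

# pipeline version: reads top to bottom as filter -> map -> reduce
total = sum(r * 1.8 + 32 for r in readings if r >= 0)
```
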
REMOVE DEAD CODE - decent compilers will remove unused code, but unused code is still a significant burden when trying to understand how the software works. Once code is not used, it should be deleted. If you need it sometime in the future - you have a version control system, so you can always dig it out again. Commenting out dead code was once a common habit; it was useful before version control systems were widely used or when they were inconvenient.

## Chapter 9: Organising Data

Data structures play an important role in our programs, so it is no surprise that there is a clutch of refactorings that focus on them.

SPLIT VARIABLE - using a variable for two different things is very confusing for the reader. Any variable with more than one responsibility should be replaced with multiple variables, one for each responsibility. Exception: collecting variables (e.g. `i = i + 1`) - often used for calculating sums, string concatenation, writing to a stream or adding to a collection - don't split them.

RENAME FIELD - data structures are the key to understanding what is going on inside the system. It is essential to keep them clear. Rename fields in classes / records, so they are easy to understand.

REPLACE DERIVED VARIABLE WITH QUERY - one of the biggest sources of problems in software is mutable data. Data changes can often couple together parts of code in awkward ways, with changes in one part leading to knock-on effects that are hard to spot. Remove variables that can be easily calculated. A calculation often makes it clearer what the meaning of the data is, and it is protected from being corrupted when you fail to update the variable as the source data changes.

CHANGE REFERENCE TO VALUE - instead of updating values of the nested objects, create a new object with updated params. Value objects are generally easier to reason about, particularly because they are immutable. Immutable data structures are easier to work with.

CHANGE VALUE TO REFERENCE - inverse of *Change Reference to Value*. A data structure may have several records linked to the same logical data structure. The biggest difficulty in having physical copies of the same logical data occurs when you need to update the shared data: you have to find all the copies and update them all, and if you miss one, you will get a troubling inconsistency in the data. In this case, it is often worthwhile to change the copied data into a single reference.

## Chapter 10: Simplifying Conditional Logic

Much of the power of programs comes from their ability to implement conditional logic - but, sadly, much of the complexity of programs lies in these conditionals.

DECOMPOSE CONDITIONAL - the length of a function is in itself a factor that makes it hard to read, but conditions increase the difficulty. As with any large block of code, you can make your intention clearer by decomposing it and replacing each chunk of code with a function call named after the intention of that chunk.

CONSOLIDATE CONDITIONAL EXPRESSION - sometimes you run into a series of conditional checks where each check is different yet the resulting action is the same. When you see this, you can use `and` and `or` operators to consolidate them into a single conditional check with a single result. Consolidating is important because it makes it clear that you are making a single check that combines other checks, and because it often sets you up for *Extract Function*. Extracting a condition is one of the most useful things you can do to clarify code.

REPLACE NESTED CONDITIONAL WITH GUARD CLAUSES - a guard clause says: "This isn't the core of this function, and if it happens, do something and get out." In other words, if you know the result, return it immediately instead of assigning it to a `result` variable just to have one single return statement at the end of the function. *// A guard clause is simply a check that immediately exits the function, either with a return statement or an exception.*

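A before/after sketch (the payroll fields are invented):

```
def payment_amount(employee) -> float:
    # nested version: the reader has to track `result` through every branch
    if employee.is_separated:
        result = 0.0
    else:
        if employee.is_retired:
            result = employee.pension
        else:
            result = employee.salary
    return result


def payment_amount_with_guards(employee) -> float:
    if employee.is_separated:
        return 0.0  # not the core of the function: answer and get out
    if employee.is_retired:
        return employee.pension
    return employee.salary
```
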
REPLACE CONDITIONAL WITH POLYMORPHISM - it is possible to put logic in a superclass, which allows reasoning about it without having to worry about the variants. Each variant case can be put in a subclass. Complex conditional logic can be improved using polymorphism. This feature can be overused - basic conditional logic should use basic conditional statements.

INTRODUCE SPECIAL CASE - also known as *Introduce Null Object*. Many parts of the system have the same reaction to a particular value; you may want to bring that reaction into a single place. The Special Case pattern is a mechanism that captures all the common behaviour - this allows replacing most of the special-case checks with simple calls. A common value that needs special-case processing is null, which is why this pattern is often called the Null Object pattern.

INTRODUCE ASSERTION - often, sections of code work only if certain conditions are true. Such assumptions are often not stated explicitly and can only be deduced by looking through the algorithm. Sometimes these assumptions are stated with a comment. A better technique is to make the assumption explicit by writing an assertion. Failure of an assertion indicates a programmer error. Assertions should never be checked by other parts of the system, and they should be written so that the program functions equally correctly if they are all removed. Use assertions to check things that need to be true - use them when you think they should never fail.

## Chapter 11: Refactoring APIs

Modules and functions are the building blocks of our software. APIs are the joints that we use to plug them together. Making APIs easy to understand and use is difficult.

SEPARATE QUERY FROM MODIFIER - it is a good idea to clearly signal the difference between functions with side effects and those without. A good rule to follow is that any function that returns a value should not have *observable* (e.g. a cache does not count) side effects (command-query separation). Having a function that gives a value without observable side effects is very valuable, because you can call this function as often as you like.

PARAMETRISE FUNCTION - if you see two functions that carry out very similar logic with different literal values, you can remove the duplication by using a single function with parameters for the different values.

REMOVE FLAG ARGUMENT - a flag argument is a function argument that the caller uses to indicate which logic the called function should execute (via a boolean value, enum or string). Flags complicate the process of understanding what function calls are available and how to call them. Boolean values are the worst, since they don't convey their meaning to the reader - what does `true` mean? Remove flag arguments (see the sketch below).

There is only one case for flag arguments - when there is more than one flag argument, making a specialised function for every combination of values would greatly increase the complexity. But on the other hand, this is a signal of a function doing too much.

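A sketch of the refactoring (the delivery example is invented) - the boolean disappears into two intention-revealing functions:

```
from datetime import timedelta


def delivery_date(order, is_rush: bool):
    # caller writes delivery_date(order, True) - what does True mean?
    return order.placed_on + timedelta(days=1 if is_rush else 3)


# after: one function per behaviour, nothing to decode at the call site
def rush_delivery_date(order):
    return order.placed_on + timedelta(days=1)


def regular_delivery_date(order):
    return order.placed_on + timedelta(days=3)
```
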
PRESERVE WHOLE OBJECT - if you see code that derives a couple of values from a record and then passes these values into a function, replace those values with the whole record itself, letting the function body derive the values it needs. This change reduces the number of parameters and handles future changes better. Pulling several values from an object to do some logic on them alone is a smell - *Feature Envy* - and usually a signal that this logic should be moved into the object itself. If several bits of code only use the same subset of an object's features, that may indicate a good opportunity for *Extract Class*.

REPLACE PARAMETER WITH QUERY - inverse of *Replace Query with Parameter*. The parameter list of a function should summarise the points of variability of that function, indicating the primary ways in which that function may behave differently. If a call passes in a value that the function can easily determine for itself, that is a form of duplication. When the parameter is present, determining its value is the caller's responsibility - otherwise, that responsibility shifts to the function body. The usual habit should be to simplify life for callers, which implies moving responsibility to the function body.

REPLACE QUERY WITH PARAMETER - inverse of *Replace Parameter with Query*. By moving a query to the parameter list, you force the caller to figure out how to provide this value. This complicates life for callers of the function (preferably, make life easier for them).

REMOVE SETTING METHOD - providing a setting method indicates that a field may be changed. If you don't want that field to change once the object is created, do not provide a setting method (and make the field immutable). Removing the setter makes it clear that updates make no sense after construction.

REPLACE CONSTRUCTOR WITH FACTORY FUNCTION - constructors often come with awkward limitations that aren't there for regular functions: the constructor's name is fixed, and it often requires a special operator (`new`). A factory function suffers from no such limitations.

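A minimal sketch (the `Employee` type codes are invented) - the factory's name can say what the constructor cannot:

```
class Employee:
    def __init__(self, name: str, type_code: str):
        self.name = name
        self.type_code = type_code


# the constructor's name is fixed to the class; a factory is named after intent
def create_engineer(name: str) -> Employee:
    return Employee(name, type_code="E")


def create_manager(name: str) -> Employee:
    return Employee(name, type_code="M")
```
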
REPLACE FUNCTION WITH COMMAND - there are times when it is useful to encapsulate a function into its own object (a command object / command). Such an object is mostly built around a single method, whose request and execution is the purpose of the object. A command offers greater flexibility for the control and expression of a function than the plain function mechanism - commands can have operations such as `undo`. There are good reasons to use commands, but do not forget that this flexibility comes at a price paid in complexity.

REPLACE COMMAND WITH FUNCTION - inverse of *Replace Function with Command*. Command objects provide a powerful mechanism for handling complex computations, but most of the time you just want to invoke a function and have it do its thing. If the function isn't too complex, then a command object is more trouble than it's worth and should be turned into a regular function.

## Chapter 12: Dealing with Inheritance

Inheritance is a very useful and easy to misuse mechanism.

PULL UP METHOD - a form of removing duplication (duplication is bad because there is a risk that an alteration to one copy will not be made to the other). Pulling a method up means putting the method in the parent class.

PULL UP CONSTRUCTOR BODY - common constructor behaviour should reside in the superclass.

PUSH DOWN METHOD - inverse of *Pull Up Method*. If a method is only relevant to one subclass (or a small proportion of subclasses), removing it from the superclass and putting it only on the subclass makes that clearer. You can only do this refactoring if the caller knows it is working with a particular subclass - otherwise, use *Replace Conditional with Polymorphism* with some placebo behaviour on the superclass.

PUSH DOWN FIELD - if a field is only used by one subclass (or a small proportion of subclasses), move it to those subclasses.

REPLACE TYPE CODE WITH SUBCLASS - instead of using a *flag* in the object indicating the type of the class (e.g. `Employee(engineer)`), create specialised subclasses.

REMOVE SUBCLASS - inverse of *Replace Type Code with Subclass*. Subclasses are useful, but as a software system evolves, subclasses can lose their value. A subclass that does too little incurs a cost in understanding that is no longer worthwhile. When that time comes, it is best to remove the subclass, replacing it with a field on its superclass.

EXTRACT SUPERCLASS - if you see 2 classes doing similar things, you can take advantage of the basic mechanism of inheritance to pull their similarities together into a superclass.

COLLAPSE HIERARCHY - when refactoring a class hierarchy, you often pull and push features around. As the hierarchy evolves, you can find that a class and its parent are no longer different enough to be worth keeping separate. At that point you can merge them together.

REPLACE SUBCLASS WITH DELEGATE - instead of subclassing objects, you can create a separate, independent entity. There is a popular principle: "*Favour object composition over class inheritance*"; however, it doesn't mean "*inheritance is considered harmful*". Inheritance is a valuable mechanism that does the job most of the time without problems. So reach for inheritance first, and move to delegation when it starts to rub badly.

REPLACE SUPERCLASS WITH DELEGATE - subclassing can be done in a way that leads to confusion and complication. A classic example of mis-inheritance from the early days of objects was making a stack a subclass of a list. The idea was to reuse the list's data storage and operations; however, many additional, inapplicable methods became available on the stack. A better approach is to make the list a field of the stack and delegate the necessary operations to it.

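A minimal Python sketch of that stack example - delegation instead of inheritance, so only the stack operations are exposed:

```
class Stack:
    def __init__(self):
        self._items: list = []  # the list is a field, not a superclass

    def push(self, item) -> None:
        self._items.append(item)

    def pop(self):
        return self._items.pop()

    def __len__(self) -> int:
        return len(self._items)


stack = Stack()
stack.push(1)
assert stack.pop() == 1  # no insert()/sort()/slicing leaking in from list
```
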
================================================
FILE: books/release-it.md
================================================
[go back](https://github.com/pkardas/learning)

# Release It! Design and Deploy Production-Ready Software

Book by Michael T. Nygard (Second Edition)

- [Chapter 1: Living in Production](#chapter-1-living-in-production)
- [Chapter 2: Case Study: The Exception That Grounded an Airline](#chapter-2-case-study-the-exception-that-grounded-an-airline)
- [Chapter 3: Stabilise Your System](#chapter-3-stabilise-your-system)
- [Chapter 4: Stability Anti-patterns](#chapter-4-stability-anti-patterns)
- [Chapter 5: Stability Patterns](#chapter-5-stability-patterns)
- [Chapter 6: Case Study: Phenomenal Cosmic Powers, Itty-Bitty Living Space](#chapter-6-case-study-phenomenal-cosmic-powers-itty-bitty-living-space)
- [Chapter 7: Foundations](#chapter-7-foundations)
- [Chapter 8: Processes on Machines](#chapter-8-processes-on-machines)
- [Chapter 9: Interconnect](#chapter-9-interconnect)
- [Chapter 10: Control Plane](#chapter-10-control-plane)
- [Chapter 11: Security](#chapter-11-security)
- [Chapter 12: Case Study: Waiting for Godot](#chapter-12-case-study-waiting-for-godot)
- [Chapter 13: Design for Deployment](#chapter-13-design-for-deployment)
- [Chapter 14: Handling Versions](#chapter-14-handling-versions)
- [Chapter 15: Case Study: Trampled by Your Own Customers](#chapter-15-case-study-trampled-by-your-own-customers)
- [Chapter 16: Adaptation](#chapter-16-adaptation)
- [Chapter 17: Chaos Engineering](#chapter-17-chaos-engineering)

## Chapter 1: Living in Production

"Feature complete" doesn't mean it is "production ready". A lot of bad things can happen in production (crazy users, viruses, high traffic, ...). Production is the only place to learn how the software will respond to real-world stimuli, hence software should be delivered to production quickly and gradually.

Most software architecture and design happens in environments that are clean and distant from production.

Design and architecture decisions are also financial decisions (downtime, resource usage, ...). It is important to consider availability, capacity and flexibility when designing software. A pragmatic architect should consider the dynamics of change.

## Chapter 2: Case Study: The Exception That Grounded an Airline

A tiny programming error starts the snowball rolling downhill.

In any incident, the author's priority is always to restore service. Restoring service takes precedence over investigation. If it is possible to gather some data for postmortem analysis, that's great - unless it makes the outage longer.

The trick to restoring the service is figuring out what to target. You can always "reboot the world" by restarting every single server, layer by layer, but that's not effective. Instead, be a doctor diagnosing a disease: look at the symptoms and figure out what disease to treat.

A postmortem is like a murder mystery: there is a set of clues - some reliable, like logs, some unreliable, like comments from people. There is no corpse - the servers are up and running, and the state that caused the error no longer exists. Log analysis helped to identify the root cause.

Bugs are inevitable - how do we prevent bugs in one system from affecting everything else? We are going to look at design patterns that can prevent this type of problem from spreading.

## Chapter 3: Stabilise Your System

Enterprise software must be cynical - it expects bad things to happen and is never surprised when they do. It doesn't even trust itself; it refuses to get too intimate with other systems, because it could get hurt.

Poor stability means real costs - millions lost, for example, in lost transactions in a trading system, or reputation loss. On the other hand, good stability does not necessarily cost a lot. A highly stable design usually costs the same to implement as an unstable one.

Transaction - an abstract unit of work processed by the system.

Impulse - a rapid shock to the system. For example, a rumour about a new console causes an impulse on the manufacturer's website, as does a celebrity tweet. Things that can fracture (break) the system in the blink of an eye.

Stress - a force applied to the system over an extended period. The major dangers to a system's longevity are memory leaks and data growth, which are difficult to catch during tests. Applications never run long enough in a development environment to reveal longevity bugs.

Failures will happen; you have the ability to prepare the system for specific failures (like car engineers design crumple zones - areas designated to protect passengers by failing first). It is possible to create failure modes that protect the rest of the system. Less-coupled architectures act as shock absorbers, diminishing the effect of an error instead of amplifying it.

Terminology:

- Fault - a condition that creates an incorrect internal state in the software.
- Error - visibly incorrect behaviour, e.g. a trading system buying 10M Pokemon futures.
- Failure - an unresponsive system.

Chain of failure: triggering a fault opens the crack, faults become errors and errors provoke failures. At each step, a crack may accelerate. Tight coupling accelerates cracks.

One way to prepare for every possible failure is to look at every external call, every I/O, every use of resources, and ask WHAT IF IT: can't make the connection, takes 10 minutes to make the connection, makes the connection and then disconnects, takes 10 minutes to respond to my query, 10k requests arrive at the same time, ...?

The IT community is divided into 2 camps:

1. Make the system fault-tolerant - catch exceptions, check error codes, keep faults from becoming errors.
2. "Let it crash", so you can restart from a known good state.

## Chapter 4: Stability Anti-patterns

Antipatterns that can wreck the system - they create, accelerate or multiply cracks in the system. These bad behaviours should be avoided.

You have to set the socket timeout if you want to break out of a blocking call - for example, a request may be stuck in the listening queue for minutes or forever (see the sketch below). Network failure can hit you in 2 ways: fast (immediate exception, e.g. connection refused) or slow (dropped ACK).

A blocked thread can't process other transactions, so overall capacity is reduced. If all threads are blocked, from a practical point of view, the server is down.

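For example, in Python neither the stdlib socket nor most clients time out by default - the timeout has to be set explicitly (the host and values are placeholders):

```
import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(5.0)  # without this, connect()/recv() can block indefinitely
try:
    sock.connect(("example.com", 80))  # placeholder host
    sock.sendall(b"GET / HTTP/1.0\r\nHost: example.com\r\n\r\n")
    data = sock.recv(4096)  # raises socket.timeout after 5 s instead of hanging a thread
except socket.timeout:
    pass  # handle the slow/dead peer instead of blocking forever
finally:
    sock.close()
```
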
Sometimes not every problem can be solved at the level of abstraction where it manifests. Sometimes the causes reverberate up and down the layers. You need to know how to drill through at least two layers of abstraction to find the reality at that level in order to understand problems.

REST with JSON over HTTP is the lingua franca for services today. HTTP-based protocols have their own issues:

- the TCP connection can be accepted, but the HTTP request never responded to
- the provider may accept the connection but not read the request
- the provider may send back a response the caller doesn't know how to handle
- the provider may send back a response with a content type the caller doesn't expect or know how to handle
- the provider may claim to be sending JSON but actually be sending plain text

Treat the response as data until you have confirmed it meets your expectations.

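A sketch of that defensive stance using `requests` (the URL and expected shape are invented):

```
import requests

response = requests.get("https://provider.example.com/api/stock", timeout=5)  # placeholder URL

if response.status_code != 200:
    raise RuntimeError(f"unexpected status: {response.status_code}")
if "application/json" not in response.headers.get("Content-Type", ""):
    raise RuntimeError("provider claims JSON but sent something else")

payload = response.json()  # may still raise if the body is "JSON" in name only
if "items" not in payload:  # validate the shape before using it
    raise RuntimeError("response missing expected 'items' field")
```
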
Libraries can have bugs too - they have all the variability in quality, style, and safety that you see in any other random sampling of code.

The most effective stability patterns to combat integration point failures are *Circuit Breaker* and *Decoupling Middleware*.

BEWARE NECESSARY EVIL - every integration point will fail in some way; you need to be prepared.

PREPARE FOR MANY FORMS OF FAILURE - failure may take several forms: network errors, semantic errors, slow responses, ...

KNOW WHEN TO OPEN UP ABSTRACTIONS - debugging integration point failures usually requires peeling back a layer of abstraction.

FAILURES PROPAGATE QUICKLY - a failure in a remote system quickly becomes your problem when your code isn't defensive enough.

APPLY PATTERNS TO AVERT INTEGRATION POINT PROBLEMS - use patterns like Circuit Breaker, Timeouts, Decoupling Middleware and Handshaking - discussed later.

Horizontal scaling - adding capacity by adding more servers; fault tolerance through redundancy. Vertical scaling - scaling by building bigger and bigger servers (more cores, memory and storage).

RECOGNISE THAT ONE SERVER DOWN JEOPARDISES THE REST - a chain reaction can happen because the death of one server makes the others pick up the slack.

HUNT FOR RESOURCE LEAKS - most of the time, a chain reaction happens when the application has a memory leak.

HUNT FOR OBSCURE TIMING BUGS - race conditions can be triggered by traffic; if one server dies because of a deadlock, the increased load on the others makes them more likely to hit the deadlock too.

USE AUTOSCALING - create health checks for every autoscaling group; the scaler can shut down instances that fail their health checks and start new ones.

DEFEND WITH BULKHEADS - partition servers with Bulkheads - more details later.

Cascading failures - occur when a crack in one layer triggers a crack in a calling layer. If the caller handles errors badly, it will start to fail, resulting in a cascading failure (for example, a database failure is going to impact any system that is calling the database). Every dependency is a chance for a failure to cascade.

- a cascading failure often results from a resource pool (e.g. a connection pool) that gets exhausted; safe resource pools always limit the time a thread can wait to check out a resource
- defend with timeouts and circuit breakers

Capacity is the maximum throughput your system can sustain under a given workload while maintaining acceptable performance. Breaking limits creates cracks in the system. Limits:

- heap memory - for example with in-memory sessions, memory can get short - many things can go wrong: out-of-memory exceptions, logging that stops working. It is possible to use weak references - the garbage collector may reclaim the memory when it is running low (before an out-of-memory error occurs). Callers have to behave nicely when the payload is gone. Weak references are useful, but they do add complexity.
- off-heap memory, off-host memory - for example Redis, but this is slower than local memory and there is a problem with replication.
- the number of sockets on the server is limited - every request corresponds to an open socket, and the OS assigns inbound connections to an ephemeral port that represents the receiving side of the connection. Because of the TCP packet format, one server can have up to 64 511 connections open. How can we serve millions of concurrent connections? Virtual IP addresses.
- closed sockets can be problematic too - before a socket can be reused, it goes through a couple of states, for example as a defence against bogons. A bogon is a wandering packet that got routed inefficiently and arrives late (out of sequence); if the socket were reused too quickly, the late packet could trigger a response.

Cookies are a clever way to pass state back and forth from client to server and vice versa. They allow all kinds of new applications, such as personalised portals and shopping sites. Cookies should carry only a small amount of data, because the data needs to be encrypted, and this is a CPU-heavy task.

A session is an abstraction that makes building applications easier. All the user really sends is a series of HTTP requests; the server receives them, computes, and returns responses. Sessions are about caching data in memory.

Truly dangerous users are the ones that target your website - once you are targeted, you will almost certainly be breached.

Adding complexity to solve one problem creates the risk of entirely new failure modes, e.g. multithreading - it enables scalability but also introduces concurrency errors.

Caching can be a powerful response to a performance problem; however, caching can cause trouble - it can eat away at the memory available for the system, and when that happens the garbage collector will spend more and more time attempting to recover enough memory to process requests. You need to monitor hit rates for the cached items to see whether most items are actually being served from the cache. **Caches should be built using weak references to hold the cached item itself.** It will help the GC reclaim the memory.

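In Python, the equivalent construct is the `weakref` module - e.g. entries in a `WeakValueDictionary` vanish once nothing else references the value (the report class is invented):

```
import weakref


class ExpensiveReport:
    def __init__(self, data: str):
        self.data = data


_cache: "weakref.WeakValueDictionary[str, ExpensiveReport]" = weakref.WeakValueDictionary()


def get_report(key: str) -> ExpensiveReport:
    report = _cache.get(key)
    if report is None:
        report = ExpensiveReport(data=f"report for {key}")  # pretend this is expensive
        _cache[key] = report  # the entry disappears once no one else holds the report
    return report
```
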
Libraries are notorious sources of blocking threads.

Self-Denial Attack - any situation in which the system conspires against itself. For example, a coupon code sent to 10k users to be used on a certain date is going to attract millions of users (like an XBOX preorder). Self-denial can be avoided by building a shared-nothing architecture (no shared databases or other resources) - ideal horizontal scaling. Talk to the marketing department about when they are going to send out mass emails - you will be able to pre-scale (prepare some additional instances for the increased load). Also be careful with open links to resources, and watch out for Fight Club bugs - increased front-end load causes exponentially increasing backend processing.

With point-to-point connections, each instance has to talk directly to every other instance - this means O(n^2) scaling - be careful. Point-to-point communication can be replaced by: UDP broadcasts, TCP/UDP multicast, pub/sub messaging, message queues. XP principle: do the simplest thing that will work.

Watch out for shared resources - they can be a bottleneck. Stress-test them heavily, and be sure clients will keep working despite a malfunctioning resource.

The frontend always has the ability to overwhelm the backend, because their capacities are not balanced. However, you can not build every service to be large enough to serve enormous load from the frontend - instead you must build services to be resilient in the face of a tsunami of requests (e.g. Circuit Breaker, Handshaking, Back-pressure, Bulkheads).

Dog-pile - when a bunch of servers impose transient load all at once (a term from American football). It can occur: when booting all servers at once, on a cron job, or when the config management pushes out a change. Use a random clock slew to diffuse the demand from cron jobs (every instance does something at a different time). Use a backoff algorithm so every client retries at a different time.

Infrastructure management tools can cause a lot of trouble (e.g. the Reddit outage) - build limiters and safeguards into them, so they won't destroy the entire system at once.

A slow response is worse than refusing a connection or returning an error, because it ties up resources in both the calling system and the called system. Slow responses usually result from excessive demand. The system should have the ability to monitor its own performance, so it can also tell when it isn't meeting its SLAs (service-level agreements). Why slow responses are dangerous: they trigger cascading failures, and users hitting the *reload* button cause even more traffic to the already overloaded system. If the system tracks its own responsiveness, it can tell when it is getting slow - in such a situation the developer should consider sending an immediate error response.

> Design with scepticism, and you will achieve resilience. Ask "What can system X do to hurt me?" and then design a way to dodge whatever wrench your supposed ally throws.

Use realistic data volumes - typical development and test data sets are too small to exhibit problems; you need production-size data to see what happens when your query returns a million rows that you turn into objects. Calls should be paginated. Do not rely on data providers - one day they will go *berserk* and fill up a table for no reason.

## Chapter 5: Stability Patterns

Healthy patterns to reduce, eliminate or mitigate the effects of cracks in the system. Apply patterns wisely to reduce the damage done by an individual failure.

TIMEOUTS - today every application is a distributed system, and every system must grapple with the fundamental nature of networks - they are fallible. When any element breaks, code can't wait forever for a response that may never come. *Hope is not a design method.* A timeout is a simple mechanism allowing you to stop waiting for an answer once you think it will not come. Well-placed timeouts provide fault isolation - **a problem in some other service does not have to become your problem**. Timeouts can also be relevant within a single service: any resource pool can be exhausted, and any resource that blocks threads must have a timeout to ensure that calling threads eventually unblock. Timeouts are often found in the company of retries, and fast retries are very likely to fail again (wait between retries).

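A sketch of such a retry policy (the numbers are arbitrary): exponential backoff plus random jitter, so clients back off and don't retry in lockstep:

```
import random
import time


def call_with_retries(call, attempts: int = 4):
    for attempt in range(attempts):
        try:
            return call()
        except Exception:
            if attempt == attempts - 1:
                raise  # out of attempts - let the failure surface
            # exponential backoff with jitter: every client retries at a different time
            time.sleep((2 ** attempt) + random.uniform(0, 1))
```
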
BULKHEADS - in a ship, bulkheads prevent water from moving from one compartment to another. You can apply the same technique: by partitioning the system, you can keep a failure in one part from destroying everything. This can be achieved, for example, by running the application on multiple servers - if one fails, we still have redundancy (e.g. instances across zones and regions in AWS). A bulkhead partitions capacity to preserve partial functionality when bad things happen. Granularity should be picked carefully - thread pools in the application, CPUs, servers in a cluster. Bulkheads are especially useful in service-oriented or microservice architectures, to prevent a chain reaction from taking the entire company down.

STEADY STATE - every time a human touches a server, it is an opportunity for unforced errors. It is best to keep people off production systems to the greatest extent possible. People should treat servers as "cattle", not "pets" - they should not be logged in to a server all the time watching whether everything is fine. The Steady State pattern says that for every mechanism that accumulates a resource (log files, rows in the database, caches in memory), some other mechanism must recycle that resource. Several types of sludge that can accumulate, and how to avoid the need for fiddling:

- data purging - easy to do, however it can be nasty; especially in relational databases there is a risk of leaving orphaned rows, and you need to make sure the application still works once the data is gone.
- log files - logs are a valuable source of information, but left unchecked they are a risk. When logs fill up the filesystem, they jeopardise stability. Configure log file retention based on size. Probably the best you can do is store logs on a centralised server (especially if you are required to keep logs for years because of a compliance regime). Logstash - a centralised server for logs, where they can be indexed, searched and monitored.
- in-memory caching - improper use of caching is a major cause of memory leaks, which in turn lead to horrors like daily server restarts. Limit the amount of memory a cache can consume (see the sketch below).
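A minimal sketch of a capped cache in Python (illustrative, not from the book). It bounds the number of entries LRU-style and tracks the hit rate the chapter says to monitor; for values that support weak references, `weakref.WeakValueDictionary` could hold them instead, letting the GC reclaim entries under memory pressure:

```python
from collections import OrderedDict

class BoundedCache:
    """LRU cache with a hard cap, so it cannot eat all available memory."""

    def __init__(self, max_entries=1024):
        self.max_entries = max_entries
        self._items = OrderedDict()
        self.hits = 0
        self.misses = 0  # monitor the hit rate to see if the cache earns its keep

    def get(self, key):
        try:
            value = self._items[key]
        except KeyError:
            self.misses += 1
            return None
        self._items.move_to_end(key)  # mark as most recently used
        self.hits += 1
        return value

    def put(self, key, value):
        self._items[key] = value
        self._items.move_to_end(key)
        while len(self._items) > self.max_entries:
            self._items.popitem(last=False)  # evict the least recently used entry
```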
Steady State encourages better operational discipline by limiting the need for system administrators to log on to production servers.

FAIL FAST - if the system can determine in advance that it will fail at an operation, it is always better to fail fast - the caller doesn't waste its capacity waiting. No, you don't need a Deep Learning team to tell whether it will fail. Example: if a call requires a database connection, the application can quickly check whether the database is available. Another approach is to configure the load balancer appropriately (no available servers - reject the request). Use request validation to know whether the data is correct. The Fail Fast pattern improves overall system stability by avoiding slow responses.

LET IT CRASH - there is no way to test everything or predict all the ways a system can break. We must assume that errors will happen. There must be a boundary for the crashiness: we want to crash a component in isolation, and the rest of the system must protect itself from a cascading failure. In a microservice architecture, a whole instance of the service might be the right granularity. We must be able to get back to a clean state and resume normal operation as quickly as possible - otherwise we will see performance degradation. Supervisors need to keep close track of how often they restart child processes; it might be necessary to restart the supervisor itself. A high number of restarts can indicate either that the state is not sufficiently cleaned up, or that the system is in jeopardy and the supervisor is just masking the underlying problem. The final element of "let it crash" is reintegration - the instance must somehow be able to rejoin the pool and accept work. This can be done through health checks at the instance level.

HANDSHAKING - most valuable when unbalanced capacities are leading to slow responses. If the server can detect that it will not be able to meet its SLAs, it should have some means to ask the caller to back off. This is an effective way to stop cracks from jumping layers, as in the case of a cascading failure. The application can notify the load balancer through a health check that it cannot take more requests (503 Service Unavailable); the load balancer then knows not to send any additional work to that particular server.

TEST HARNESSES - you can create test harnesses to emulate the remote system on the other end of each integration point. A good test harness should be as nasty and vicious as real-world systems will be. A test harness runs as a separate server, so it is not obliged to conform to the defined interface - it can provoke network errors, protocol errors or application-level errors. Consider building a test harness that substitutes for the remote end of every web service call. Integration testing environments are good at examining failures only in the seventh layer of the OSI model (the application layer) - and not even all of those. The test harness can be designed like an application server - it can have pluggable behaviour for the tests that are related to the real application. Broadly speaking, a test harness leads toward "chaos engineering". The Test Harness pattern augments other testing methods; it does not replace unit tests, acceptance tests, penetration tests and so on.
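A tiny sketch of one "nasty" harness behaviour in Python (the port is arbitrary): a black-hole server that accepts TCP connections and never answers, which quickly exposes callers that hit integration points without timeouts:

```python
import socket

def run_black_hole_server(port=10200):
    """Accept connections, read nothing, answer nothing - forever."""
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(("0.0.0.0", port))
    listener.listen(5)
    held_open = []  # keep sockets referenced so callers stay blocked
    while True:
        conn, addr = listener.accept()
        print(f"holding connection from {addr} open indefinitely")
        held_open.append(conn)

if __name__ == "__main__":
    run_black_hole_server()
```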
DECOUPLING MIDDLEWARE - middleware is a graceless name for tools that inhabit a singularly messy space: integrating systems that were never meant to work together. It is the connective tissue that bridges gaps between different islands of automation. Middleware integrates systems by passing data and events back and forth between them, and decouples them by letting the participating systems remove specific knowledge of, and calls to, the other systems. Tightly coupled middleware amplifies shocks to the system; synchronous calls are particularly vicious amplifiers that facilitate cascading failures (this includes JSON over HTTP). Message-oriented middleware decouples the endpoints in both space and time, because the requesting system doesn't just sit around and wait for a reply. This form of middleware cannot produce a cascading failure.

SHED LOAD - applications have zero control over their demand; at any moment, more than a billion devices could make a request. Services should model TCP's approach: when load gets too high, start to refuse new requests for work. This is related to Fail Fast. The ideal way to define "load is too high" is for a service to monitor its own performance relative to its SLA. When requests take longer than the SLA allows, it is time to shed some load.

CREATE BACK PRESSURE - every performance problem starts with a queue backing up somewhere. If a queue is unbounded, it can consume all available memory, and as the queue's length heads toward infinity, response time does too. Blocking the producer is a kind of flow control - it allows the queue to apply "back pressure" upstream. Back pressure propagates all the way to the ultimate client, who is throttled down in speed until the queue releases. TCP uses back pressure - once the window is full, senders are not allowed to send anything until released.

GOVERNOR - machines are great at performing repetitive tasks; humans are great at perceiving the high-level situation. In the 18th century, steam engineers discovered it is possible to run machines so fast that the metal breaks. The solution was the governor - a device that limits the speed of an engine. We can create governors to slow down the rate of actions. A governor is stateful and time-aware; it knows what actions have been taken over a period of time. (Reddit uses a governor to slow down the autoscaler, with logic that says it can only shut down a certain percentage of instances at a time.) The whole point of a governor is to slow things down enough for humans to get involved.

## Chapter 6: Case Study: Phenomenal Cosmic Powers, Itty-Bitty Living Space

Launching a new site is like having a baby. You must expect certain things, such as being awakened in the middle of the night. Monitoring technology provides a great safety net, pinpointing problems when they occur, but nothing beats the pattern-matching power of the human brain.

Response time is always a lagging indicator. You can only measure the response time of requests that are done, so whatever your worst response time may be, you can't measure it until the slowest request finishes. Requests that didn't complete never get averaged in.

Recovery-Oriented Computing - principles:

- Failures are inevitable, in both hardware and software.
- Modeling and analysis can never be sufficiently complete. A priori prediction of all failure modes is not possible.
- Human action is a major source of system failures.

Investigations aim to improve survivability in the face of failures. The ability to restart single components, instead of entire servers, is a key concept of recovery-oriented computing.
## Chapter 7: Foundations

Designing for production means thinking about production issues as first-class concerns (network, logging, monitoring, runtime control, security, the people who do operations). There are several layers of concern:

1. Operations - security, availability, capacity, status, communication
2. Control Plane - system monitoring, deployment, anomaly detection, features
3. Interconnect - routing, load balancing, failover, traffic management
4. Instances - services, processes, components, instance monitoring
5. Foundation - hardware, VMs, IPs

Virtualization promised developers a common hardware appearance across the bewildering array of physical configurations in the data centre. On the downside, performance is much less predictable. Many virtual machines can reside on the same physical host. It is rare to move VMs from one host to another. When designing applications to run in virtual machines, you must make sure they are not sensitive to the loss or slowdown of the host. A clock on a VM is not monotonic and sequential, because the VM can be suspended for an indefinite span of real time. The bottom line: don't trust the OS clock. If external time is important, use an external source like a local NTP server.

Containers have short-lived identity. As a result, a container should not be configured on a per-instance basis. A container won't have much, if any, local storage, so the application must rely on external storage for files, data, and maybe even cache. When you design an application for containers, keep a few things in mind: the whole container image moves from environment to environment, so the image can't hold things like production database credentials. Containers should not contain hostnames or port numbers, because those settings need to change dynamically while the container image stays the same. Containerised applications need to send their telemetry out to a data collector.

The 12-Factor App [12factor.net] - created by engineers at Heroku, a succinct description of a cloud-native, scalable, deployable application:

1. Codebase - track one codebase in revision control. Deploy the same build to every environment.
2. Dependencies - explicitly declare and isolate dependencies.
3. Config - store config in the environment (see the sketch after this list).
4. Backing services - treat backing services as attached resources.
5. Build, release, run - strictly separate build and run stages.
6. Processes - execute the app as one or more stateless processes.
7. Port binding - export services via port binding.
8. Concurrency - scale out via the process model.
9. Disposability - maximise robustness with fast startup and graceful shutdown.
10. Dev/prod parity - keep development, staging and production as similar as possible.
11. Logs - treat logs as event streams.
12. Admin processes - run admin/management tasks as one-off processes.
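A minimal sketch of factor 3 in Python (the variable names are illustrative): the same build reads its per-environment settings from the environment instead of baking them into the image:

```python
import os

# Per-environment settings come from the environment, never from the image.
DATABASE_URL = os.environ["DATABASE_URL"]  # required - fail fast at startup if missing
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")  # optional, with a default
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

if __name__ == "__main__":
    print(f"connecting to {DATABASE_URL}, logging at {LOG_LEVEL}")
```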
## Chapter 8: Processes on Machines

Service - a collection of processes across machines that work together to deliver a unit of functionality. Instance - an installation on a single machine out of a load-balanced array of the same executable. Executable - an artefact that a machine can launch as a process, created by the build process. Process - an operating system process running on a machine. Installation - the executable and any attendant directories, configuration files and other resources. Deployment - the act of creating an installation on a machine.

Developers should not do production builds from their own machines. Developer boxes are hopelessly polluted - we install all kinds of junk on these systems, play games and visit sketchy websites. Only make production builds on a CI server, and have it put the binary into a safe repository that nobody else can write into.

Configuration management tools like Chef, Puppet and Ansible are all about applying changes to running machines. They use scripts, playbooks or recipes to transition a machine from one state to a new state. We don't want our instance binaries to change per environment, but we do want their properties to change. That means the code should look outside the deployment directory to find per-environment configuration. ZooKeeper and etcd are popular choices for a configuration service - but any outage of these systems can cause a lot of trouble.

Shipboard engineers can tell when something is about to go wrong by the sound of the giant diesel engines. We must facilitate that awareness by building transparency into our systems. Transparency refers to the qualities that allow operators, developers and business sponsors to gain understanding of the system's historical trends, present conditions, instantaneous state and future projections. Debugging a transparent system is vastly easier, so transparent systems mature faster than opaque ones. A system without transparency cannot survive long in production. Transparency arises from deliberate design and architecture.

Instances should log their health and events to a plain old text file - any log scraper can collect these without disturbing the server process. Logging is certainly a white-box technology; it must be integrated pervasively into the source code. Not every exception needs to be logged as an error - just because a user entered a bad card number and the validation component threw an exception doesn't mean anything has to be done about it. Log errors in business logic or user input as WARNINGs; reserve ERROR for serious system problems. Logs have to present clear, accurate and actionable information to the humans who read them. Messages should include an identifier that can be used to trace the steps of a transaction.

Health checks should be more than just "yup, it is running" - they should report at least: IP, interpreter version, application version, whether the instance is accepting work, and the status of connection pools, caches and circuit breakers. The load balancer can use the health check for the "go live" transition too: when the health check on a new instance goes from failing to passing, the app is done with its startup.
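A minimal sketch of such a health check endpoint, here with Flask (an assumption - the book does not prescribe a framework, and the field names are illustrative):

```python
import sys

from flask import Flask, jsonify

app = Flask(__name__)
APP_VERSION = "1.4.2"  # illustrative; normally stamped in by the build pipeline

@app.route("/health")
def health():
    accepting_work = True  # flip to False while draining before shutdown
    status = {
        "interpreter": sys.version.split()[0],
        "version": APP_VERSION,
        "accepting_work": accepting_work,
        "db_pool": {"in_use": 3, "max": 20},        # report real pool stats here
        "circuit_breakers": {"payments": "closed"},
    }
    # 503 tells the load balancer to stop sending work (the Handshaking pattern)
    return jsonify(status), 200 if accepting_work else 503

if __name__ == "__main__":
    app.run(port=8080)
```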
## Chapter 9: Interconnect

The interconnect layer covers all the mechanisms that knit a bunch of instances together into a cohesive system: traffic management, load balancing and discovery. This is the layer where we can really create high availability.

Consul - a dynamic discovery service, suited for large teams with hundreds of small services. On the other hand, a small business with just a few developers would probably stick with direct DNS entries. DNS might be the best choice for small teams, particularly in a slowly changing infrastructure. When using DNS, it is important to call a logical service name rather than a physical hostname - even if that logical name is just an alias to the underlying host, it is still preferable. DNS round-robin is an easy approach to load balancing, but it suffers from putting too much control in the client's hands. A DNS outage can be serious, so DNS should not be hosted on the same infrastructure as the production system. There should be more than one DNS provider, with servers in different locations.

Almost everything we build today uses horizontally scalable farms of instances that implement request/reply semantics. Horizontal scaling helps with overall capacity and resilience, but it introduces the need for load balancing. Load balancing is all about distributing requests across a pool of instances to serve all requests correctly in the shortest feasible time.

Software load balancing - a low-cost approach that uses an application to listen for requests and dole them out across the pool of instances. This is basically a reverse proxy (a proxy multiplexes many outgoing calls into a single source IP address; a reverse proxy demultiplexes calls coming into a single IP address and fans them out to multiple addresses). Examples: Squid, HAProxy, Apache httpd, nginx.

Hardware load balancing - specialised network devices that serve a similar role to the reverse proxy server. They provide better capacity and throughput because they operate closer to the network.

One of the most important services a load balancer can provide is health checks - the load balancer will not send traffic to an instance that fails a certain number of health checks. Load balancers can also direct repeated requests to the same instance ("sticky sessions"). This helps with stateful services, like user session state in an application server: directing the same user's requests to the same instance gives better response time, because the necessary resources are already in that instance's memory. A downside of sticky sessions is that they can prevent load from being distributed evenly. Another useful way to employ a load balancer is content-based routing - for example, search requests may go to one set of instances, while user-signup requests go somewhere else.

Demand control - when, where and how to refuse work under heavy demand.

> Every failing system starts with a queue backing up somewhere.

Going nonlinear - a service slowing down under heavy load means fewer and fewer sockets are available to receive requests exactly when the most requests are coming in. Load shedding - under high load, turning away work the system can't complete in time - is the most important way to control incoming demand. We want to shed load as early as possible, to avoid tying up resources in several tiers before rejecting a request. A service should measure its own response time and report it in its health check.

Service discovery - services can announce themselves to begin receiving load. A caller needs to know at least one IP address to contact a particular service. Service discovery is itself another service: it can fail or get overloaded. Service discovery can be built on top of a distributed data store such as ZooKeeper or etcd. In CAP terms, ZooKeeper is a CP system - when there is a network partition, some nodes will not answer queries or accept writes. HashiCorp's Consul resembles ZooKeeper, but Consul's architecture places it in the AP camp - it prefers to remain available and risk stale information when a partition occurs.

## Chapter 10: Control Plane

The control plane encompasses all the software and services that run in the background to make production load successful. One way to think about it: if production user data passes through it, it is production software; if its main job is to manage other software, it is control plane. Every part of the control plane is optional, if you are willing to make trade-offs.
- for example: logging and monitoring help with postmortem analysis; without them, all of that takes longer or simply doesn't get done.

Mechanical advantage is the multiplier on human effort that simple machines provide. With mechanical advantage, a person can move something much heavier than themselves. It works for good or for ill - high leverage allows a person to make large changes with less effort.

Every postmortem review has three important jobs to do: explain what happened, apologise, and commit to improvement.

Automation has no judgement. When it goes wrong, it tends to do so really, really quickly. By the time a human perceives the problem, it is a question of recovery rather than intervention. We should use automation for the things humans are bad at: repetitive tasks and fast response. We should use humans for the things automation is bad at: perceiving the whole situation at a higher level.

A monitoring team should be responsible for providing monitoring tools - offering a monitoring service to customers. Log collectors can work in push mode (the instance pushes logs over the network - helpful with containers) or pull mode (the collector runs on a central machine and reaches out to all known hosts to remote-copy the logs). Getting all the logs onto one host is a minor achievement; the real beauty comes from indexing the logs - then you can search for patterns, make trend-line graphs and raise alerts when bad things happen. This can be done using Elasticsearch, Logstash and Kibana.

Categories of metrics that can be useful:

- Traffic indicators - page requests, transaction count
- Business transactions, for each type - number processed, number aborted, conversion rate
- Users - demographics, number of users, usage patterns, errors encountered
- Resource pool health - enabled state, total resources, number of resources created, number of blocked threads
- Database connection health - number of SQLExceptions thrown, number of queries, average response time
- Data consumption - number of rows present, footprint in memory and on disk
- Integration point health - state of circuit breaker, number of timeouts, number of requests, average response time, number of good responses, number of network/protocol errors, actual IP address
- Cache health - items in cache, memory used by cache, cache hit rate, items flushed by garbage collector

Canary deployment - a small set of instances that get the new build first. For a period of time, instances running the new build coexist with instances running the old build. The purpose of the canary deployment is to reject a bad build before it reaches the users.

The net result is that GUIs make terrible administrative interfaces for long-term production operation. The best interface for long-term operation is the command line - given a command line, operators can easily build a scaffolding of scripts, logging and automated actions to keep your software happy.

## Chapter 11: Security

Security must be baked in; it is not a seasoning to sprinkle onto your system at the end. You are responsible for protecting your consumers and your company.

OWASP Top 10 - catalogued application security incidents and vulnerabilities. The Top 10 list represents a consensus about the most critical web application security flaws:

1. Injection - an attack on a parser or interpreter that relies on user-supplied input. The classic example is SQL injection - it happens when code smashes strings together to make queries, even though every SQL library allows the use of placeholders in query strings (see the sketch below).
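A minimal sketch with Python's standard-library `sqlite3` (schema and data are invented for illustration) - the placeholder version sends user input as data, never as SQL text:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (name TEXT, email TEXT)")
conn.execute("INSERT INTO users VALUES ('alice', 'alice@example.com')")

user_input = "alice' OR '1'='1"  # a classic injection payload

# Vulnerable: user input becomes part of the SQL text itself.
# query = f"SELECT email FROM users WHERE name = '{user_input}'"  # DON'T

# Safe: the library binds user_input as a parameter.
rows = conn.execute("SELECT email FROM users WHERE name = ?", (user_input,)).fetchall()
print(rows)  # [] - the payload matches nothing instead of dumping the table
```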
Keep in mind that "*comes from a user*" doesn't only mean the input arrived just now in an HTTP request - data from a database may have originated from a user as well. XML parsers are vulnerable too (XXE injection).

2. Broken Authentication and Session Management - at one time it was common to use query parameters on URLs and hyperlinks to carry session IDs. Not only are those IDs visible to every switch, router and proxy server, they are also visible to humans - anyone who copies and pastes a link from their browser shares their session. Session hijacking is especially dangerous when the session is stolen from an administrator. OWASP suggests the following guidelines for handling session IDs:
   1. Use long session IDs with lots of entropy
   2. Generate session IDs using a pseudorandom number generator with good cryptographic properties (`rand` is not a good choice)
   3. Protect against XSS to avoid script execution that would reveal the session ID
   4. When a user authenticates, generate a fresh session ID
   5. Keep up to date with security patches and versions - too many systems run outdated versions with known vulnerabilities
   6. Use cookies to exchange session IDs; do not accept session IDs via other mechanisms

*Authentication* means we verify the identity of the caller - is the caller who he or she claims to be? Some dos and don'ts:
   1. Don't keep passwords in your database
   2. Never email a password to a user as part of a "*forgotten password*" process
   3. Do apply a strong hash algorithm to passwords. Use a "*salt*" - some random data added to the password to make dictionary attacks harder (see the sketch after this list)
   4. Do allow users to enter overly long passwords
   5. Do allow users to paste passwords into GUIs
   6. Do plan on rehashing passwords at some point in the future - we have to keep increasing the strength of our hash algorithms. Make sure you can change the salt too
   7. Don't allow attackers to make unlimited authentication attempts
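A minimal sketch of guideline 3 using only Python's standard library (the iteration count is illustrative - raise it as hardware improves): a fresh random salt plus a deliberately slow hash, stored together:

```python
import hashlib
import hmac
import secrets

ITERATIONS = 600_000  # illustrative; increase over time

def hash_password(password: str) -> tuple[bytes, bytes]:
    salt = secrets.token_bytes(16)  # fresh random salt per user
    digest = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, ITERATIONS)
    return salt, digest  # store both; never the password itself

def verify_password(password: str, salt: bytes, digest: bytes) -> bool:
    candidate = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, ITERATIONS)
    return hmac.compare_digest(candidate, digest)  # constant-time comparison

salt, digest = hash_password("correct horse battery staple")
assert verify_password("correct horse battery staple", salt, digest)
assert not verify_password("guess", salt, digest)
```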
3. Cross-Site Scripting - happens when a service renders a user's input directly into HTML without escaping it; it is related to injection attacks. The bottom line: never trust input - scrub it on the way in and escape it on the way out. Don't build structured data by smashing strings together.

4. Broken Access Control - application problems that allow attackers to access data they shouldn't. One common form is "*direct object access*" - when a URL contains something like a database ID as a query parameter. The solution is to reduce the value of URL probing and to check authorisation to objects in the first place: generate unique but non-sequential identifiers, or use a generic URL that is session-sensitive (`/users/123` -> `/users/me`). Rule of thumb: *if a caller is not authorised to see the contents of a resource, it should be as if the resource doesn't even exist* (`404` instead of `403`). When a request involves a file upload, the caller could overwrite any file the service is allowed to modify - the only safe way to handle file uploads is to treat the client's filename as an arbitrary string to store in a database field. Don't build a path from the filename in the request.

5. Security Misconfiguration - default passwords are a serious problem. Security misconfiguration usually takes the form of omission: servers enable unneeded features by default, and admin consoles are a common source of problems. Another common misconfiguration is servers listening too broadly. You can improve information security right away by splitting internal traffic onto its own NIC, separate from public-facing traffic. Make sure every administrator uses a personal account, not a group account. Go ahead and add some logging to those administrative and internal calls.

6. Sensitive Data Exposure - credit cards, medical records, insurance files, purchasing data, emails - all valuable things people can steal from you or use against you. Hackers don't attack your strong points; they look for cracks in your shell. It can be as simple as an employee's stolen laptop with a database extract in a spreadsheet. Some guidelines:
   1. Don't store sensitive information that you don't need
   2. Use HTTP Strict Transport Security - it prevents clients from negotiating their way down to insecure protocols
   3. Stop using SHA-1
   4. Never store passwords in plain text
   5. Make sure sensitive data is encrypted in the database
   6. Decrypt data based on the user's authorisation, not the server's

Consider using AWS Key Management Service - applications request data encryption keys, which they use to encrypt or decrypt data. HashiCorp Vault is an alternative to AWS KMS.

7. Insufficient Attack Protection - always assume that attackers have unlimited access to other machines behind the firewall. Services do not typically track illegitimate requests by their origin, and they do not block callers that issue too many bad requests - which allows an attacking program to keep making calls. API gateways are a useful defence here: an API gateway can block callers by their API key and throttle their request rate. Normally this helps preserve capacity; in the case of an attack, it slows the rate of data compromise, thereby limiting the damage.

8. Cross-Site Request Forgery - used to be a bigger issue than it is now. A CSRF attack starts on another website: an attacker uses a web page with JS, CSS or HTML that includes a link to your system, and when the hapless user's browser accesses your system, your system thinks it is a valid request from that user. Make sure that requests with side effects (password change, mailing address update, purchases) use anti-CSRF tokens - extra fields containing random data that your system emits when rendering a form. Most frameworks today do this for you. You can also tighten up your cookie policy with the "*SameSite*" attribute, which makes the browser send the cookie only if the document's origin is the same as the target's origin. SameSite cookies may require changes to your session management approach.

9. Using Components with Known Vulnerabilities - most successful attacks are not the exciting "*zero day, rush to patch before they get it*" kind. Most attacks are mundane; it is important to keep applications up to date.

10. Underprotected APIs - it is essential to make sure APIs are not misused. APIs must ensure that a malicious request cannot access data the original user would not be able to see. An API should use the most secure means available to communicate. Make sure the parser is hardened against malicious input - fuzz-testing APIs is especially important.

The principle of Least Privilege - a process should have the lowest level of privilege needed to accomplish its task. Anything application services need to do, they should do as non-administrative users. Containers provide a nice degree of isolation from each other - instead of creating multiple application-specific users on the host operating system, you can package each application into its own container.
Configured Passwords - at the absolute minimum, passwords to production databases should be kept separate from any other configuration files. Password vaulting keeps passwords in encrypted files, which reduces the security problem. AWS Key Management Service is useful here: with KMS, applications use API calls to acquire decryption keys, so the encrypted data doesn't sit in the same storage as the decryption keys.

Frameworks can't protect you from the Top 10, and neither can a one-time review by your company's application security team. Security is an ongoing activity - it must be part of the system's architecture, and you must have a process to discover attacks.

## Chapter 12: Case Study: Waiting for Godot

## Chapter 13: Design for Deployment

How to design applications for easy rollout - packaging, integration point versioning and database schema. Once upon a time, we wrote our software, zipped it up and threw it over the wall to operations, so they could deploy it. Operations would schedule some *planned* downtime to execute the release. HOWEVER, users should not care about downtime - the application should be updated without them knowing about the release.

Most of the time, we design for the state of the system after a release, assuming the whole system can be changed in some instantaneous quantum jump. Instead, we have to treat deployment as a feature. Three key concerns: automation, orchestration and zero-downtime deployment.

AUTOMATED DEPLOYMENTS - the build pipeline is the first tool of interest. It picks up after someone commits a change to version control. Build pipelines are often implemented with CI servers. CI would stop after publishing a test report and an archive; the build pipeline goes beyond that - it runs a series of steps that culminate in a production deployment (deploy code to a trial environment, run migrations, perform integration tests). Each stage of the build pipeline is looking for reasons to reject the build - failed tests, lint complaints, failed integrations. Tools: Jenkins, GoCD, Netflix Spinnaker, AWS CodePipeline. Do not look for the best tool - pick one that suffices and get good with it. Avoid the analysis trap. At the end of the build pipeline, the build server interacts with one of the configuration management tools.

Between the time a developer commits code to the repository and the time it runs in production, code is a pure liability. It may have unknown bugs, it may break scaling or cause production downtime, it might be a great implementation of a feature nobody wants. The idea of continuous deployment is to reduce that delay as much as possible, to reduce the liability of undeployed code. A bigger deployment with more changes is definitely riskier. "*If it hurts, do it more often*" - do everything continuously; for the build pipeline this means running the full build on every commit.

Shim - a thin piece of wood that fills a gap where two structures meet. In deployments, a shim is a bit of code that helps join the old and new versions of the application. For example, when migrating a database, old instances read from the old table while new instances read from the new table. Shims can be implemented with SQL triggers - an insert into one table is propagated to the other.

[MUTABLE INFRASTRUCTURE] We typically update machines in batches. Choose how to divide your machines into equal-sized groups - suppose we have five: Alpha, Bravo, Charlie, Delta, Foxtrot. The rollout would go like this:

1. Instruct Alpha to stop accepting new requests
2. Wait for load to drain from Alpha
3. Run the configuration management tool to update code and config
4. Wait for green health checks on all machines in Alpha
5. Instruct Alpha to start accepting requests
6. Repeat the process for Bravo, Charlie, Delta, Foxtrot

The first group should be the canary group - pause there to evaluate the build before moving on to the next group. Use traffic shaping at your load balancer to gradually ramp traffic up to the canary group while watching the metrics for anomalies. Every application should include an end-to-end health check.

[IMMUTABLE INFRASTRUCTURE] To roll code out here, we don't change the old machines - we spin up new machines running the new version of the code. Machines can be started in the existing cluster or in a new cluster. With frequent deployments, you are better off starting new machines in the existing cluster; that avoids interrupting open connections when switching between clusters. Be careful about caches and sessions. Remember the post-rollout cleanup - drop old tables, views, columns, aliases, ...

DEPLOY LIKE THE PROS - deployments today are frequent and should be seamless. The boundary between operations and development has become fractal. Designing for deployment gives you the ability to make large changes in small steps. This all rests on a foundation of automated action and quality checking. The build pipeline should be able to apply all the accumulated wisdom of your architects, developers, designers, testers and DBAs. Software should be designed to be deployed easily. Zero downtime is the objective. Smaller, easier deployments mean you can make big changes over a series of small steps - that reduces disruption to your users.

## Chapter 14: Handling Versions

It is better for everyone if we do some extra work on our end to maintain compatibility, rather than pushing migration costs onto other teams. How can your software be a good citizen? Each consuming application has its own development team that operates on its own schedule. If you want others to respect your autonomy, then you must respect theirs - you can't force consumers to match your release schedule. Trying to coordinate consumer and provider deployments doesn't scale.

TCP specification (Postel's Robustness Principle):

> Be conservative in what you do, be liberal in what you accept from others.

Consumer and provider must share a number of agreements in order to communicate: connection handshaking and duration, request framing, content encoding, message syntax, message semantics, authorisation and authentication. Postel's Robustness Principle can be seen as a Liskov Substitution Principle for interfaces: we can always accept more than we accepted before, but we cannot accept less or require more; we can return more than we returned before, but we cannot return less.

Handling breaking changes - the best approach is to add a version discriminator to the URL. This is the most common approach. You have to support both the old and the new version for some period of time, operating side by side - this allows consumers to upgrade as they are able. Internally you want to avoid duplication, so handle this in the controller: methods that handle the new API go directly to the most current version of the business logic, while methods that handle the old API convert old objects to current ones on requests and convert new objects back to old ones on responses (see the sketch below).
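A minimal sketch of that controller-level conversion in Python (endpoint and field names are invented for illustration): v2 is the current business logic, and the v1 handler only translates payloads at the boundary:

```python
def create_user_v2(payload: dict) -> dict:
    """Current business logic - knows only the new shape."""
    full_name = payload["full_name"]
    return {"id": 42, "full_name": full_name}

def create_user_v1(payload: dict) -> dict:
    """Old API kept alive: translate at the edges, share the core logic."""
    # v1 clients send separate first/last fields - convert on the way in.
    new_payload = {"full_name": f"{payload['first_name']} {payload['last_name']}"}
    result = create_user_v2(new_payload)
    # ...and convert the response back to the old shape on the way out.
    first, _, last = result["full_name"].partition(" ")
    return {"id": result["id"], "first_name": first, "last_name": last}

print(create_user_v1({"first_name": "Ada", "last_name": "Lovelace"}))
print(create_user_v2({"full_name": "Ada Lovelace"}))
```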
When receiving requests or messages, your application has no control over the format. The same goes for calling out to other services - the other endpoint can start rejecting your requests at any time. After all, they may not observe the same safety rules we just described. Always be defensive.

## Chapter 15: Case Study: Trampled by Your Own Customers

Conway's Law:

> If you have four teams working on a compiler, you will get a four-pass compiler.

Conway argues that two people must - in some fashion - communicate about the specification for an interface. If the communication does not occur, the interface cannot be built. Sometimes when you ask questions but don't get answers, it means nobody knows the answers. At other times, it means nobody wants to be seen answering the questions.

Load testing is about defining a test plan, creating some scripts, and configuring the load generators and test dispatchers. Tests are often prepared wrongly - the real world is crude and rude: there are scrapers not respecting your cookie policy, search engines indexing your website, users doing weird stuff. Most websites have terms and conditions stating "*By viewing this page you agree to ...*" - with this you can sue, or at least block, the sources of bots hitting your website millions of times.

## Chapter 16: Adaptation

To make a change, your company has to go through a decision cycle - plan -> do -> check -> act. In small companies this communication may involve just one or two people; in larger companies, an entire committee. Getting around the cycle faster makes you more competitive - this drives the "*fail fast*" motto for startups. Agile and lean development methods helped remove delay from the "act" part; DevOps removes even more, and offers tons of new tools to help with observation.

Thrashing - happens when an organisation changes direction without taking the time to receive, process and incorporate feedback. You may recognise it as constantly shifting development priorities or an unending series of crises. It creates team confusion, unfinished work and lost productivity. To avoid thrashing, try to create a steady cadence of delivery and feedback.

The platform team should not implement all your specific monitoring rules - instead, this team should provide an API that lets you install your monitoring rules into the monitoring service provided by the platform.

> If your developers only use the platform because it is mandatory, then the platform is not good enough.

The Fallacy of the DevOps Team - in larger companies it is common to find a group called the DevOps team, sitting between development and operations with the goal of moving faster and automating releases into production. *This is an antipattern*. DevOps should soften the interface between different teams. DevOps goes deeper than deployment automation: it is a shift from ticket- and blame-driven operations with throw-it-over-the-wall releases TO one based on open sharing of information and skills, data-driven decision-making about architecture and design, and production availability and responsiveness. Isolating these ideas in a single team undermines the whole point.

Frequent releases with incremental functionality allow your company to outpace its competitors.

Blue/green deployment - machines are divided into two pools. One pool is active in production; the other pool gets the new deployment. That leaves time to test the new build before exposing it to customers. Once the new pool looks good, you shift production traffic over to it.

More code means it is harder to change. Large codebases are more likely to become overgeneralised.
A shared database means every change has a higher potential to disrupt. A big service accumulates complexity faster than the sum of two smaller services. It is easier to maintain and prune a bonsai juniper than a hundred-foot oak.

The key to making evolutionary architecture work is failure: you have to try different approaches to similar problems and kill the ones that are less successful.

Jeff Bezos said that every team should be sized no bigger than you can feed with two large pizzas. Important, but misleading: it is not just about having fewer people on a team. A self-sufficient two-pizza team also means each team member has to cover more than one discipline. You can't have a two-pizza team if you need a dedicated DBA, a frontend developer, an infra guru, a backend developer, an ML expert, a product manager, a GUI designer, and so on. The two-pizza team is about reducing external dependencies - a thousand dependencies will keep you from breaking free. It is really about having a small group that can be self-sufficient and push things all the way through to production.

No coordinated deployments - if you ever find that you need to update both the provider and the caller of a service interface at the same time, it is a warning sign that those services are strongly coupled.

An evolutionary architecture supports incremental, guided change as a first principle across multiple dimensions. Architecture styles:

- Microservices - very small, disposable units of code. Emphasise scalability and team-scale autonomy. Vulnerable to coupling with the platform for monitoring, tracing and continuous delivery.
- Microkernel and plugins - an in-process, in-memory message-passing core with formal interfaces to extensions. Good for incremental change in requirements and combining work from different teams. Vulnerable to coupling to the language and runtime environment.
- Event-based - prefers asynchronous messages for communication, avoiding direct calls. Good for temporal decoupling; allows new subscribers without changes to publishers; allows logic changes and reconstruction from history. Vulnerable to semantic change in message formats over time.

Microservice size: ideally no bigger than what fits in one developer's head. Don't pursue microservices just because the Silicon Valley unicorns are doing it - make sure they address a real problem you are likely to suffer. Otherwise, the operational overhead and debugging difficulty of microservices will outweigh the benefits.

Systems should exhibit loose clustering. In a loose cluster, the loss of an individual instance is no more significant than the fall of a single tree in a forest. The members of a cluster should not be configured to know the identities of the other members of the cluster.

Modular systems inherently have more options than monolithic ones. Five modular operators, borrowed from hardware:

1. Splitting - breaking things into modules, or a module into submodules. The key with splitting is that the interface to the original module is unchanged: before splitting, it handles the whole thing itself; afterward, it delegates work to the new modules but supports the same interface.
2. Substituting - replacing one module with another (like swapping an nVidia card for an AMD one). The original module and the substitute need to share a common interface.
3. Augmenting and Excluding - augmenting is adding a module to a system; excluding is removing one.
If you design your parent system to make augmenting and excluding first-class citizens, you will reach a different design.

4. Inversion - taking functionality that is distributed in several modules and raising it up higher in the system.
5. Porting - repurposing a module from a different system. Any time we use a service created by a different project or system, we are porting that service to our system. Porting risks adding coupling.

Information architecture is how we structure data. It is the data and the metadata we use to describe the things that matter to our systems - a set of related models that capture some facets of reality. Your job in building systems is to decide which facets of reality matter to your system, how you are going to represent them, and how that representation can survive over time.

Events can be used for:

- Notifications - fire and forget, a one-way announcement, no response expected
- Event-carried state transfer - an event that replicates entities or parts of entities so other systems can do their work
- Event sourcing - all changes are recorded as events that describe the change
- Command-query responsibility segregation - reading and writing with different structures. Not the same as events, but events are often found on the "command" side

Versioning can be a real challenge with events, especially once you have years' worth of them. Stay away from closed formats like serialised objects; look toward open formats like JSON or self-describing messages. Avoid frameworks that require code generation based on a schema. Treat messages like data instead of objects, and you are going to have a better time supporting very old formats.

Extract a "*policy proxy*" - questions of ownership and access control can be factored out of the service itself into a more centrally controlled location. Use URL dualism to support many databases, by using URLs as both the item identifier and a resolvable resource. Be careful: you should be able to verify that whatever you receive back is something you generated.

One of the basic enterprise architecture patterns is the "Single System of Record": any particular concept should originate in exactly one system, and that system is the enterprise-wide authority on entities within that concept. We need to be careful about exposing internal concepts to other systems - it creates semantic and operational coupling that hinders future change.

## Chapter 17: Chaos Engineering

Chaos engineering - the discipline of experimenting on a distributed system in order to build confidence in the system's capability to withstand turbulent conditions in production.

Staging or QA environments aren't much of a guide to the large-scale behaviour of systems in production. Congested networks behave in a qualitatively different way than uncongested ones; systems that work in a low-latency, low-loss network may break badly in a congested one.

A related paradox - the *Volkswagen microbus* - you learn how to fix the things that often break, but you don't learn how to fix the things that rarely break. That means when they do break, the situation is likely to be more dire. We want a continuous low level of breakage to make sure our system can handle the big things. We use chaos engineering the way a weightlifter uses iron: to create tolerable levels of stress and breakage to increase the strength of the system over time.

At Netflix, chaos is an opt-out process.
That means every service in production will be subject to Chaos Monkey. Other companies adopting chaos engineering have chosen an opt-in approach. When you are adding chaos engineering to an organisation, consider starting with opt-in. You must be able to break the system without breaking the bank - if that is not the case, chaos engineering is not for you.

> If you have a wall full of green dashboards, that means your monitoring tools aren't good enough. There is always
> something weird going on.

Make sure you have a recovery plan. The system may not automatically return to a healthy state when you turn off the chaos - you need to know what to restart, disconnect or clean up.

Chaos Monkey does one kind of injection - it kills instances randomly. There are other types of monkeys: Latency Monkey, Janitor Monkey, Chaos Kong, ... Killing instances is the most basic and crude kind of injection, but it will absolutely find weaknesses in your system. Netflix also uses failure injection testing (FIT). FIT can tag a request at the inbound edge with a cookie that says, for example, "down the line, this request is going to fail when service G calls service H". Netflix uses a common framework for all its outbound service calls, so it has a way to propagate this cookie and treat it uniformly.

High-reliability organisations use drills and simulations to find the same kind of systematic weaknesses on the human side as on the software side. You can make this more fun by calling it a "*zombie apocalypse simulation*": randomly select 50% of your people and tell them they are zombies for the rest of the day. After the simulation, review the issues.

================================================
FILE: books/system-design-interview.md
================================================
[go back](https://github.com/pkardas/learning)

# System Design Interview

Book by Alex Xu & Sahn Lam

- [1. Proximity Service](#1-proximity-service)

## 1. Proximity Service

================================================
FILE: books/tidy-first.md
================================================
[go back](https://github.com/pkardas/learning)

# Tidy First?

Book by Kent Beck

- [1. Guard Classes](#1-guard-classes)
- [2. Dead code](#2-dead-code)
- [3. Normalize symmetries](#3-normalize-symmetries)
- [4. New Interface, Old implementation](#4-new-interface-old-implementation)
- [5. Reading Order](#5-reading-order)
- [6. Cohesion Order](#6-cohesion-order)
- [7. Move Declaration and Initialization Together](#7-move-declaration-and-initialization-together)
- [8. Explaining variables](#8-explaining-variables)
- [9. Explaining constants](#9-explaining-constants)
- [10. Explicit parameters](#10-explicit-parameters)
- [11. Chunk statements](#11-chunk-statements)
- [12. Extract helper](#12-extract-helper)
- [13. One pile](#13-one-pile)
- [14. Explaining comments](#14-explaining-comments)
- [15. Delete redundant comments](#15-delete-redundant-comments)
- [16. Separate Tidying](#16-separate-tidying)
- [17. Chaining](#17-chaining)
- [18. Batch Sizes](#18-batch-sizes)
- [19. Rhythm](#19-rhythm)
- [20. Getting Untangled](#20-getting-untangled)
- [21. First, After, Later, Never](#21-first-after-later-never)
- [22. Beneficially Relating Elements](#22-beneficially-relating-elements)
- [23. Structure and behavior](#23-structure-and-behavior)
- [24. Economics: Time Value and Optionality](#24-economics-time-value-and-optionality)

## 1. Guard Classes

If you see code like:

```
if condition:
    ...
```

or

```
if condition:
    if another condition:
        ...
```
tidy the above to:

```
if not condition:
    return
if not another condition:
    return
...
```

Exit immediately - it is easier to read: before we get into the details, there are some preconditions we need to bear in mind. https://github.com/Bogdanp/dramatiq/pull/470

## 2. Dead code

Delete it. If you need it later, use version control. Delete only a little code in each tidying diff - just in case it turns out you were wrong, it will be easy to revert the change.

## 3. Normalize symmetries

Tidy away unnecessary variation. Use a common style for your functions. Things get confusing when two or more patterns are used interchangeably.

## 4. New Interface, Old implementation

If some interface you need to use is very difficult to use, implement the interface you wish you could call, and call it. Implement the new interface by simply calling the old one.

## 5. Reading Order

Reorder the code in the file into the order in which a reader would prefer to encounter it.

## 6. Cohesion Order

If 2 functions are coupled, put them next to each other; if 2 files are coupled, put them in the same directory, ... If you know how to eliminate the coupling, go for it.

## 7. Move Declaration and Initialization Together

It is easier to understand the code if each variable is declared and initialized just before it's used. It is hard to read when declaration is separated from initialization.

## 8. Explaining variables

When you understand a part of a big, hairy expression, extract the subexpression into a variable named after the intention of the expression. Always separate the tidying commit from the behaviour change commit.

## 9. Explaining constants

Create a symbolic constant. Replace uses of the literal constant with the symbol.

## 10. Explicit parameters

It's common to see blocks of parameters passed in a map. This makes it hard to read and understand what data is required. Make the parameters explicit:

```
foo(params) -> foo(a, b)
```

## 11. Chunk statements

The simplest tidying: put a blank line between 2 parts doing different things. After you've chunked statements, you have many paths forward: Explaining Variables, Extract Helper or Explaining Comments.

## 12. Extract helper

A block of code that has an obvious purpose and limited interaction with the rest of the code can be extracted into a helper function. Using the helper elsewhere can be taken care of in another tidying.

## 13. One pile

Sometimes you read code that has been split into many tiny pieces, which makes it hard to understand. The biggest cost of code is the cost of reading it, not the cost of writing it. Sometimes, in order to regain clarity, the code must be merged together first, so that new, easier-to-understand parts can be extracted.

## 14. Explaining comments

Write down only what isn't obvious from the code. Put yourself in the place of the future reader, or yourself 15 minutes ago. Immediately upon finding a defect is a good time to comment - it is much better to add a comment that points out the issue than to leave it buried in the sand.

## 15. Delete redundant comments

When you see a comment that says exactly what the code says, remove it.

## 16. Separate Tidying

Tidyings should go into their own separate PRs, with as few tidyings per PR as possible. Behavior and structure changes should be in separate PRs.

## 17. Chaining

One tidying can set up another. You will begin to flow tidyings together to achieve larger changes to the structure of your code. Be wary of changing too much, too fast.
A failed tidying is expensive relative to the cost of a series of successful tidyings.

## 18. Batch Sizes

The more tidyings per batch, the longer the delay before integrating, and the greater the chance that a tidying collides with something someone else is doing. The chance of a batch accidentally changing behavior rises with the number of tidyings in the batch. The more tidyings per batch, the more we are prone to tidying "just because", with all the additional costs that creates. In many orgs, the fixed cost of getting a single change through review and deployment is substantial. Programmers feel this cost, so they move right in the trade-off space (despite collisions, interactions, ...).

## 19. Rhythm

More than an hour of tidying at a time before making a behavioral change likely means you've lost track of the minimum set of structure changes needed to enable your desired behavior change. Tidying is a minutes-to-an-hour kind of activity. Sometimes it may take longer, but not for long.

## 20. Getting Untangled

Tidying leads to more and more tidying. What to do? 3 options:

1. Ship as it is [very impolite, prone to errors, but quick]
2. Untangle the tidyings into separate PRs [more polite, but may require a lot of work]
3. Start over, tidying first [more work, but leaves a coherent chain of commits]

Re-implementation raises the possibility that you will see something new as you re-implement, letting you squeeze more value out of the same set of behavioral changes.

## 21. First, After, Later, Never

**Never**

- you are never changing this code again
- there is nothing to learn by improving the design

**Later**

- you have a big batch of tidying to do without immediate payoff
- there is eventual payoff for completing the tidying
- you can tidy in little batches

**After**

- waiting until next time to tidy first will be more expensive
- you won't feel a sense of completion if you don't tidy after

**First**

- it will pay off immediately, either in improved comprehension or in cheaper behavior changes
- you know what to tidy and how

## 22. Beneficially Relating Elements

Software design is beneficially relating elements. Elements: Tokens -> Expressions -> Statements -> Functions -> Objects/modules -> Systems. Elements have boundaries. In software design we have a handful of relations, like: invokes, publishes, listens, refers. Software designers can only:

- Create and delete elements
- Create and delete relationships
- Increase the benefit of a relationship

```
caller()
    return box.width() + box.height()
```

This function has 2 relationships with the box. The relationship can be adjusted - we can have `box.area()`:

```
caller()
    return box.area()
```

The benefit is that the caller is simpler; the cost is that `box` has an additional method.

## 23. Structure and behavior

Software creates value in two ways:

- what it does today
- the possibility of new things we can make it do tomorrow

Behavior creates value. Rather than having to calculate a bunch of numbers by hand, the computer can calculate millions of them every second. If running the software costs $1 and you can charge folks $10 to run it on their behalf, then you have a business. Structure creates options: the structure could make it easy to add new features to our system, or it could make it hard.

## 24. Economics: Time Value and Optionality
## 24. Economics: Time Value and Optionality

- A dollar today is worth more than a dollar tomorrow, so earn sooner and spend later:
  - you can't spend a future dollar yet, so for now it's worthless
  - you can't invest it
  - there's some chance that you won't get the dollar
  - in the scope of this book: the time value of money encourages tidy after over tidy first
- In a chaotic situation, options are better than things, so create options in the face of uncertainty

Software design has to reconcile the imperatives of "earn sooner/spend later" and "create options, not things".

================================================
FILE: books/understanding-distributed-systems.md
================================================

[go back](https://github.com/pkardas/learning)

# Understanding Distributed Systems: What every developer should know about large distributed applications

Book by Roberto Vitillo

================================================
FILE: case-studies/reddit.md
================================================

[go back](https://github.com/pkardas/learning)

# How Reddit mastered managing growth

*Presentation by Greg Taylor*

330M monthly active users. 8th most popular website in the world. 12M posts per month. 2B votes per month.

Reddit in 2016 - a small engineering team with a monolith application. The Infrastructure team was responsible for provisioning and configuring all infrastructure, operating most of the systems and handling non-trivial debugging. Static infrastructure. This approach worked for more than a decade.

In 2016 the team started growing rapidly. But the monolith was so fragile that every deploy was an adventure - a blocker for the organisation. How to make everyone's life easier? How to onboard new employees? Reddit decided to pursue SOA - Service-Oriented Architecture. This gave better separation of concerns between teams. However, if you have a monolith and it works well for you: "go home, give it a hug, tell it you love it, warts and all".

Growing pains: Automated tests - they started using CI, the master branch always had to be green.

Growing pains: Something to build on - instead of copying and pasting services from one another, they needed a service framework to base services on. Services are configured in the same way, they expose a similar set of ports, they have the same async event loop, they fetch secrets the same way, ... - baseplate.readthedocs.io

Growing pains: Artisanal infrastructure - they had hand-crafted infrastructure and switched to Terraform (infrastructure as code) - reusable modules - really valuable. Pulling existing infrastructure into Terraform was painful.

Growing pains: Staging/integration woes - their approach to staging was inappropriate for SOA, so they started using Kubernetes.

Growing pains: Infra team as a bottleneck - everything depended on the infrastructure team, so they gave developers more freedom to modify Terraform. Not all teams want to operate the full stack for their service.

Service ownership, the service owner is empowered to:

- Dev and test their service in a prod-like env
- Do most of the work to get to production
- Own the health of their service
- Diagnose issues

Service ownership comes with some challenges: you need to train developers, and mistakes are still going to happen. Mistakes are learning opportunities.

How to build infrastructure as a product? Service owners - learn some Kubernetes basics, deploy and operate their own services. Reddit Infrastructure - keep the Kubernetes cluster running, provision AWS resources, support and advise service owners.
Instead of learning the entire stack, engineers had to learn only one technology - Kubernetes. If a developer needs e.g. S3, an infra engineer is responsible for providing it.

Batteries included - engineers do not have to worry about logging, secrets, security, ... - everything is out of the box.

Extensive documentation and training for developers. Without it, you don't have a product, you have a pile of technology.

> An engineer should not require deep infra experience in order to be productive.

Preventing damage: resource limits, throttling, network policy, access controls, scanning for common mistakes, docker image policies

================================================
FILE: conferences/aws-innovate-ai-ml-21.md
================================================

[go back](https://github.com/pkardas/learning)

# AWS Innovate: AI/ML Edition 2021

- [Move and scale your ML experiments in the cloud](#move-and-scale-your-ml-experiments-in-the-cloud)
- [Detect potential bias in your datasets and explain how your models predict](#detect-potential-bias-in-your-datasets-and-explain-how-your-models-predict)
- [Deploy state-of-the-art ML models and solutions in a single click](#deploy-state-of-the-art-ml-models-and-solutions-in-a-single-click)

The online conference took place on 24.02.2021, I participated in a couple of talks.

## Move and scale your ML experiments in the cloud

Machine learning experiments (labeling the data, storage, sharing, saving, tuning parameters) can be done in the Amazon SageMaker IDE - a secure, scalable, compliant, DevOps-ready solution.

**How to start?** We usually start with local notebooks, which are not powerful enough. You could move your Jupyter Notebook to the cloud (doing it on your own - a lot of maintenance), but we can do better.

DEMO:

1. Just go to the SageMaker page on AWS
2. Open SageMaker Studio (limitation: one instance per region)
3. We are going through the Standard setup:
   1. Authentication method selection (SSO or IAM)
   2. Permissions: which resources it can access - e.g. storage, by default SageMaker has access to any bucket with "sagemaker" in the name
   3. You can make your notebook shareable
   4. Network and storage definitions - VPC or Public Internet, security groups, encryption
   5. You can add your tags to identify resources
4. Setup will take a few minutes

You can open the application. This is literally JupyterLab. You can, for example, clone a GitHub repo there and run the notebooks (it has git integration, so switching between branches is easy). You can easily switch machines, the largest: 488GB of RAM!

![aws-innovate-ai-ml-21-1](../_images/aws-innovate-ai-ml-21-1.png)

Example training:

![aws-innovate-ai-ml-21-2](../_images/aws-innovate-ai-ml-21-2.png)

SageMaker is not just a notebook - it allows for data preparation, building models, training, tuning and deployment.

## Detect potential bias in your datasets and explain how your models predict

Bias - an unfair representation of reality. As we use datasets, there is a risk that the data we use does not represent reality.

Explainability - with complex models (e.g. deep learning) it is hard to understand why the model came up with a prediction. We need to know why the model came up with a certain decision, e.g. in medicine, or because of legal obligations.

**How to solve these issues?**

We used a dataset with the following columns: age, sex, skin colour, ... Zooming in on sex: 1/3 female, 2/3 male - imbalanced. Zooming in even more: a 1:7 female-to-male ratio among earners with >50k USD. The model can be biased towards the overrepresented group.
So the first approach is to visualise the data to detect bias. But AWS has something better.

**Analysis using Amazon SageMaker Clarify**

Bias analysis: pre-training analysis and post-training analysis. We define the "potentially" biased group: `facet_name="Sex"`. Results are displayed in nice charts (many awesome metrics):

![aws-innovate-ai-ml-21-3](../_images/aws-innovate-ai-ml-21-3.png)

It also outputs a report in HTML and as a Jupyter Notebook.

**Explainability** - it uses SHAP 🎉 https://github.com/slundberg/shap

For explainability AWS outputs a similar report:

![aws-innovate-ai-ml-21-4](../_images/aws-innovate-ai-ml-21-4.png)

## Deploy state-of-the-art ML models and solutions in a single click

SageMaker Studio. Problem: text analysis - there are 60 models prepared for text analysis. We can select one, e.g. trained on Wikipedia. Then we can deploy the model, and we can fine-tune it - we need to provide the dataset in a special format. The model has an endpoint, which can be tested in the Jupyter Notebook.

![aws-innovate-ai-ml-21-5](../_images/aws-innovate-ai-ml-21-5.png)

We have a notebook, but we cannot give it to the Product Managers, which is why we can integrate it with, for example, a UI. There are libraries for the integration with JavaScript. Example: a banana slicer review from Amazon:

![aws-innovate-ai-ml-21-6](../_images/aws-innovate-ai-ml-21-6.png)

New data flow - a tool for preparing new data. Then you can pass the data to the model to train.

**Remember to shut down the endpoint because you pay for it $$$.**

================================================
FILE: conferences/brown-bags.md
================================================

[go back](https://github.com/pkardas/learning)

- [NLP - State of the Art](#nlp---state-of-the-art)
- [Kanban Training](#kanban-training)

## NLP - State of the Art

*By Michał Jakóbczyk*

Turing Test - are you able to distinguish whether you are talking to a computer or a person? It determined the direction of development of NLP.

> The Man Who Mistook His Wife For a Hat - Oliver Sacks - book recommendation.

Analyse a sentence:

```python
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")  # any trained pipeline will do
displacy.render(nlp("Some sentence"))
```

"They ate the pizza with anchovies" - context matters (pizza topped with fish, or eaten using fish?).

"They ate the pizza with hands"

"I shot an elephant in my pyjamas" - the model will attach the pyjamas to the elephant.

"I shot an elephant, in my pyjamas" - the model will attach the pyjamas to the person.

We know about these differences! Models have difficulties.

40-50 years ago NLP was mostly about POS tag analysis, recently it is more about machine learning.

Python code -> Assembler <- Machine learning model. In the end everything is Assembly.

*playground.tensorflow.org* - 1 square = 1 neuron that is basically checking one if / one line.

Text to numbers:

- document vectorisation - 1 if the document contains the word, 0 otherwise
- one-hot encoding - you can use it for encoding word position (2D matrix) - a lot of memory
- word embeddings - place the word in a multidimensional space - adding vectors, drawing a multidimensional sphere containing multiple words - *projector.tensorflow.org*

We can compare sentences using embeddings.

```python
nlp("Gave a research talk in Boston").similarity(nlp("Had a science lecture in Seattle"))
```

Training is done using input text: words are removed one by one and the machine is supposed to guess the missing word.

GPT-3 - the biggest transformer, almost $5M spent on training this model
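As a minimal illustration of what such a `similarity` call computes under the hood - cosine similarity between embedding vectors (plain Python; the toy 3-dimensional vectors are made up, real embeddings have hundreds of dimensions):

```python
import math

def cosine_similarity(a: list[float], b: list[float]) -> float:
    # Dot product of the vectors, normalised by their lengths:
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# Hypothetical word vectors:
king, queen, pizza = [0.9, 0.8, 0.1], [0.85, 0.82, 0.12], [0.1, 0.2, 0.95]

print(cosine_similarity(king, queen))  # close to 1.0 - related words
print(cosine_similarity(king, pizza))  # much lower - unrelated words
```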
## Kanban Training

*By Marcin Lelek*

https://tools.kaiten.io/featureban

KANBAN - card + signal, the name of the board, a method for implementing improvements requested by the client. Created by Toyota.

3 rules:

- start with what you do now
- gain agreement to evolutionary change (don't make changes against people, agree on change)
- encourage acts of leadership at all levels (independent teams)

General practices:

- you need to have a board to visualise progress
- the number of items in Work In Progress is limited
- manage flow - work flow management, not people optimisation
- make policies explicit - define a policy for how to treat a card in a column, e.g. when a card moves from one column to another
- implement feedback loops
- improve collaboratively - evolve experimentally

Different levels of Kanban boards - e.g. 1 WIP per person.

================================================
FILE: conferences/pycon-2022.md
================================================

[go back](https://github.com/pkardas/learning)

- [[EN] Don’t use a lot where a little will do. A story of programming tricks you wish you invented](#en-dont-use-a-lot-where-a-little-will-do-a-story-of-programming-tricks-you-wish-you-invented)
- [[EN] Effective data science teams with databooks](#en-effective-data-science-teams-with-databooks)
- [[PL] Poetry - poezja pythonowych pakietów](#pl-poetry---poezja-pythonowych-pakietw)
- [[EN] Interfaces in Python. The benefits and harms](#en-interfaces-in-python-the-benefits-and-harms)
- [[EN] Observability in backends with Python and OpenTelemetry](#en-observability-in-backends-with-python-and-opentelemetry)
- [[EN] Hitchhiker's guide to typing](#en-hitchhikers-guide-to-typing)
- [[EN] Lightning talks](#en-lightning-talks)
- [[PL] Dzielenie monolitu w praktyce](#pl-dzielenie-monolitu-w-praktyce)
- [[EN] pytest on steroids](#en-pytest-on-steroids)
- [[EN] Music information retrieval with Python](#en-music-information-retrieval-with-python)

## [EN] Don’t use a lot where a little will do. A story of programming tricks you wish you invented

Regex has a debug mode - `re.DEBUG`

Python has a built-in HTTP server capable of serving static files from the current directory.

## [EN] Effective data science teams with databooks

`databooks` - a tool for dealing with notebooks (automatic conflict resolution, metadata stripping, pre-commit hooks, printing a notebook in a terminal, pretty-printing git diffs)

Architecture as Code (AaC) with Python, or a way to become your own boss: prototyping and visualization of system architecture using code. `diagrams` - a library for creating diagrams from Python

## [PL] Poetry - poezja pythonowych pakietów

A tool for managing dependencies, as well as for creating and publishing packages. It can replace `pip` or `virtualenv`.

Semantic versioning: `major.minor.patch`

## [EN] Interfaces in Python. The benefits and harms

Abstract classes in Python - ABC

Sequence - any collection implementing 2 methods (length and getter)

Dependency Injection - passing dependencies directly to, for example, the init method.

## [EN] Observability in backends with Python and OpenTelemetry

Trace - a JSON object, can travel between services. Simple types (int, bool, lists, ...)

Auto-instrumentation - install a couple of libraries, run the command, done.

Manual instrumentation - via a context manager or a decorator inside the code.

Distributed tracing with queues - the context of the trace becomes part of the message that you enqueue.

Jaeger - one of the tools compatible with OpenTelemetry.

uvicorn has a separate thread for OpenTelemetry data.
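A minimal manual-instrumentation sketch (assuming the `opentelemetry-sdk` package is installed; the console exporter and the span/attribute names are just for demonstration):

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Wire up a provider that prints finished spans to the console:
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer(__name__)

# Manual instrumentation via a context manager:
with tracer.start_as_current_span("fetch-user") as span:
    span.set_attribute("user.id", 42)
    ...  # the code being traced
```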
## [EN] Hitchhiker's guide to typing

urllib3 case study: https://sethmlarson.dev/blog/tests-arent-enough-case-study-after-adding-types-to-urllib3

## [EN] Lightning talks

GitHub Actions are capable of running cron jobs.

Idea: when learning a new language, rewrite an existing command line tool in the selected language.

## [PL] Dzielenie monolitu w praktyce

Success criteria for extracting a microservice: see results quickly, be able to back out at any moment, test the system with production traffic, BUT without breaking production, be able to return to the old solution, and change as little as possible in the monolith.

Transforming the monolith into a microservice: extract an interface in the monolith, create a microservice with an identical interface, add a new implementation in the monolith that uses the new service. When a request comes in, we can send it to both places; the final response should come from the old system. After a testing period, we switch to the new solution.

![pycon-2022-monolith](../_images/pycon-2022-monolith.jpeg)

Strangler Pattern - named after a plant that parasitises a tree, using the tree to grow upwards and then killing it.

Operating in Shadow Mode - extract the microservice, collect requests and results.

## [EN] pytest on steroids

Everything in pytest is a plugin. When you create a fixture, you create a local plugin.

## [EN] Music information retrieval with Python

`pedalboard` by Spotify - a Python library for audio effects

`Pyo` - an audio synthesis engine, effects control, implementing loopers, used in live music

![pycon-2022-apis](../_images/pycon-2022-apis.jpeg)

`ISMIR dataset` - various datasets with music, lyrics, ...

`mirdata` - a Python wrapper for ISMIR datasets

`Librosa` - a library for music analysis

In general, there are plenty of tools for music analysis, which can then be used to train ML models.

![pycon-2022-music-tagging](../_images/pycon-2022-music-tagging.jpeg)

![pycon-2022-source-separation](../_images/pycon-2022-source-separation.jpeg)

![pycon-2022-source-separation-1](../_images/pycon-2022-source-separation-1.jpeg)

![pycon-2022-transcription](../_images/pycon-2022-transcription.jpeg)

Music recommendations: very complex, massive business and cultural impact:

![pycon-2022-music-recommendations](../_images/pycon-2022-music-recommendations.jpeg)

Generating music - neural audio synthesis or symbolic composition (which then needs to be played by a human).

Links:

- https://openai.com/blog/jukebox/
- https://youtu.be/bXBliLjImio
- https://youtu.be/MwtVkPKx3RA
- https://youtu.be/tgq1YTQ2c0s
- https://magenta.tensorflow.org

================================================
FILE: courses/fast-ai.md
================================================

[go back](https://github.com/pkardas/learning)

# Practical Deep Learning for Coders

Course -> https://course.fast.ai/

[TOC]

## Lesson 1

Truths to know before starting with Deep Learning:

- high school math is sufficient
- there is no need for enormous amounts of data
- no need for expensive hardware for basic usage

1961 - the first machine built on top of a mathematical model from 1943. Heavily criticised by Minsky - he showed that a single-layer artificial neural network could not learn a simple XOR. The global academic community gave up on neural networks. 1986 - MIT released a paper defining requirements for building and using neural networks. Later, researchers proved that adding additional layers of neural networks is enough to approximate any mathematical model.
But in fact these models were too slow and too big to be useful.

**What is ML?** Like regular programming, a way to get computers to complete a specific task. Instead of telling the computer the exact steps to solve a problem, show it examples of the problem to solve and let it figure out how to solve it itself.

*Neural network* - a parametrised function that can solve any problem to any level of accuracy (in theory - the *universal approximation theorem*).

What does it mean to train a neural network? It means finding good weights. The procedure for finding them is **SGD** - Stochastic Gradient Descent.

Neural Networks work using patterns, need labeled data and create PREDICTIONS, not recommended actions. You need to be super careful about the input data - initial bias (stereotypical data) will produce biased results. E.g. marijuana consumption is equal among white and black people, but black people are more often arrested for marijuana possession. Biased input data will produce biased predictions, e.g. sending more police officers to black neighbourhoods.

Segmentation - marking areas on images (trees, cars, ...)

## Lesson 2

When you want to predict a category, you are facing a classification problem. Whenever you want to predict a number, you are dealing with a regression problem.

```python
learn = cnn_learner(data, architecture, metric)
```

Architecture - e.g. *resnet34, resnet50* - the name of the architecture (the number is the number of layers) - the function that we are optimising.

Epoch - looking at every image in the training set once = 1 epoch, 1 loop

Metric - a function measuring the quality of the model's predictions (*error_rate, accuracy*) - we care about it. Loss != Metric; loss is what the computer uses to update the parameters - the computer cares about it. For example, tweaking parameters just a little might not change accuracy or error rate.

The model might cheat - "I have seen this image, this is a cat" - we don't want the model to memorise images. That is why we need a split into training and validation sets. For validating time series, you should not remove a random e.g. 20% of the data; instead, drop off the end and let the model predict e.g. the next 2 weeks.

*Transfer learning* - using a pretrained model for a task different to what it was originally trained for. Take the pretrained weights as initial weights, add more epochs on your specific dataset, and you will end up with a much better model.

*Fine tuning* - a transfer learning technique where the weights of a pretrained model are updated by training for additional epochs, using a task different to that used for pretraining. You can take advantage of pretrained features - e.g. dog faces, patterns, etc.

Computer Vision can be used for a variety of problems, e.g. sound or virus analysis (data transformed into images).

![fast-ai-1](../_images/fast-ai-1.png)

Set of pretrained models: https://modelzoo.co/

*How to decide if there is a relationship?* *Null hypothesis* - e.g. "no relationship between X and Y" -> gather data -> how often do we see a relationship?

*P-Value* - the probability of an observed result assuming that the null hypothesis is true.

## Lesson 3

Square images are easier to process, you need to remember the length of only one dimension. `Squishing` is the most efficient method for resizing, because cropping removes information and adding black bars wastes computation. Another common method is `Random Resize Crop` - over a few batches, different parts of the image are taken.

ImageClassifierCleaner - a utility tool (GUI) for finding the examples the classifier is least confident about. You can manually improve the labelling.
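A usage sketch following the pattern from the fastai docs/book; `learn` (the trained Learner) and `path` (the dataset root) are assumed to come from earlier training code:

```python
import shutil

from fastai.vision.all import *
from fastai.vision.widgets import ImageClassifierCleaner

cleaner = ImageClassifierCleaner(learn)
cleaner  # renders the review widget in the notebook

# After reviewing images in the widget, apply the decisions:
for idx in cleaner.delete():
    cleaner.fns[idx].unlink()  # delete mislabelled or broken images
for idx, cat in cleaner.change():
    shutil.move(str(cleaner.fns[idx]), path/cat)  # move to the corrected class folder
```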
`VBox` - you can group multiple widgets together and create a prototype application in a notebook.

`voila` - a plugin for hiding cells with code, so only inputs and outputs are visible. Add `voila` to the URL, and it will display an application-like website in the browser. Great for prototyping.

mybinder.org - you can turn a notebook from GitHub into a publicly available web application.

*Healthy skin* example - Bing returns images of a young white woman - bias!

Book recommendation: *Building Machine Learning Powered Applications*

Feedback loop - e.g. predictive policing - a system that sends police - feedback loops can result in negative implications of that bias getting worse and worse - e.g. you send police to the same place over and over.

FastPages - dump a notebook into a page.

Recognising handwritten digits (MNIST) was considered a challenging problem ~20 years ago. Baseline idea: compare a model / ideal number with the input - for MNIST, calculate the average of the training set, and on the validation set calculate the distance (~95% accuracy). A baseline should be something simple to implement - then you build something on top of it.

Broadcasting - if the shapes of 2 elements don't match, e.g. A (1010, 28, 28) - B (28, 28), B will be subtracted from each of the 1010 items of A.

PyTorch has an engine for calculating derivatives. In PyTorch, `_` at the end of a method name means "in-place method".

Learning rate - the size of a step in gradient descent

## Plant Pathology

https://www.kaggle.com/c/plant-pathology-2021-fgvc8/overview

```python
import csv

from fastai.vision.all import *
from fastai.metrics import error_rate, accuracy

path = Path("/kaggle/input/plant-pathology-2021-fgvc8")

# Prepare data, labels are stored separately:
with open(path / "train.csv", mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    train_labels = {
        row["image"]: row["labels"] for row in csv_reader
    }

# Function used for labeling images:
def label_func(file_path: Path) -> str:
    return train_labels[str(file_path).split('/')[-1]]

# Read data:
data_block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=label_func,
    item_tfms=Resize(224)
)
# DataBlock to DataLoader:
data_loaders = data_block.dataloaders(path / "train_images")
# Available classes:
data_loaders.vocab
# Few example images:
data_loaders.show_batch()

# ResNet34 architecture for image classification:
learner = cnn_learner(data_loaders, models.resnet34, metrics=error_rate)
# 4 epochs, unfortunately one epoch takes ~1h, most probably because of incorrect use of 'item_tfms' in DataBlock, which disables GPU usage:
learner.fine_tune(4)

# Model validation, this model achieved 0.62 error_rate:
interpretation = ClassificationInterpretation.from_learner(learner)
interpretation.plot_confusion_matrix()
interpretation.plot_top_losses(5, nrows=1, figsize=(25, 5))

# Saving model:
learner.export()
```

================================================
FILE: patterns/abbreviations.md
================================================

[go back](https://github.com/pkardas/learning)

# Abbreviations

- [SOLID](#solid)
- [DRY - Don't Repeat Yourself](#dry---dont-repeat-yourself)
- [KISS - Keep It Simple, Stupid](#kiss---keep-it-simple-stupid)
- [ACID](#acid)
- [BASE](#base)
- [CAP](#cap)
- [NF](#nf)

## SOLID

### SRP - Single Responsibility Principle

A class should have only one reason to change, so in order to reduce the reasons for modification, one class should have one responsibility. It is a bad practice to create classes doing everything.

Why is it so important that a class has only one reason to change? If a class has more than one responsibility, the responsibilities become coupled, and this might lead to surprising consequences, like one change breaking another functionality.

You can avoid these problems by asking a simple question before you make any changes: What is the responsibility of your class / component / micro-service? If your answer includes the word “and”, you’re most likely breaking the single responsibility principle.
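A small illustration (hypothetical `Report` class, not from any of the sources here) - one class that both formats and persists reports has two reasons to change; splitting it leaves each class with one:

```python
# Violation: the class formats reports AND persists them - two reasons to change.
class Report:
    def __init__(self, body: str) -> None:
        self.body = body

    def to_html(self) -> str:
        return f"<html><body>{self.body}</body></html>"

    def save(self, filename: str) -> None:  # persistence concern mixed in
        with open(filename, "w") as file:
            file.write(self.to_html())

# Fix: each class has a single responsibility.
class HtmlFormatter:
    def format(self, report: Report) -> str:
        return f"<html><body>{report.body}</body></html>"

class ReportWriter:
    def save(self, content: str, filename: str) -> None:
        with open(filename, "w") as file:
            file.write(content)
```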
### OCP - Open-Closed Principle

Classes, modules, functions, etc. should be open to extension but closed to modification. Code should be extensible and adaptable to new requirements. In other words, we should be able to add new system functionality without having to modify the existing code. We should add functionality only by writing new code.

If we want to add a new thing to the application and we have to modify the "old", existing code to achieve this, it is quite likely that the code was not written in the best way. Ideally, new behaviors are simply added.

### LSP - Liskov Substitution Principle

This rule deals with the correct use of inheritance and states that wherever we pass an object of a base class, we should be able to pass an object of a class inheriting from that class.

Example of violation:

```python
class A:
    def foo(self) -> str:
        return "foo"

class B(A):
    def foo(self, bar: str) -> str:  # changed signature breaks substitutability
        return f"foo {bar}"
```

B's `foo` does not take the same arguments, meaning A and B are not compatible: B can not be used wherever A is expected.

### ISP - Interface Segregation Principle

Clients should not be forced to depend upon interfaces that they do not use. ISP splits interfaces that are very large into smaller and more specific ones, so that clients only have to know about the methods that are of interest to them.

Example of violation:

```python
class Shape:
    def area(self) -> float:
        raise NotImplementedError

    def volume(self) -> float:
        raise NotImplementedError
```

A 2D triangle does not have a volume, hence it would need to implement a method it does not need. To solve this, there should be multiple interfaces: Shape and 3DShape.

### DIP - Dependency Inversion Principle

High-level modules, which provide complex logic, should be easily reusable and unaffected by changes in low-level modules, which provide utility features. To achieve that, you need to introduce an abstraction that decouples the high-level and low-level modules from each other.

> Entities must depend on abstractions, not on concretions. It states that the high-level module must not depend on the
> low-level module, but they should depend on abstractions.

For example, a password reminder should not have knowledge about the database provider (low-level information).
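A sketch of the password-reminder example mentioned above (hypothetical class names):

```python
from abc import ABC, abstractmethod

# The abstraction both levels depend on:
class Connection(ABC):
    @abstractmethod
    def fetch_user_email(self, user_id: int) -> str: ...

# Low-level detail - can be swapped without touching PasswordReminder:
class PostgresConnection(Connection):
    def fetch_user_email(self, user_id: int) -> str:
        return "user@example.com"  # a real implementation would query the database

# High-level module depends only on the abstraction:
class PasswordReminder:
    def __init__(self, connection: Connection) -> None:
        self.connection = connection

    def remind(self, user_id: int) -> str:
        return f"Reminder sent to {self.connection.fetch_user_email(user_id)}"

print(PasswordReminder(PostgresConnection()).remind(user_id=1))
```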
## DRY - Don't Repeat Yourself

"Every piece of knowledge must have a single, unambiguous, authoritative representation within a system."

When the DRY principle is applied successfully, a modification of any single element of a system does not require a change in other logically unrelated elements.

## KISS - Keep It Simple, Stupid

The KISS principle states that most systems work best if they are kept simple rather than made complicated; therefore, simplicity should be a key goal in design, and unnecessary complexity should be avoided.

## ACID

### Atomicity

Each transaction is either properly carried out or the process halts and the database reverts back to the state before the transaction started. This ensures that all data in the database is valid.

### Consistency

A processed transaction will never endanger the structural integrity of the database. The database is always in a consistent state.

### Isolation

Transactions cannot compromise the integrity of other transactions by interacting with them while they are still in progress.

### Durability

The data related to the completed transaction will persist even in the case of network or power outages. If a transaction fails, it will not impact the manipulated data.

## BASE

### Basically Available

Ensure availability of data by spreading and replicating it across the nodes of the database cluster - this is not done immediately.

### Soft State

Due to the lack of immediate consistency, data values may change over time. The state of the system could change over time, so even during times without input there may be changes going on due to 'eventual consistency', thus the state of the system is always 'soft'.

### Eventually Consistent

The system will *eventually* become consistent once it stops receiving input. The data will propagate to everywhere it should sooner or later, but the system will continue to receive input and is not checking the consistency of every transaction before it moves on to the next one.

## CAP

In theoretical computer science, the CAP theorem states that it is impossible for a distributed data store to simultaneously provide more than two out of the following three guarantees:

### Consistency

Every read receives the most recent write or an error. Does the system reliably follow its established rules? Do all nodes within a cluster see all the data they are supposed to? (Similar in spirit to consistency in ACID, although not identical.)

### Availability

Every request receives a (non-error) response, without the guarantee that it contains the most recent write. Is the given service or system available when requested? Does each request get a response outside of failure or success?

### Partition Tolerance

The system continues to operate even when the network between nodes fails and messages are lost or delayed. A single node failure should not cause the entire system to collapse.

## NF

Database normalisation is the process of structuring a database, usually a relational database, in accordance with a series of so-called normal forms in order to reduce data redundancy and improve data integrity.

### 1NF

To satisfy 1NF, the values in each column of a table must be atomic.

### 2NF

Must be in 1NF + every non-key column must depend on the whole primary key, not just a part of a composite key.

### 3NF

Must be in 2NF + no transitive functional dependencies. Transitive Functional Dependency - when changing a non-key column might cause any of the other non-key columns to change.

For example:

![3nf-violation](../_images/3nf-violation.png)
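To make the transitive dependency concrete, a hypothetical `employees` table that violates 3NF - the non-key column `department` determines the non-key column `department_location`:

| employee_id | name | department | department_location |
|-------------|------|------------|----------------------|
| 1           | Anna | Finance    | Kraków               |
| 2           | Tom  | Finance    | Kraków               |

Changing a department's location forces updates in every matching employee row; moving `department_location` into a separate `departments` table removes the transitive dependency.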
================================================
FILE: patterns/architecture.md
================================================

[go back](https://github.com/pkardas/learning)

# Architecture Patterns

- [Command and Query Responsibility Segregation (CQRS)](#command-and-query-responsibility-segregation-cqrs)
- [Reporting Database](#reporting-database)
- [Event Sourcing](#event-sourcing)
- [Saga](#saga)

## Command and Query Responsibility Segregation (CQRS)

Based on: https://docs.microsoft.com/en-us/azure/architecture/patterns/cqrs, https://martinfowler.com/bliki/CQRS.html, https://bulldogjob.pl/articles/122-cqrs-i-event-sourcing-czyli-latwa-droga-do-skalowalnosci-naszych-systemow_

This pattern separates read and update operations for a data store. Traditionally, the same data model is used to query and update a database. This works well for simple CRUD applications, but for more complex applications, with more advanced operations on the read and write sides, CQRS might be a better idea.

Commands update data, queries read data. Commands should be *task based*, rather than *data centric* (book a hotel room instead of setting `reservation_status` to `reserved`). Queries *never* modify the database. Usually, whenever a command updates data, it also publishes an event - and this needs to be done within a single transaction.

![patterns-architecture-cqrs-martin-fowler](../_images/patterns-architecture-cqrs-martin-fowler.png)

CQRS:

- you are able to scale Command and Query independently
- separate models for updating and querying might lead to eventual consistency
- suited for complex domains

## Reporting Database

Based on: https://martinfowler.com/bliki/ReportingDatabase.html

Set up a second database for reporting purposes; this database is completely separate from the operational (application) database.

Reporting Database:

- designed specifically for reports
- can be denormalized, usually read-only
- redundant information might speed up queries
- queries on the database don't add to the load on the operational database
- additional data might be derived from the operational database
- needs to be synced somehow with the main database (e.g. sync data overnight or sync using events)

## Event Sourcing

Based on: https://docs.microsoft.com/en-us/azure/architecture/patterns/event-sourcing, https://microservices.io/patterns/data/event-sourcing.html

> How to reliably/atomically update the database and publish messages/events?

Instead of maintaining the current state, the application can keep a log of state changes. Whenever the state of a business entity changes, a new event is appended to the list of events. Since saving an event is a single operation, it is inherently atomic. The application reconstructs an entity's current state by replaying the events.

The event log also behaves like a message broker. When a service saves an event in the event store, it is delivered to all interested subscribers.

> Event sourcing is commonly combined with the CQRS pattern by performing the data management tasks in response to the
> events, and by materialising views from the stored events.

In order to maintain consistency in multi-threaded applications, adding a timestamp to every event might help in resolving issues, but not in all cases. A better approach is to label each event with an incremental identifier. If two actions attempt to add events for the same entity at the same time, the event store can reject the event whose identifier matches one already stored for that entity.
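A minimal in-memory sketch of that idea (hypothetical, not from the sources above): each append carries the version the writer expects, and the store rejects stale writers:

```python
class ConcurrencyError(Exception):
    pass

class EventStore:
    def __init__(self) -> None:
        self.streams: dict[str, list[tuple[int, str]]] = {}

    def append(self, entity_id: str, event: str, expected_version: int) -> None:
        stream = self.streams.setdefault(entity_id, [])
        if len(stream) != expected_version:
            # Another writer appended first - reject instead of silently overwriting.
            raise ConcurrencyError(f"expected version {expected_version}, got {len(stream)}")
        stream.append((expected_version + 1, event))

store = EventStore()
store.append("order-1", "OrderCreated", expected_version=0)
store.append("order-1", "OrderPaid", expected_version=1)
try:
    store.append("order-1", "OrderShipped", expected_version=1)  # stale writer
except ConcurrencyError as error:
    print(error)  # expected version 1, got 2
```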
![patterns-architecture-event-sourcing-overview-microsoft](../_images/patterns-architecture-event-sourcing-overview.png)

This pattern is useful when:

- you want to capture intent, purpose, or reason in the data
- you want to record events that occur, and be able to replay them to restore the state of a system, roll back changes, or keep a history and audit log

Not useful when:

- the problem is small
- consistency and real-time updates to the views of the data are required
- history, and capabilities to roll back and replay actions, are not required

Example: a banking system - a list of all transactions; your total balance is calculated based on these transactions.

## Saga

Based on: https://microservices.io/patterns/data/saga.html

In a design where each service has its own database, sometimes transactions have to span multiple services, hence a local ACID transaction is not an option. A solution to this problem is the *Saga* - a sequence of local transactions. Each local transaction updates the database and publishes a message or event to trigger the next local transaction in the saga. If a local transaction fails because it violates a business rule, then the saga executes a series of compensating transactions that undo the changes made by the preceding local transactions.

For example: Service A creates a new Order with PENDING state and publishes an event that is consumed by another service B; service B responds with an event to service A. Service A accepts or rejects the new Order.

DON'T: Based on `Chapter 17: Microservices Architecture` @ `Fundamentals of Software Architecture`:

> Don't do transactions in microservices - fix granularity instead.

================================================
FILE: teaching/python-intermediate/README.md
================================================

[go back](https://github.com/pkardas/learning)

# Python Intermediate

Repository with the code and tasks: https://github.com/pkardas/shapes

================================================
FILE: teaching/python-intro/README.md
================================================

[go back](https://github.com/pkardas/learning)

# Introduction to Programming: Python for beginners

This folder contains the presentation and the notebook used during "Introduction to Programming: Python for beginners" classes. The training was intended for people with no prior programming skills. Each training was scheduled for 2 hours.

`presentation` - meeting agenda, topics, theory, examples

`notebook` - Jupyter Notebook with assignments; the audience was supposed to fill in the gaps using the provided theory and examples.
================================================ FILE: teaching/python-intro/notebook.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Introduction to programming: Python for beginners.ipynb", "provenance": [], "collapsed_sections": [ "rDyFlkw1DnX_", "lLgIUzF_PwR7", "jxT57KJoPSs3", "2eRI479WVjka", "4i61CcItIwUv", "jvxBKZp8nRZP", "6wz3rtllMw6k", "JOGgKUXDx356" ] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "rDyFlkw1DnX_" }, "source": [ "# Task 1 \n" ] }, { "cell_type": "code", "metadata": { "id": "6oNEjs4RbFo8" }, "source": [ "def airhelp() -> str:\n", " return \"AirHelp\"\n", "\n", "airhelp()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "-2d4bfTpaaUB" }, "source": [ "from datetime import date, timedelta\n", "\n", "def yesterday() -> date:\n", " return date.today() - timedelta(days=1)\n", "\n", "yesterday()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "glbGi1MgD0RH" }, "source": [ "# Write a function with a name `hello_world` that **returns**: \"Hello world!\". Fill the gaps with Python code. \n", "\n", "def AAA() -> str:\n", " return BBB\n", "\n", "hello_world()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "lLgIUzF_PwR7" }, "source": [ "# Task 2: \n" ] }, { "cell_type": "code", "metadata": { "id": "mCBTZQCZbaMD" }, "source": [ "def y(x: int) -> int:\n", " return 2 * x\n", "\n", "y(10)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "e1GeGmhqbjIw" }, "source": [ "from typing import List\n", "\n", "def odds(numbers: List[int]) -> List[int]:\n", " return [number for number in numbers if number % 2 != 0]\n", "\n", "odds([1, 2, 3, 4, 5, 6, 7, 8, 9])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "g8oV4dh_PwR8" }, "source": [ "# Write a function that greets the user, user's name is provided via a parameter. Return string with injected user name.\n", "\n", "def CCC(DDD: str) -> str:\n", " return f\"Hello, {DDD} 👋\"\n", "\n", "print(hello(\"Kamil\")) # Hello, Kamil! 👋\n", "print(hello(\"Piotr\")) # Hello, Piotr! 👋\n", "print(hello(\"Marta\")) # Hello, Marta! 👋" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "jxT57KJoPSs3" }, "source": [ "# Task 3: \n" ] }, { "cell_type": "code", "metadata": { "id": "lLU8BZZSdRZa" }, "source": [ "class Person:\n", " def __init__(self, name: str, surname: str, age: int) -> None:\n", " self.name = name\n", " self.surname = surname\n", " self.age = age\n", "\n", "p0 = Person(\"Anja\", \"Rubik\", 37)\n", "p1 = Person(\"Elon\", \"Musk\", 49)\n", "\n", "def introduce_person(person: Person) -> str:\n", " return f\"{person.name} is {person.age} years old.\"\n", "\n", "print(introduce_person(p0))\n", "print(introduce_person(p1))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "9kZa8oAbPSs4" }, "source": [ "# Build a new data type - Message. Message should have 3 attributes: `content`, `sender_email` and `received_at`. Fill the gaps.\n", "\n", "from datetime import datetime\n", "\n", "class Message:\n", " def EEE(self, FFF: str, GGG: str, HHH: datetime) -> None:\n", " self.FFF = FFF\n", " self.GGG = GGG\n", " self.HHH = HHH\n", "\n", "m0 = Message(\"Hello! 
How are you?\", \"adam@gmail.com\", datetime(2021, 4, 21, 12, 0, 0))\n", "m1 = Message(\"I am fine!\", \"dan@gmail.com\", datetime.utcnow())\n", "\n", "print(m0.content, m0.sender_email, m0.received_at)\n", "print(m1.content, m1.sender_email, m1.received_at)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "2eRI479WVjka" }, "source": [ "# Task 4: \n" ] }, { "cell_type": "code", "metadata": { "id": "1u5ekl3OfHoJ" }, "source": [ "class Person:\n", " def __init__(self, name: str, surname: str, age: int) -> None:\n", " self.name = name\n", " self.surname = surname\n", " self.age = age\n", "\n", " def introduce(self) -> str:\n", " return f\"{self.name} is {self.age} years old.\"\n", "\n", "p0 = Person(\"Anja\", \"Rubik\", 37)\n", "p1 = Person(\"Elon\", \"Musk\", 49)\n", "\n", "print(p0.introduce())\n", "print(p1.introduce())" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "8RzdyrbNe2yx" }, "source": [ "# Extend `Message`. Add a method that will return message language." ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "gzrXvaK_WmxA" }, "source": [ "! pip install langdetect" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DpnviQcAVjkc" }, "source": [ "from langdetect import detect\n", "\n", "class Message:\n", " def __init__(self, content: str, sender_email: str, received_at: datetime) -> None:\n", " self.content = content\n", " self.sender_email = sender_email\n", " self.received_at = received_at\n", " \n", " @property\n", " def language(self) -> str:\n", " return detect(self.JJJ).upper()\n", "\n", "m0 = Message(\"Hi Johny.\", \"adam@gmail.com\", datetime(2021, 4, 21, 12, 0, 0))\n", "m1 = Message(\"こんにちは、Akikoさん。\", \"dan@gmail.com\", datetime(2021, 4, 21, 13, 0, 0))\n", "\n", "print(f\"'{m0.content}' is in {m0.language}\") # This should print: \"'Hi Johny.' 
is in EN\"\n", "print(f\"'{m1.content}' is in {m1.language}\") # This should print: \"'こんにちは、Akikoさん。' is in JA\"" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "4i61CcItIwUv" }, "source": [ "# Task 5: " ] }, { "cell_type": "code", "metadata": { "id": "UmHhUjSafdSV" }, "source": [ "def print_people(people: List[Person]) -> None:\n", " for i, person in enumerate(people):\n", " print(i, person.name, person.surname)\n", "\n", "p0 = Person(\"Anja\", \"Rubik\", 37)\n", "p1 = Person(\"Elon\", \"Musk\", 49)\n", "p2 = Person(\"Abel\", \"Tesfaye\", 31)\n", "p3 = Person(\"Guido\", \"van Rossum\", 65)\n", "\n", "\n", "people = [p0, p1, p2, p3]\n", "\n", "print_people(people)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "AW4cx5OugafV" }, "source": [ "people[2].surname" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "JYGq-YtQIwUx" }, "source": [ "from typing import List\n", "\n", "m0 = Message(\"Today is a beautiful day\", \"tom@gmail.com\", datetime(2020, 1, 1))\n", "m1 = Message(\"Today is rather average day\", \"adam@gmail.com\", datetime(2005, 12, 5))\n", "m2 = Message(\"Dziś jest piękny dzień\", \"ewa@gmail.com\", datetime(2021, 4, 21))\n", "m3 = Message(\"Aujourd'hui est une belle journée\", \"tina@gmail.com\", datetime(2020, 12, 5))\n", "\n", "def print_messages(messages: List[Message]) -> None:\n", " for i, message in enumerate(messages):\n", " print(i, message.content)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "zfSSyrxNJ5eB" }, "source": [ "# Group messages `m0, m1, m2, m3` together\n", "messages = KKK\n", "\n", "print_messages(messages)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "k2zBcriFlaXi" }, "source": [ "# Access first message from the list\n", "messages[LLL].content" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DfB4WNlAlr0O" }, "source": [ "# Access the last message from the list\n", "messages[MMM].content" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "k2X690Xql1B3" }, "source": [ "# Assign the last message to the variable and display message language\n", "last_message = messages[NNN]\n", "last_message.language" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "D5K5KejanLa6" }, "source": [ "# Display the language of the last message without assigning to the variable\n", "messages[NNN].language" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Id6lwNVtmHYT" }, "source": [ "# Append message m4 to the existing list of the messages\n", "m4 = Message(\"Can you append me to the list, please?\", \"karen@gmail.com\", datetime(2021, 1, 5))\n", "messages.OOO(m4)\n", "\n", "print_messages(messages)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ebap0NT4ngOt" }, "source": [ "# ITERATE over the list of messages and print: message content, sender and message language.\n", "for PPP in QQQ:\n", " print(PPP.content, PPP.sender_email, PPP.language)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "jvxBKZp8nRZP" }, "source": [ "# Task 6: \n" ] }, { "cell_type": "code", "metadata": { "id": "Xedg0en_aPbd" }, "source": [ "people_over_40 = [person for person in people if person.age > 40]\n", "\n", "print_people(people_over_40)" ], 
"execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "cl7oWpTqrtbA" }, "source": [ "# Write a function returning filtered messages. Filter by message language.\n", "\n", "def messages_in_language(messages: List[Message], country_code: str) -> List[Message]:\n", " return [RRR for RRR in SSS if RRR.language == country_code]\n", "\n", "messages = [\n", " Message(\"This message is in English\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"This message is also in English\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"Ta wiadomość jest po polsku\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"Ta wiadomość również jest po polsku\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"このメッセージは日本語で書かれています。\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"このメッセージは日本語でも書かれています\", \"xyz@gmail.com\", datetime.now()),\n", "]\n", "\n", "print(\"-- PL --\")\n", "print_messages(messages_in_language(messages, \"PL\"))\n", "print(\"-- EN --\")\n", "print_messages(messages_in_language(messages, \"EN\"))\n", "print(\"-- JA --\")\n", "print_messages(messages_in_language(messages, \"JA\"))" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "6wz3rtllMw6k" }, "source": [ "# Task 7: \n" ] }, { "cell_type": "code", "metadata": { "id": "gxNBaSLpvFVY" }, "source": [ "[1, 1, 1, 1, 1, 2]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Oii5vHEFvLPi" }, "source": [ "(1, 1, 1, 1, 1, 2)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "vXB9LKlmvQAE" }, "source": [ "{1, 1, 1, 1, 1, 2}" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "H4Gss1dPMw6m" }, "source": [ "# Write a function returning unique e-mails from the provided list of messages.\n", "\n", "def unique_emails(messages: List[Message]) -> List[str]:\n", " return list({message.TTT for message in messages})\n", "\n", "messages = [\n", " Message(\"Lorem ipsum\", \"anna@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"dan@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"tom@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"kate@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"tom@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"kate@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"anna@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"kate@gmail.com\", datetime.now()),\n", "]\n", "\n", "# This should print, somthing like:\n", "# ['tom@gmail.com', 'anna@gmail.com', 'dan@gmail.com', 'kate@gmail.com']\n", "# (order might be different)\n", "unique_emails(messages)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "JOGgKUXDx356" }, "source": [ "# Task 8: \n" ] }, { "cell_type": "code", "metadata": { "id": "RbDPROw9avi9" }, "source": [ "sorted_people = sorted(people, key=lambda person: person.age)\n", "\n", "print_people(sorted_people)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "NO-du1hNx35-" }, "source": [ "# Write a function returning messages sorted by date.\n", "\n", "def messages_sorted_by_date(messages: List[Message]) -> List[Message]:\n", " return sorted(VVV, key=lambda message: message.UUU)\n", "\n", "messages = [\n", " Message(\"1\", \"example@gmail.com\", datetime(2005, 1, 1)),\n", " Message(\"3\", \"example@gmail.com\", datetime(2006, 6, 2)),\n", " Message(\"6\", 
\"example@gmail.com\", datetime(2020, 6, 6)),\n", " Message(\"4\", \"example@gmail.com\", datetime(2007, 4, 1)),\n", " Message(\"8\", \"example@gmail.com\", datetime(2021, 5, 5)),\n", " Message(\"2\", \"example@gmail.com\", datetime(2005, 2, 6)),\n", " Message(\"7\", \"example@gmail.com\", datetime(2020, 9, 9)),\n", " Message(\"5\", \"example@gmail.com\", datetime(2010, 9, 1)),\n", "]\n", "\n", "# This should print something like: \"1, 2, 3, 4, 5, 6, 7, 8\"\n", "print_messages(messages_sorted_by_date(messages))" ], "execution_count": null, "outputs": [] } ] }